VoucherVision / VoucherVision_Reference.yaml
phyloforfun's picture
Add application file
87c3140
raw history blame
No virus
6.17 kB
# To use default value, set to null
leafmachine:
use_RGB_label_images: True
do:
check_for_illegal_filenames: False
check_for_corrupt_images_make_vertical: False
print:
verbose: True
optional_warnings: True
logging:
log_level: null
# Overall Project Input Settings
project:
# Image to Process
dir_images_local: 'D:\Dropbox\LM2_Env\VoucherVision_Datasets\2022_09_07_thru12_S3_jacortez_AllAsia' # 'D:/Dropbox/LM2_Env/VoucherVision_Datasets/Compare_Set_Easy_10imgs/imgs' #'D:\D_Desktop\Richie\Imgs' #'D:/Dropbox/LM2_Env/Image_Datasets/Acacia/Acacia_prickles_4-26-23_LANCZOS/images/short' #'D:\D_Desktop\Richie\Imgs' #'home/brlab/Dropbox/LM2_Env/Image_Datasets/Manuscript_Images' # 'D:\Dropbox\LM2_Env\Image_Datasets\SET_FieldPrism_Test\TESTING_OUTPUT\Images_Processed\REU_Field_QR-Code-Images\Cannon_Corrected\Images_Corrected' # 'F:\temp_3sppFamily' # 'D:/Dropbox/LM2_Env/Image_Datasets/GBIF_BroadSample_3SppPerFamily' # SET_Diospyros/images_short' # 'D:/Dropbox/LM2_Env/Image_Datasets/SET_Diospyros/images_short' #'D:\Dropbox\LM2_Env\Image_Datasets\GBIF_BroadSample_Herbarium' #'D:/Dropbox/LM2_Env/Image_Datasets/SET_Diospyros/images_short' # str | only for image_location:local | full path for directory containing images
# dir_images_local: 'D:/Dropbox/LM2_Env/VoucherVision_Datasets/Compare_Set_Easy_10imgs/imgs' #'D:\D_Desktop\Richie\Imgs' #'D:/Dropbox/LM2_Env/Image_Datasets/Acacia/Acacia_prickles_4-26-23_LANCZOS/images/short' #'D:\D_Desktop\Richie\Imgs' #'home/brlab/Dropbox/LM2_Env/Image_Datasets/Manuscript_Images' # 'D:\Dropbox\LM2_Env\Image_Datasets\SET_FieldPrism_Test\TESTING_OUTPUT\Images_Processed\REU_Field_QR-Code-Images\Cannon_Corrected\Images_Corrected' # 'F:\temp_3sppFamily' # 'D:/Dropbox/LM2_Env/Image_Datasets/GBIF_BroadSample_3SppPerFamily' # SET_Diospyros/images_short' # 'D:/Dropbox/LM2_Env/Image_Datasets/SET_Diospyros/images_short' #'D:\Dropbox\LM2_Env\Image_Datasets\GBIF_BroadSample_Herbarium' #'D:/Dropbox/LM2_Env/Image_Datasets/SET_Diospyros/images_short' # str | only for image_location:local | full path for directory containing images
image_location: 'local'
continue_run_from_partial_xlsx: 'D:\Dropbox\LM2_Env\VoucherVision_Datasets\POC_chatGPT__2022_09_07_thru12_S3_jacortez_AllAsia\2022_09_07_thru12_S3_jacortez_AllAsia\Transcription\transcribed.xlsx'
# continue_run_from_partial_xlsx: null
# Project Output Dir
dir_output: 'D:/Dropbox/LM2_Env/VoucherVision_Datasets/POC_chatGPT__2022_09_07_thru12_S3_jacortez_AllAsia' # 'D:/Dropbox/LM2_Env/Image_Datasets/TEST_LM2' # 'D:\D_Desktop\Richie\Richie_Out'
run_name: 'POC_chatGPT' #'images_short_TEST' #'images_short_landmark'
prefix_removal: 'MICH-V-'
suffix_removal: ''
catalog_numerical_only: True
# Embeddings and LLM
use_domain_knowledge: True
embeddings_database_name: 'EmbeddingsDB_all_asia_minimal_InRegion'
build_new_embeddings_database: False
path_to_domain_knowledge_xlsx: 'D:\Dropbox\LeafMachine2\leafmachine2\transcription\domain_knowledge/AllAsiaMinimalasof25May2023_2__InRegion.xlsx' #'D:/Dropbox/LeafMachine2/leafmachine2/transcription/domain_knowledge/AllAsiaMinimalasof25May2023_2__TRIMMEDtiny.xlsx'
batch_size: 500 #null # null = all
num_workers: 1 # int |DEFAULT| 4 # More is not always better. Most hardware loses performance after 4
modules:
specimen_crop: True
LLM_version: 'chatGPT' # from 'chatGPT' OR 'PaLM'
cropped_components:
# empty list for all, add to list to IGNORE, lowercase, comma seperated
# archival |FROM|
# ruler, barcode, colorcard, label, map, envelope, photo, attached_item, weights
# plant |FROM|
# leaf_whole, leaf_partial, leaflet, seed_fruit_one, seed_fruit_many, flower_one, flower_many, bud, specimen, roots, wood
do_save_cropped_annotations: True
save_cropped_annotations: ['label','barcode'] # 'save_all' to save all classes
save_per_image: False # creates a folder for each image, saves crops into class-names folders # TODO
save_per_annotation_class: True # saves crops into class-names folders
binarize_labels: False
binarize_labels_skeletonize: False
data:
save_json_rulers: False
save_json_measurements: False
save_individual_csv_files_rulers: False
save_individual_csv_files_measurements: False
include_darwin_core_data_from_combined_file: False
do_apply_conversion_factor: False ###########################
overlay:
save_overlay_to_pdf: True
save_overlay_to_jpgs: True
overlay_dpi: 300 # int |FROM| 100 to 300
overlay_background_color: 'black' # str |FROM| 'white' or 'black'
show_archival_detections: True
ignore_archival_detections_classes: []
show_plant_detections: True
ignore_plant_detections_classes: ['leaf_whole', 'specimen'] #['leaf_whole', 'leaf_partial', 'specimen']
show_segmentations: True
show_landmarks: True
ignore_landmark_classes: []
line_width_archival: 2 # int
line_width_plant: 6 # int
line_width_seg: 12 # int # thick = 12
line_width_efd: 6 # int # thick = 3
alpha_transparency_archival: 0.3 # float between 0 and 1
alpha_transparency_plant: 0
alpha_transparency_seg_whole_leaf: 0.4
alpha_transparency_seg_partial_leaf: 0.3
# Configure Archival Component Detector
archival_component_detector:
# ./leafmachine2/component_detector/runs/train/detector_type/detector_version/detector_iteration/weights/detector_weights
detector_type: 'Archival_Detector'
detector_version: 'PREP_final'
detector_iteration: 'PREP_final'
detector_weights: 'best.pt'
minimum_confidence_threshold: 0.5
do_save_prediction_overlay_images: True
ignore_objects_for_overlay: [] # list[str] # list of objects that can be excluded from the overlay # all = null