Spaces:
Running
Running
Major update. Support for 15 LLMs, World Flora Online taxonomy validation, geolocation, 2 OCR methods, significant UI changes, stability improvements, consistent JSON parsing
5e5ce28
''' | |
VoucherVision - based on LeafMachine2 Processes | |
''' | |
import os, inspect, sys, shutil | |
from time import perf_counter | |
currentdir = os.path.dirname(os.path.dirname(inspect.getfile(inspect.currentframe()))) | |
parentdir = os.path.dirname(currentdir) | |
sys.path.append(parentdir) | |
sys.path.append(currentdir) | |
from vouchervision.component_detector.component_detector import detect_plant_components, detect_archival_components | |
from vouchervision.general_utils import save_token_info_as_csv, print_main_start, check_for_subdirs_VV, load_config_file, load_config_file_testing, report_config, save_config_file, crop_detections_from_images_VV | |
from vouchervision.directory_structure_VV import Dir_Structure | |
from vouchervision.data_project import Project_Info | |
from vouchervision.LM2_logger import start_logging | |
from vouchervision.fetch_data import fetch_data | |
from vouchervision.utils_VoucherVision import VoucherVision, space_saver | |
from vouchervision.utils_hf import upload_to_drive | |
def voucher_vision(cfg_file_path, dir_home, path_custom_prompts, cfg_test, progress_report, json_report, path_api_cost=None, test_ind = None, is_hf = True, is_real_run=False): | |
t_overall = perf_counter() | |
# Load config file | |
report_config(dir_home, cfg_file_path, system='VoucherVision') | |
if cfg_test is None: | |
cfg = load_config_file(dir_home, cfg_file_path, system='VoucherVision') # For VoucherVision | |
else: | |
cfg = cfg_test | |
# Check to see if there are subdirs | |
# Yes --> use the names of the subsirs as run_name | |
run_name, dirs_list, has_subdirs = check_for_subdirs_VV(cfg) | |
print(f"run_name {run_name} dirs_list{dirs_list} has_subdirs{has_subdirs}") | |
# Dir structure | |
if is_real_run: | |
progress_report.update_overall(f"Creating Output Directory Structure") | |
print_main_start("Creating Directory Structure") | |
Dirs = Dir_Structure(cfg) | |
# logging.info("Hi") | |
logger = start_logging(Dirs, cfg) | |
# Check to see if required ML files are ready to use | |
if is_real_run: | |
progress_report.update_overall(f"Fetching LeafMachine2 Files") | |
ready_to_use = fetch_data(logger, dir_home, cfg_file_path) | |
assert ready_to_use, "Required ML files are not ready to use!\nThe download may have failed,\nor\nthe directory structure of LM2 has been altered" | |
# Wrangle images and preprocess | |
print_main_start("Gathering Images and Image Metadata") | |
Project = Project_Info(cfg, logger, dir_home, Dirs) # Where file names are modified | |
# Save config file | |
save_config_file(cfg, logger, Dirs) | |
# Detect Archival Components | |
print_main_start("Locating Archival Components") | |
Project = detect_archival_components(cfg, logger, dir_home, Project, Dirs, is_real_run, progress_report) | |
# Save cropped detections | |
crop_detections_from_images_VV(cfg, logger, dir_home, Project, Dirs) | |
# Process labels | |
Voucher_Vision = VoucherVision(cfg, logger, dir_home, path_custom_prompts, Project, Dirs, is_hf) | |
n_images = len(Voucher_Vision.img_paths) | |
last_JSON_response, final_WFO_record, final_GEO_record, total_tokens_in, total_tokens_out = Voucher_Vision.process_specimen_batch(progress_report, json_report, is_real_run) | |
total_cost = save_token_info_as_csv(Dirs, cfg['leafmachine']['LLM_version'], path_api_cost, total_tokens_in, total_tokens_out, n_images, dir_home, logger) | |
t_overall_s = perf_counter() | |
logger.name = 'Run Complete! :)' | |
logger.info(f"[Total elapsed time] {round((t_overall_s - t_overall)/60)} minutes") | |
space_saver(cfg, Dirs, logger) | |
if is_real_run: | |
progress_report.update_overall(f"Run Complete!") | |
Voucher_Vision.close_logger_handlers() | |
zip_filepath = None | |
# Create Higging Face zip file | |
dir_to_zip = os.path.join(Dirs.dir_home, Dirs.run_name) | |
zip_filename = Dirs.run_name | |
# Creating a zip file | |
zip_filepath = make_zipfile(dir_to_zip, zip_filename) ####################################################################################################### TODO Make this configurable | |
if is_hf: | |
upload_to_drive(zip_filepath, zip_filename, is_hf, cfg_private=Voucher_Vision.cfg_private, do_upload=True) ###################################### TODO Make this configurable | |
else: | |
upload_to_drive(zip_filepath, zip_filename, is_hf, cfg_private=Voucher_Vision.cfg_private, do_upload=False) ##################################### TODO Make this configurable | |
return last_JSON_response, final_WFO_record, final_GEO_record, total_cost, Voucher_Vision.n_failed_OCR, Voucher_Vision.n_failed_LLM_calls, zip_filepath | |
def make_zipfile(base_dir, output_filename): | |
# Determine the directory where the zip file should be saved | |
# Construct the full path for the zip file | |
full_output_path = os.path.join(base_dir, output_filename) | |
# Create the zip archive | |
shutil.make_archive(full_output_path, 'zip', base_dir) | |
# Return the full path of the created zip file | |
return os.path.join(base_dir, output_filename + '.zip') | |
def voucher_vision_OCR_test(cfg_file_path, dir_home, cfg_test, path_to_crop): | |
# get_n_overall = progress_report.get_n_overall() | |
# progress_report.update_overall(f"Working on {test_ind+1} of {get_n_overall}") | |
# Load config file | |
report_config(dir_home, cfg_file_path, system='VoucherVision') | |
if cfg_test is None: | |
cfg = load_config_file(dir_home, cfg_file_path, system='VoucherVision') # For VoucherVision | |
else: | |
cfg = cfg_test | |
# user_cfg = load_config_file(dir_home, cfg_file_path) | |
# cfg = Config(user_cfg) | |
# Check to see if there are subdirs | |
# Yes --> use the names of the subsirs as run_name | |
run_name, dirs_list, has_subdirs = check_for_subdirs_VV(cfg) | |
print(f"run_name {run_name} dirs_list{dirs_list} has_subdirs{has_subdirs}") | |
# for dir_ind, dir_in in enumerate(dirs_list): | |
# if has_subdirs: | |
# cfg['leafmachine']['project']['dir_images_local'] = dir_in | |
# cfg['leafmachine']['project']['run_name'] = run_name[dir_ind] | |
# Dir structure | |
print_main_start("Creating Directory Structure") | |
Dirs = Dir_Structure(cfg) | |
# logging.info("Hi") | |
logger = start_logging(Dirs, cfg) | |
# Check to see if required ML files are ready to use | |
ready_to_use = fetch_data(logger, dir_home, cfg_file_path) | |
assert ready_to_use, "Required ML files are not ready to use!\nThe download may have failed,\nor\nthe directory structure of LM2 has been altered" | |
# Wrangle images and preprocess | |
print_main_start("Gathering Images and Image Metadata") | |
Project = Project_Info(cfg, logger, dir_home, Dirs) # Where file names are modified | |
# Save config file | |
save_config_file(cfg, logger, Dirs) | |
# Detect Archival Components | |
print_main_start("Locating Archival Components") | |
Project = detect_archival_components(cfg, logger, dir_home, Project, Dirs) | |
# Save cropped detections | |
crop_detections_from_images_VV(cfg, logger, dir_home, Project, Dirs) | |
# Process labels | |
Voucher_Vision = VoucherVision(cfg, logger, dir_home, None, Project, Dirs) | |
last_JSON_response = Voucher_Vision.process_specimen_batch_OCR_test(path_to_crop) | |
if __name__ == '__main__': | |
is_test = False | |
# Set LeafMachine2 dir | |
dir_home = os.path.dirname(os.path.dirname(os.path.dirname(__file__))) | |
if is_test: | |
cfg_file_path = os.path.join(dir_home, 'demo','demo.yaml') #'D:\Dropbox\LeafMachine2\LeafMachine2.yaml' | |
# cfg_file_path = 'test_installation' | |
cfg_testing = load_config_file_testing(dir_home, cfg_file_path) | |
cfg_testing['leafmachine']['project']['dir_images_local'] = os.path.join(dir_home, cfg_testing['leafmachine']['project']['dir_images_local'][0], cfg_testing['leafmachine']['project']['dir_images_local'][1]) | |
cfg_testing['leafmachine']['project']['dir_output'] = os.path.join(dir_home, cfg_testing['leafmachine']['project']['dir_output'][0], cfg_testing['leafmachine']['project']['dir_output'][1]) | |
last_JSON_response = voucher_vision(cfg_file_path, dir_home, cfg_testing, None) | |
else: | |
cfg_file_path = None | |
cfg_testing = None | |
last_JSON_response = voucher_vision(cfg_file_path, dir_home, cfg_testing, None) |