phyloforfun committed on
Commit
dbaeac5
1 Parent(s): 67f7ed6

Major update. Support for 15 LLMs, World Flora Online taxonomy validation, geolocation, 2 OCR methods, significant UI changes, stability improvements, consistent JSON parsing

Browse files
app.py CHANGED
@@ -1,5 +1,5 @@
1
  import streamlit as st
2
- import yaml, os, json, random, time, re, torch, random, warnings, shutil, sys
3
  import seaborn as sns
4
  import plotly.graph_objs as go
5
  from PIL import Image
@@ -14,7 +14,7 @@ from vouchervision.vouchervision_main import voucher_vision
14
  from vouchervision.general_utils import test_GPU, get_cfg_from_full_path, summarize_expense_report, validate_dir
15
  from vouchervision.model_maps import ModelMaps
16
  from vouchervision.API_validation import APIvalidation
17
- from vouchervision.utils_hf import setup_streamlit_config, save_uploaded_file, save_uploaded_local
18
  from vouchervision.data_project import convert_pdf_to_jpg
19
  from vouchervision.utils_LLM import check_system_gpus
20
 
@@ -42,7 +42,7 @@ if 'config' not in st.session_state:
42
  st.session_state.config, st.session_state.dir_home = build_VV_config(loaded_cfg=None)
43
  setup_streamlit_config(st.session_state.dir_home)
44
 
45
- # st.session_state['is_hf'] = True
46
 
47
  ########################################################################################################
48
  ### Global constants ####
@@ -273,7 +273,7 @@ def content_input_images(col_left, col_right):
273
  if st.session_state.is_hf:
274
  if uploaded_files:
275
  # Clear input image gallery and input list
276
- clear_image_gallery()
277
 
278
  for uploaded_file in uploaded_files:
279
  # Determine the file type
@@ -336,6 +336,45 @@ def content_input_images(col_left, col_right):
336
  pass
337
  # elif st.session_state['input_list_small'] and (st.session_state['dir_images_local_TEMP'] != st.session_state.config['leafmachine']['project']['dir_images_local']):
338
  elif (st.session_state['dir_images_local_TEMP'] != st.session_state.config['leafmachine']['project']['dir_images_local']):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
339
  dir_images_local = st.session_state.config['leafmachine']['project']['dir_images_local']
340
  count_n_imgs = list_jpg_files(dir_images_local)
341
  st.session_state['processing_add_on'] = count_n_imgs
@@ -412,6 +451,10 @@ def delete_directory(dir_path):
412
 
413
 
414
  def clear_image_gallery():
 
 
 
 
415
  delete_directory(st.session_state['dir_uploaded_images'])
416
  delete_directory(st.session_state['dir_uploaded_images_small'])
417
  validate_dir(st.session_state['dir_uploaded_images'])
@@ -423,7 +466,7 @@ def use_test_image():
423
  st.session_state.config['leafmachine']['project']['dir_images_local'] = os.path.join(st.session_state.dir_home,'demo','demo_images')
424
  n_images = len([f for f in os.listdir(st.session_state.config['leafmachine']['project']['dir_images_local']) if os.path.isfile(os.path.join(st.session_state.config['leafmachine']['project']['dir_images_local'], f))])
425
  st.session_state['processing_add_on'] = n_images
426
- clear_image_gallery()
427
  st.session_state['uploader_idk'] += 1
428
  for file in os.listdir(st.session_state.config['leafmachine']['project']['dir_images_local']):
429
  file_path = save_uploaded_file(os.path.join(st.session_state.dir_home,'demo','demo_images'), file)
 
1
  import streamlit as st
2
+ import yaml, os, json, random, time, re, torch, random, warnings, shutil, sys, glob
3
  import seaborn as sns
4
  import plotly.graph_objs as go
5
  from PIL import Image
 
14
  from vouchervision.general_utils import test_GPU, get_cfg_from_full_path, summarize_expense_report, validate_dir
15
  from vouchervision.model_maps import ModelMaps
16
  from vouchervision.API_validation import APIvalidation
17
+ from vouchervision.utils_hf import setup_streamlit_config, save_uploaded_file, save_uploaded_local, save_uploaded_file_local
18
  from vouchervision.data_project import convert_pdf_to_jpg
19
  from vouchervision.utils_LLM import check_system_gpus
20
 
 
42
  st.session_state.config, st.session_state.dir_home = build_VV_config(loaded_cfg=None)
43
  setup_streamlit_config(st.session_state.dir_home)
44
 
45
+ st.session_state['is_hf'] = True
46
 
47
  ########################################################################################################
48
  ### Global constants ####
 
273
  if st.session_state.is_hf:
274
  if uploaded_files:
275
  # Clear input image gallery and input list
276
+ clear_image_uploads()
277
 
278
  for uploaded_file in uploaded_files:
279
  # Determine the file type
 
336
  pass
337
  # elif st.session_state['input_list_small'] and (st.session_state['dir_images_local_TEMP'] != st.session_state.config['leafmachine']['project']['dir_images_local']):
338
  elif (st.session_state['dir_images_local_TEMP'] != st.session_state.config['leafmachine']['project']['dir_images_local']):
339
+ has_pdf = False
340
+ clear_image_uploads()
341
+
342
+ for input_file in os.listdir(st.session_state.config['leafmachine']['project']['dir_images_local']):
343
+ if input_file.split('.')[1].lower() in ['jpg','jpeg']:
344
+ pass
345
+ elif input_file.split('.')[1].lower() in ['pdf',]:
346
+ has_pdf = True
347
+ # Handle PDF files
348
+ file_path = save_uploaded_file_local(st.session_state.config['leafmachine']['project']['dir_images_local'], st.session_state['dir_uploaded_images'], input_file)
349
+ # Convert each page of the PDF to an image
350
+ n_pages = convert_pdf_to_jpg(file_path, st.session_state['dir_uploaded_images'], dpi=200)#st.session_state.config['leafmachine']['project']['dir_images_local'])
351
+
352
+
353
+ # pdf_files_pattern = os.path.join(st.session_state['dir_uploaded_images'], '*.pdf')
354
+ # for pdf_file in glob.glob(pdf_files_pattern):
355
+ # os.remove(pdf_file)
356
+
357
+ # # Update the input list for each page image
358
+ # converted_files = os.listdir(st.session_state['dir_uploaded_images'])
359
+ # for file_name in converted_files:
360
+ # if file_name.lower().endswith('.jpg'):
361
+ # jpg_file_path = os.path.join(st.session_state['dir_uploaded_images'], file_name)
362
+ # st.session_state['input_list'].append(jpg_file_path)
363
+
364
+ # # Optionally, create a thumbnail for the gallery
365
+ # img = Image.open(jpg_file_path)
366
+ # img.thumbnail((GALLERY_IMAGE_SIZE, GALLERY_IMAGE_SIZE), Image.Resampling.LANCZOS)
367
+ # file_path_small = save_uploaded_file_local(st.session_state['dir_uploaded_images'], st.session_state['dir_uploaded_images_small'], file_name, img)
368
+ # st.session_state['input_list_small'].append(file_path_small)
369
+
370
+ # st.session_state.config['leafmachine']['project']['dir_images_local'] = st.session_state['dir_uploaded_images']
371
+
372
+ else:
373
+ pass
374
+ # st.warning("Inputs must be '.PDF' or '.jpg' or '.jpeg'")
375
+ if has_pdf:
376
+ st.session_state.config['leafmachine']['project']['dir_images_local'] = st.session_state['dir_uploaded_images']
377
+
378
  dir_images_local = st.session_state.config['leafmachine']['project']['dir_images_local']
379
  count_n_imgs = list_jpg_files(dir_images_local)
380
  st.session_state['processing_add_on'] = count_n_imgs
 
451
 
452
 
453
  def clear_image_gallery():
454
+ delete_directory(st.session_state['dir_uploaded_images_small'])
455
+ validate_dir(st.session_state['dir_uploaded_images_small'])
456
+
457
+ def clear_image_uploads():
458
  delete_directory(st.session_state['dir_uploaded_images'])
459
  delete_directory(st.session_state['dir_uploaded_images_small'])
460
  validate_dir(st.session_state['dir_uploaded_images'])
 
466
  st.session_state.config['leafmachine']['project']['dir_images_local'] = os.path.join(st.session_state.dir_home,'demo','demo_images')
467
  n_images = len([f for f in os.listdir(st.session_state.config['leafmachine']['project']['dir_images_local']) if os.path.isfile(os.path.join(st.session_state.config['leafmachine']['project']['dir_images_local'], f))])
468
  st.session_state['processing_add_on'] = n_images
469
+ clear_image_uploads()
470
  st.session_state['uploader_idk'] += 1
471
  for file in os.listdir(st.session_state.config['leafmachine']['project']['dir_images_local']):
472
  file_path = save_uploaded_file(os.path.join(st.session_state.dir_home,'demo','demo_images'), file)
vouchervision/OCR_google_cloud_vision.py CHANGED
@@ -144,6 +144,9 @@ class OCREngine:
144
 
145
  def init_gemini_vision(self):
146
  pass
 
 
 
147
 
148
 
149
  def detect_text_craft(self):
 
144
 
145
  def init_gemini_vision(self):
146
  pass
147
+
148
+ def init_gpt4_vision(self):
149
+ pass
150
 
151
 
152
  def detect_text_craft(self):
vouchervision/utils_hf.py CHANGED
@@ -6,7 +6,7 @@ import base64
6
  from PIL import Image
7
  from PIL import Image
8
  from io import BytesIO
9
- from shutil import copyfileobj
10
 
11
  # from vouchervision.general_utils import get_cfg_from_full_path
12
 
@@ -37,6 +37,37 @@ def setup_streamlit_config(dir_home):
37
  f.write(config_content.strip())
38
 
39
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
 
41
  def save_uploaded_file(directory, img_file, image=None):
42
  if not os.path.exists(directory):
 
6
  from PIL import Image
7
  from PIL import Image
8
  from io import BytesIO
9
+ from shutil import copyfileobj, copyfile
10
 
11
  # from vouchervision.general_utils import get_cfg_from_full_path
12
 
 
37
  f.write(config_content.strip())
38
 
39
 
40
def save_uploaded_file_local(directory_in, directory_out, img_file_name, image=None):
    """Copy or re-save a local input file into ``directory_out``.

    PDFs are copied verbatim. Any other file is written as a JPEG, either
    from the already-open ``image`` object or, when ``image`` is None, by
    opening ``directory_in/img_file_name``.

    Args:
        directory_in: Directory that currently holds ``img_file_name``.
        directory_out: Destination directory; created if it does not exist.
        img_file_name: File name including its extension.
        image: Optional already-loaded image object exposing
            ``save(path, format)``. Ignored for PDFs.

    Returns:
        The full output path on success, or ``None`` when the image could
        not be opened or saved (best-effort behaviour kept for callers).

    Raises:
        OSError: If a PDF cannot be copied (unchanged from the original,
            which did not guard the copy).
    """
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(directory_out, exist_ok=True)

    # Assuming img_file_name includes the extension
    _, img_file_ext = os.path.splitext(img_file_name)

    full_path_in = os.path.join(directory_in, img_file_name)
    full_path_out = os.path.join(directory_out, img_file_name)

    # PDFs are copied as-is; page conversion is done by the caller.
    if img_file_ext.lower() == '.pdf':
        copyfile(full_path_in, full_path_out)
        return full_path_out

    try:
        if image is None:
            # Use a distinct name so the `image` parameter is not shadowed.
            with Image.open(full_path_in) as opened_image:
                opened_image.save(full_path_out, "JPEG")
        else:
            image.save(full_path_out, "JPEG")
    except Exception:
        # Still best-effort, but no longer a bare `except:` that would also
        # swallow KeyboardInterrupt / SystemExit.
        return None

    # Return the full path of the saved image
    return full_path_out
70
+
71
 
72
  def save_uploaded_file(directory, img_file, image=None):
73
  if not os.path.exists(directory):