Spaces: phyloforfun (Running)

phyloforfun committed • Commit b55e03e • Parent(s): d48e79a

Major update. Support for 15 LLMs, World Flora Online taxonomy validation, geolocation, 2 OCR methods, significant UI changes, stability improvements, consistent JSON parsing.

Files changed:
- app.py (+117 -65)
- pages/faqs.py (+9 -6)
- pages/prompt_builder.py (+8 -5)
- pages/report_bugs.py (+8 -3)
app.py
CHANGED
@@ -18,7 +18,8 @@ from vouchervision.utils_hf import setup_streamlit_config, save_uploaded_file, s
 from vouchervision.data_project import convert_pdf_to_jpg
 from vouchervision.utils_LLM import check_system_gpus
 
-
+import cProfile
+import pstats
 #################################################################################################################################################
 # Initializations ###############################################################################################################################
 #################################################################################################################################################
@@ -27,12 +28,13 @@ st.set_page_config(layout="wide", page_icon='img/icon.ico', page_title='VoucherV
 # Parse the 'is_hf' argument and set it in session state
 if 'is_hf' not in st.session_state:
     is_hf_os = os.getenv('IS_HF', '').lower()  # Get the environment variable and convert to lowercase for uniformity
+    print(f"=== os.getenv('IS_HF', '').lower() ===> {is_hf_os} ===")
     if is_hf_os in ['1', 'true']:  # Check against string representations of truthy values
         st.session_state['is_hf'] = True
     else:
         st.session_state['is_hf'] = False
 
-print(f"is_hf {st.session_state['is_hf']}")
+print(f"=== is_hf {st.session_state['is_hf']} ===")
 
 
 # Default YAML file path
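Note: the `is_hf` block above follows the common pattern of treating an environment variable as a boolean flag. A minimal standalone sketch of that pattern (the helper name `env_flag` is illustrative, not part of this repo):

```python
import os

def env_flag(name: str, default: bool = False) -> bool:
    """Interpret an environment variable as a boolean flag ('1'/'true' -> True)."""
    value = os.getenv(name, "").lower()
    if value in ("1", "true"):
        return True
    if value in ("0", "false"):
        return False
    return default

# Mirrors how app.py decides whether it is running on a Hugging Face Space.
is_hf = env_flag("IS_HF")
```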
@@ -223,6 +225,28 @@ if 'dir_uploaded_images_small' not in st.session_state:
 ########################################################################################################
 ### CONTENT [] ####
 ########################################################################################################
+@st.cache_data
+def show_gallery_small():
+    st.image(st.session_state['input_list_small'], width=GALLERY_IMAGE_SIZE)
+
+@st.cache_data
+def show_gallery_small_hf(images_to_display):
+    st.image(images_to_display)
+
+
+@st.cache_data
+def load_gallery(converted_files, uploaded_file):
+    for file_name in converted_files:
+        if file_name.lower().endswith('.jpg'):
+            jpg_file_path = os.path.join(st.session_state['dir_uploaded_images'], file_name)
+            st.session_state['input_list'].append(jpg_file_path)
+
+            # Optionally, create a thumbnail for the gallery
+            img = Image.open(jpg_file_path)
+            img.thumbnail((GALLERY_IMAGE_SIZE, GALLERY_IMAGE_SIZE), Image.Resampling.LANCZOS)
+            file_path_small = save_uploaded_file(st.session_state['dir_uploaded_images_small'], uploaded_file, img)
+            st.session_state['input_list_small'].append(file_path_small)
+
 def content_input_images(col_left, col_right):
     st.write('---')
     # col1, col2 = st.columns([2,8])
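Note: the new gallery helpers are wrapped in `@st.cache_data`, which memoizes a function on its arguments so it is not re-executed on every Streamlit rerun; on a cache hit the body is skipped, so side effects such as appending to `st.session_state` inside `load_gallery` only run the first time for a given argument set. A minimal sketch of the idea (path and names are placeholders):

```python
import streamlit as st
from PIL import Image

@st.cache_data
def make_thumbnail(path: str, size: int) -> Image.Image:
    # Re-runs only when (path, size) changes; otherwise the cached result is returned.
    img = Image.open(path)
    img.thumbnail((size, size), Image.Resampling.LANCZOS)
    return img

# thumb = make_thumbnail("uploads/specimen_001.jpg", 120)  # placeholder path
# st.image(thumb)
```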
@@ -259,17 +283,8 @@ def content_input_images(col_left, col_right):
                n_pages = convert_pdf_to_jpg(file_path, st.session_state['dir_uploaded_images'], dpi=st.session_state.config['leafmachine']['project']['dir_images_local'])
                # Update the input list for each page image
                converted_files = os.listdir(st.session_state['dir_uploaded_images'])
-
-
-                if file_name.lower().endswith('.jpg'):
-                    jpg_file_path = os.path.join(st.session_state['dir_uploaded_images'], file_name)
-                    st.session_state['input_list'].append(jpg_file_path)
-
-                    # Optionally, create a thumbnail for the gallery
-                    img = Image.open(jpg_file_path)
-                    img.thumbnail((GALLERY_IMAGE_SIZE, GALLERY_IMAGE_SIZE), Image.Resampling.LANCZOS)
-                    file_path_small = save_uploaded_file(st.session_state['dir_uploaded_images_small'], uploaded_file, img)
-                    st.session_state['input_list_small'].append(file_path_small)
+                load_gallery(converted_files, uploaded_file)
+
            else:
                # Handle JPG/JPEG files (existing process)
                file_path = save_uploaded_file(st.session_state['dir_uploaded_images'], uploaded_file)
@@ -289,7 +304,7 @@ def content_input_images(col_left, col_right):
            else:
                # If there are less than 100 images, take them all
                images_to_display = st.session_state['input_list_small']
-
+            show_gallery_small_hf(images_to_display)
 
        else:
            st.session_state['view_local_gallery'] = st.toggle("View Image Gallery",)
@@ -301,7 +316,7 @@ def content_input_images(col_left, col_right):
                info_txt = f"Showing {st.session_state['processing_add_on']} out of {st.session_state['processing_add_on']} images"
                st.info(info_txt)
                try:
-
+                    show_gallery_small()
                except:
                    pass
 
@@ -1117,7 +1132,7 @@ def save_changes_to_API_keys(cfg_private,openai_api_key,azure_openai_api_version
 
 
 
-
+@st.cache_data
 def show_header_welcome():
     st.session_state.logo_path = os.path.join(st.session_state.dir_home, 'img','logo.png')
     st.session_state.logo = Image.open(st.session_state.logo_path)
@@ -1400,14 +1415,13 @@ def content_header():
        ct_left, ct_right = st.columns([1,1])
        with ct_left:
            st.button("Refresh", on_click=refresh, use_container_width=True)
-        with ct_right:
-
-
+        # with ct_right:
+        #     try:
+        #         st.page_link(os.path.join("pages","faqs.py"), label="FAQs", icon="❓")
+        #     except:
+        #         st.page_link(os.path.join(os.path.dirname(__file__),"pages","faqs.py"), label="FAQs", icon="❓")
 
-
-            # st.session_state.proceed_to_faqs = True
-            # st.session_state.proceed_to_main = False
-            # st.rerun()
+
 
    # with col_run_2:
    #     if st.button("Test GPT"):
@@ -1495,7 +1509,7 @@ def content_project_settings(col):
        st.session_state.config['leafmachine']['project']['dir_output'] = st.text_input("Output directory", st.session_state.config['leafmachine']['project'].get('dir_output', ''))
 
 
-
+# @st.cache_data
 def content_llm_cost():
    st.write("---")
    st.header('LLM Cost Calculator')
@@ -1530,13 +1544,29 @@ def content_llm_cost():
    n_img = st.number_input("Number of Images", min_value=0, value=1000, step=100)
 
    # Function to find the model's Input and Output values
+    @st.cache_data
    def find_model_values(model, all_dfs):
        for df in all_dfs:
            if model in df.keys():
                return df[model]['in'], df[model]['out']
        return None, None
+
+    @st.cache_data
+    def show_cost_matrix_1(rounding):
+        st.dataframe(st.session_state.styled_cost_openai.format(precision=rounding), hide_index=True,)
+    @st.cache_data
+    def show_cost_matrix_2(rounding):
+        st.dataframe(st.session_state.styled_cost_azure.format(precision=rounding), hide_index=True,)
+    @st.cache_data
+    def show_cost_matrix_3(rounding):
+        st.dataframe(st.session_state.styled_cost_google.format(precision=rounding), hide_index=True,)
+    @st.cache_data
+    def show_cost_matrix_4(rounding):
+        st.dataframe(st.session_state.styled_cost_mistral.format(precision=rounding), hide_index=True,)
+    @st.cache_data
+    def show_cost_matrix_5(rounding):
+        st.dataframe(st.session_state.styled_cost_local.format(precision=rounding), hide_index=True,)
 
-    # Calculate and display cost when button is pressed
    input_value, output_value = find_model_values(selected_model,
                                                  [st.session_state['cost_openai'], st.session_state['cost_azure'], st.session_state['cost_google'], st.session_state['cost_mistral'], st.session_state['cost_local']])
    if input_value is not None and output_value is not None:
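Note: `find_model_values` simply looks up a model's per-token input/output prices across the provider tables held in session state. A rough, hypothetical sketch of how such a lookup could feed a per-image cost estimate (the price table, token counts, and formula below are invented placeholders, not the app's real values):

```python
# Hypothetical price table: USD per 1K tokens (placeholder values).
PRICES = {
    "GPT-4": {"in": 0.03, "out": 0.06},
    "GPT-3.5": {"in": 0.001, "out": 0.002},
}

def find_model_values(model, all_tables):
    for table in all_tables:
        if model in table:
            return table[model]["in"], table[model]["out"]
    return None, None

def estimate_cost(model, n_images, in_tok_per_img, out_tok_per_img, tables=(PRICES,)):
    price_in, price_out = find_model_values(model, tables)
    if price_in is None:
        return None
    return n_images * (in_tok_per_img / 1000 * price_in + out_tok_per_img / 1000 * price_out)

print(estimate_cost("GPT-4", 1000, 2000, 500))  # 1000 images -> 90.0 with these placeholder numbers
```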
@@ -1544,17 +1574,18 @@ def content_llm_cost():
        with calculator_5:
            st.text_input("Total Cost", f"${round(cost,2)}") # selected_model
 
+    rounding = 4
    with col_cost_1:
-        rounding
-        st.dataframe(st.session_state.styled_cost_openai.format(precision=rounding), hide_index=True,)
+        show_cost_matrix_1(rounding)
    with col_cost_2:
-        st.dataframe(st.session_state.styled_cost_azure.format(precision=rounding), hide_index=True,)
+        show_cost_matrix_2(rounding)
    with col_cost_3:
-        st.dataframe(st.session_state.styled_cost_google.format(precision=rounding), hide_index=True,)
+        show_cost_matrix_3(rounding)
    with col_cost_4:
-        st.dataframe(st.session_state.styled_cost_mistral.format(precision=rounding), hide_index=True,)
+        show_cost_matrix_4(rounding)
    with col_cost_5:
-        st.dataframe(st.session_state.styled_cost_local.format(precision=rounding), hide_index=True,)
+        show_cost_matrix_5(rounding)
+
 
 
 
@@ -1572,10 +1603,12 @@ def content_prompt_and_llm_version():
        selected_version = default_version
    st.session_state.config['leafmachine']['project']['prompt_version'] = st.selectbox("Prompt Version", available_prompts, index=available_prompts.index(selected_version),label_visibility='collapsed')
 
-    with col_prompt_2:
-
-
-
+    # with col_prompt_2:
+    #     # if st.button("Build Custom LLM Prompt"):
+    #     try:
+    #         st.page_link(os.path.join("pages","prompt_builder.py"), label="Prompt Builder", icon="🔧")
+    #     except:
+    #         st.page_link(os.path.join(os.path.dirname(__file__),"pages","prompt_builder.py"), label="Prompt Builder", icon="🔧")
 
 
    st.header('LLM Version')
@@ -1785,10 +1818,29 @@ def content_ocr_method():
    # elif (OCR_option == 'hand') and do_use_trOCR:
    #     st.text_area(label='Handwritten/Printed + trOCR',placeholder=demo_text_trh,disabled=True, label_visibility='visible', height=150)
 
+@st.cache_data
+def show_collage():
+    # Load the image only if it's not already in the session state
+    if "demo_collage" not in st.session_state:
+        # ba = os.path.join(st.session_state.dir_home, 'demo', 'ba', 'ba2.png')
+        ba = os.path.join(st.session_state.dir_home, 'demo', 'ba', 'ba2.png')
+        st.session_state["demo_collage"] = Image.open(ba)
+    with st.expander(":frame_with_picture: View an example of the LeafMachine2 collage image"):
+        st.image(st.session_state["demo_collage"], caption='LeafMachine2 Collage', output_format="PNG")
+
+@st.cache_data
+def show_ocr():
+    if "demo_overlay" not in st.session_state:
+        # ocr = os.path.join(st.session_state.dir_home,'demo', 'ba','ocr.png')
+        ocr = os.path.join(st.session_state.dir_home,'demo', 'ba','ocr2.png')
+        st.session_state["demo_overlay"] = Image.open(ocr)
 
+    with st.expander(":frame_with_picture: View an example of the OCR overlay image"):
+        st.image(st.session_state["demo_overlay"], caption='OCR Overlay Images', output_format = "PNG")
+        # st.image(st.session_state["demo_overlay"], caption='OCR Overlay Images', output_format = "JPEG")
 
 def content_collage_overlay():
-    st.
+    st.markdown("---")
    col_collage, col_overlay = st.columns([4,4])
 
 
@@ -1797,7 +1849,7 @@ def content_collage_overlay():
        st.header('LeafMachine2 Label Collage')
        st.info("NOTE: We strongly recommend enabling LeafMachine2 cropping if your images are full sized herbarium sheet. Often, the OCR algorithm struggles with full sheets, but works well with the collage images. We have disabled the collage by default for this Hugging Face Space because the Space lacks a GPU and the collage creation takes a bit longer.")
        default_crops = st.session_state.config['leafmachine']['cropped_components']['save_cropped_annotations']
-        st.
+        st.markdown("Prior to transcription, use LeafMachine2 to crop all labels from input images to create label collages for each specimen image. Showing just the text labels to the OCR algorithms significantly improves performance. This runs slowly on the free Hugging Face Space, but runs quickly with a fast CPU or any GPU.")
        st.session_state.config['leafmachine']['use_RGB_label_images'] = st.checkbox(":rainbow[Use LeafMachine2 label collage for transcriptions]", st.session_state.config['leafmachine'].get('use_RGB_label_images', False))
 
 
@@ -1805,33 +1857,17 @@ def content_collage_overlay():
                                                  options=['ruler', 'barcode','label', 'colorcard','map','envelope','photo','attached_item','weights',
                                                           'leaf_whole', 'leaf_partial', 'leaflet', 'seed_fruit_one', 'seed_fruit_many', 'flower_one', 'flower_many', 'bud','specimen','roots','wood'],default=default_crops)
        st.session_state.config['leafmachine']['cropped_components']['save_cropped_annotations'] = option_selected_crops
-
-        if "demo_collage" not in st.session_state:
-            # ba = os.path.join(st.session_state.dir_home, 'demo', 'ba', 'ba2.png')
-            ba = os.path.join(st.session_state.dir_home, 'demo', 'ba', 'ba2.png')
-            st.session_state["demo_collage"] = Image.open(ba)
-
-        # Display the image
-        with st.expander(":frame_with_picture: View an example of the LeafMachine2 collage image"):
-            st.image(st.session_state["demo_collage"], caption='LeafMachine2 Collage', output_format="PNG")
-            # st.image(st.session_state["demo_collage"], caption='LeafMachine2 Collage', output_format="JPEG")
+        show_collage()
 
    with col_overlay:
        st.header('OCR Overlay Image')
 
-        st.
+        st.markdown('This will plot bounding boxes around all text that Google Vision was able to detect. If there are no boxes around text, then the OCR failed, so that missing text will not be seen by the LLM when it is creating the JSON object. The created image will be viewable in the VoucherVisionEditor.')
 
        do_create_OCR_helper_image = st.checkbox("Create image showing an overlay of the OCR detections",value=st.session_state.config['leafmachine']['do_create_OCR_helper_image'],disabled=True)
        st.session_state.config['leafmachine']['do_create_OCR_helper_image'] = do_create_OCR_helper_image
-
-        if "demo_overlay" not in st.session_state:
-            # ocr = os.path.join(st.session_state.dir_home,'demo', 'ba','ocr.png')
-            ocr = os.path.join(st.session_state.dir_home,'demo', 'ba','ocr2.png')
-            st.session_state["demo_overlay"] = Image.open(ocr)
+        show_ocr()
 
-        with st.expander(":frame_with_picture: View an example of the OCR overlay image"):
-            st.image(st.session_state["demo_overlay"], caption='OCR Overlay Images', output_format = "PNG")
-            # st.image(st.session_state["demo_overlay"], caption='OCR Overlay Images', output_format = "JPEG")
 
 
 
@@ -1909,10 +1945,10 @@ def content_processing_options():
        with col_v2:
 
 
-            print(f"Number of GPUs: {st.session_state.num_gpus}")
-            print(f"GPU Details: {st.session_state.gpu_dict}")
-            print(f"Total VRAM: {st.session_state.total_vram_gb} GB")
-            print(f"Capability Score: {st.session_state.capability_score}")
+            # print(f"Number of GPUs: {st.session_state.num_gpus}")
+            # print(f"GPU Details: {st.session_state.gpu_dict}")
+            # print(f"Total VRAM: {st.session_state.total_vram_gb} GB")
+            # print(f"Capability Score: {st.session_state.capability_score}")
 
            st.header('System GPU Information')
            st.markdown(f"**Torch CUDA:** {torch.cuda.is_available()}")
@@ -1989,6 +2025,7 @@ def content_space_saver():
 #################################################################################################################################################
 # render_expense_report_summary #################################################################################################################
 #################################################################################################################################################
+@st.cache_data
 def render_expense_report_summary():
    expense_summary = st.session_state.expense_summary
    expense_report = st.session_state.expense_report
@@ -2099,12 +2136,8 @@ def content_less_used():
 #################################################################################################################################################
 # Sidebar #######################################################################################################################################
 #################################################################################################################################################
+@st.cache_data
 def sidebar_content():
-    # st.page_link(os.path.join(os.path.dirname(__file__),'app.py'), label="Home", icon="🏠")
-    # st.page_link(os.path.join(os.path.dirname(__file__),"pages","prompt_builder.py"), label="Prompt Builder", icon="🔧")
-    # st.page_link("pages/page_2.py", label="Page 2", icon="2️⃣", disabled=True)
-    # st.page_link("http://www.google.com", label="Google", icon="🌎")
-
    if not os.path.exists(os.path.join(st.session_state.dir_home,'expense_report')):
        validate_dir(os.path.join(st.session_state.dir_home,'expense_report'))
    expense_report_path = os.path.join(st.session_state.dir_home, 'expense_report', 'expense_report.csv')
@@ -2156,11 +2189,21 @@ def main():
 #################################################################################################################################################
 # Main ##########################################################################################################################################
 #################################################################################################################################################
+do_print_profiler = False
 if st.session_state['is_hf']:
    # if st.session_state.proceed_to_build_llm_prompt:
    #     build_LLM_prompt_config()
    if st.session_state.proceed_to_main:
+        if do_print_profiler:
+            profiler = cProfile.Profile()
+            profiler.enable()
+
        main()
+
+        if do_print_profiler:
+            profiler.disable()
+            stats = pstats.Stats(profiler).sort_stats('cumulative')
+            stats.print_stats(30)
 
 else:
    if not st.session_state.private_file:
@@ -2170,8 +2213,17 @@ else:
    elif st.session_state.proceed_to_private and not st.session_state['is_hf']:
        create_private_file()
    elif st.session_state.proceed_to_main:
+        if do_print_profiler:
+            profiler = cProfile.Profile()
+            profiler.enable()
+
        main()
 
+        if do_print_profiler:
+            profiler.disable()
+            stats = pstats.Stats(profiler).sort_stats('cumulative')
+            stats.print_stats(30)
+
 
 
 
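Note: the new `do_print_profiler` flag wraps `main()` in Python's built-in `cProfile`/`pstats`. A self-contained sketch of the same pattern, runnable outside the app:

```python
import cProfile
import pstats

do_print_profiler = True  # the app keeps this False by default

def work():
    # Stand-in for main(): any code you want to profile.
    return sum(i * i for i in range(100_000))

if do_print_profiler:
    profiler = cProfile.Profile()
    profiler.enable()

work()

if do_print_profiler:
    profiler.disable()
    stats = pstats.Stats(profiler).sort_stats("cumulative")
    stats.print_stats(30)  # print the 30 most expensive calls by cumulative time
```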
pages/faqs.py
CHANGED
@@ -7,12 +7,15 @@ st.set_page_config(layout="wide", page_icon='img/icon.ico', page_title='VV FAQs'
 def display_faqs():
    c1, c2, c3 = st.columns([4,6,1])
    with c3:
-
-
-
-
-
-
+        try:
+            st.page_link('app.py', label="Home", icon="🏠")
+            st.page_link(os.path.join("pages","faqs.py"), label="FAQs", icon="❓")
+            st.page_link(os.path.join("pages","report_bugs.py"), label="Report a Bug", icon="⚠️")
+        except:
+            st.page_link(os.path.join(os.path.dirname(os.path.dirname(__file__)),'app.py'), label="Home", icon="🏠")
+            st.page_link(os.path.join(os.path.dirname(os.path.dirname(__file__)),"pages","faqs.py"), label="FAQs", icon="❓")
+            st.page_link(os.path.join(os.path.dirname(os.path.dirname(__file__)),"pages","report_bugs.py"), label="Report a Bug", icon="⚠️")
+
    with c2:
        st.write('If you would like to get more involved, have questions, would like to see additional features, then please fill out this [Google Form](https://docs.google.com/forms/d/e/1FAIpQLSe2E9zU1bPJ1BW4PMakEQFsRmLbQ0WTBI2UXHIMEFm4WbnAVw/viewform?usp=sf_link)')
        components.iframe(f"https://docs.google.com/forms/d/e/1FAIpQLSe2E9zU1bPJ1BW4PMakEQFsRmLbQ0WTBI2UXHIMEFm4WbnAVw/viewform?embedded=true", height=900,scrolling=True,width=640)
pages/prompt_builder.py
CHANGED
@@ -138,11 +138,14 @@ def build_LLM_prompt_config():
    st.session_state.logo = Image.open(st.session_state.logo_path)
    st.image(st.session_state.logo, width=250)
    with col_main2:
-
-
-
-
-
+        try:
+            st.page_link('app.py', label="Home", icon="🏠")
+            st.page_link(os.path.join("pages","faqs.py"), label="FAQs", icon="❓")
+            st.page_link(os.path.join("pages","report_bugs.py"), label="Report a Bug", icon="⚠️")
+        except:
+            st.page_link(os.path.join(os.path.dirname(os.path.dirname(__file__)),'app.py'), label="Home", icon="🏠")
+            st.page_link(os.path.join(os.path.dirname(os.path.dirname(__file__)),"pages","faqs.py"), label="FAQs", icon="❓")
+            st.page_link(os.path.join(os.path.dirname(os.path.dirname(__file__)),"pages","report_bugs.py"), label="Report a Bug", icon="⚠️")
 
    st.session_state['assigned_columns'] = []
    st.session_state['default_prompt_author'] = 'unknown'
pages/report_bugs.py
CHANGED
@@ -7,9 +7,14 @@ st.set_page_config(layout="wide", page_icon='img/icon.ico', page_title='VV Repor
 def display_report():
    c1, c2, c3 = st.columns([4,6,1])
    with c3:
-
-
-
+        try:
+            st.page_link('app.py', label="Home", icon="🏠")
+            st.page_link(os.path.join("pages","faqs.py"), label="FAQs", icon="❓")
+            st.page_link(os.path.join("pages","report_bugs.py"), label="Report a Bug", icon="⚠️")
+        except:
+            st.page_link(os.path.join(os.path.dirname(os.path.dirname(__file__)),'app.py'), label="Home", icon="🏠")
+            st.page_link(os.path.join(os.path.dirname(os.path.dirname(__file__)),"pages","faqs.py"), label="FAQs", icon="❓")
+            st.page_link(os.path.join(os.path.dirname(os.path.dirname(__file__)),"pages","report_bugs.py"), label="Report a Bug", icon="⚠️")
 
    with c2:
        st.write('To report a bug or request a new feature please fill out this [Google Form](https://docs.google.com/forms/d/e/1FAIpQLSdtW1z9Q1pGZTo5W9UeCa6PlQanP-b88iNKE6zsusRI78Itsw/viewform?usp=sf_link)')
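Note: the same try/except navigation block is now repeated in faqs.py, prompt_builder.py, and report_bugs.py: paths relative to the working directory are tried first, with paths anchored on `__file__` as the fallback when the working directory differs (as it can on a hosted Space). A sketch of how that could be factored into a single shared helper (`render_nav_links` is a hypothetical name, not a function in this repo):

```python
import os
import streamlit as st

def render_nav_links():
    # Try paths relative to the app's working directory first, then fall back
    # to absolute paths derived from this file's location.
    try:
        st.page_link("app.py", label="Home", icon="🏠")
        st.page_link(os.path.join("pages", "faqs.py"), label="FAQs", icon="❓")
        st.page_link(os.path.join("pages", "report_bugs.py"), label="Report a Bug", icon="⚠️")
    except Exception:
        root = os.path.dirname(os.path.dirname(__file__))
        st.page_link(os.path.join(root, "app.py"), label="Home", icon="🏠")
        st.page_link(os.path.join(root, "pages", "faqs.py"), label="FAQs", icon="❓")
        st.page_link(os.path.join(root, "pages", "report_bugs.py"), label="Report a Bug", icon="⚠️")
```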