phyloforfun committed
Commit 87c3140 • Parent: 34de6b9

Add application file

This view is limited to 50 files because it contains too many changes; see the raw diff for the full change set.
- .gitignore +199 -0
- LICENSE +674 -0
- VoucherVision_Reference.yaml +103 -0
- __init__.py +0 -0
- api_cost/api_cost.yaml +9 -0
- app.py +1344 -0
- bin/version.yml +2 -0
- create_desktop_shortcut.py +69 -0
- custom_prompts/required_structure.yaml +62 -0
- custom_prompts/version_2.yaml +229 -0
- custom_prompts/version_2_OSU.yaml +230 -0
- demo/ba/ba.jpg +3 -0
- demo/ba/ba.png +3 -0
- demo/ba/ba2.png +3 -0
- demo/demo_gallery/NY_1928185102_Heliotropiaceae_Heliotropium_indicum.jpg +3 -0
- demo/demo_gallery/SMF_3046042583_Ebenaceae_Diospyros_mespiliformis.jpg +3 -0
- demo/demo_gallery/UM_1807475718_Monimiaceae_Hedycarya_parvifolia.jpg +3 -0
- demo/demo_gallery/UM_1915455196_Cardiopteridaceae_Citronella_sarmentosa.jpg +3 -0
- demo/demo_images/UM_1807464860_Phellinaceae_Phelline_dumbeensis.jpg +3 -0
- demo/img/expense_report.PNG +3 -0
- demo/img/prompt_1.PNG +3 -0
- demo/img/prompt_2.PNG +3 -0
- demo/img/prompt_3.PNG +3 -0
- demo/img/prompt_4.PNG +3 -0
- demo/img/prompt_5.PNG +3 -0
- demo/img/validation_1.PNG +3 -0
- demo/img/validation_gpt.PNG +3 -0
- demo/img/validation_gpu.PNG +3 -0
- demo/img/validation_palm.PNG +3 -0
- domain_knowledge/SLTP_UM_AllAsiaMinimalInRegion.xlsx +0 -0
- img/icon.ico +0 -0
- img/icon.jpg +3 -0
- img/icon2.ico +0 -0
- img/logo.png +3 -0
- requirements.txt +34 -0
- run_VoucherVision.py +31 -0
- vouchervision/LLM_Falcon.py +112 -0
- vouchervision/LLM_PaLM.py +209 -0
- vouchervision/LLM_chatGPT_3_5.py +420 -0
- vouchervision/LM2_logger.py +117 -0
- vouchervision/LeafMachine2_Config_Builder.py +246 -0
- vouchervision/OCR_google_cloud_vision.py +107 -0
- vouchervision/PaLM_example_script.py +70 -0
- vouchervision/VoucherVision_Config_Builder.py +576 -0
- vouchervision/component_detector/LICENSE +674 -0
- vouchervision/component_detector/__init__.py +0 -0
- vouchervision/component_detector/armature_processing.py +1047 -0
- vouchervision/component_detector/color_profiles/ColorProfile__LANDMARK.csv +9 -0
- vouchervision/component_detector/color_profiles/ColorProfile__LANDMARK_ARM.csv +4 -0
- vouchervision/component_detector/color_profiles/ColorProfile__PLANT.csv +11 -0
.gitignore
ADDED
@@ -0,0 +1,199 @@
# Files
PRIVATE_DATA.yaml
LeafMachine2_TRAINING_ONLY.yaml
LeafMachine2_TEMPLATE.yaml
LeafMachine2_WW.yaml
yolov8x-pose.pt
yolov8n.pt
*PRIVATE_DATA*

# Dirs
demo/demo_output/*
demo/demo_configs/*
wandb/
venv_LM2_linux/
venv_LM2_l/
venv_LM2_310/
venv_LM2_38/
venv_LM2/
venv_VV/
tests/
.vscode/
runs/
KP_Test/

# VV Specific
.streamlit*/
demo/demo_output/*
demo/validation_configs/*
/bin/*
!/bin/version.yml
release*
expense_report/*
/custom_prompts/*
!/custom_prompts/required_structure.yaml
!/custom_prompts/version_2.yaml
!/custom_prompts/version_2_OSU.yaml
leafmachine2/*/.gitignore

/bin/*
!/bin/version.yml

vouchervision/release_manager/

vouchervision/component_detector/datasets/
vouchervision/component_detector/wandb/
vouchervision/component_detector/runs/
vouchervision/component_detector/architecture/
vouchervision/component_detector/yolov5x6.pt

vouchervision/instructor-xl/
vouchervision/instructor-embedding/

vouchervision/SLTP_*

vouchervision/gradio_ocr.py
vouchervision/build_dataset.py
vouchervision/evaluate_LLM_predictions.py
vouchervision/QLoRa__x__GPT-NeoX-20B.py
vouchervision/QLoRa_GPT_NeoX_20B.py
vouchervision/run_VoucherVision_gradio.py
vouchervision/stratify_groundtruth_transcriptions.py

leafmachine2/component_detector/runs/
leafmachine2/component_detector/architecture/

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
pip-wheel-metadata/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
.python-version

# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock

# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
venv_LM2/
venv_LM2_linux/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/
VoucherVision.yaml
LICENSE
ADDED
@@ -0,0 +1,674 @@
GNU GENERAL PUBLIC LICENSE
Version 3, 29 June 2007

Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/>
Everyone is permitted to copy and distribute verbatim copies
of this license document, but changing it is not allowed.

[The remaining 668 lines of the file are the complete, unmodified standard text of the GNU GPL v3: the Preamble; Terms and Conditions sections 0 through 17 (Definitions; Source Code; Basic Permissions; Protecting Users' Legal Rights From Anti-Circumvention Law; Conveying Verbatim Copies; Conveying Modified Source Versions; Conveying Non-Source Forms; Additional Terms; Termination; Acceptance Not Required for Having Copies; Automatic Licensing of Downstream Recipients; Patents; No Surrender of Others' Freedom; Use with the GNU Affero General Public License; Revised Versions of this License; Disclaimer of Warranty; Limitation of Liability; Interpretation of Sections 15 and 16); and the "How to Apply These Terms to Your New Programs" appendix, ending with the pointer to <https://www.gnu.org/licenses/why-not-lgpl.html>.]
VoucherVision_Reference.yaml
ADDED
@@ -0,0 +1,103 @@
1 |
+
# To use default value, set to null
|
2 |
+
leafmachine:
|
3 |
+
|
4 |
+
use_RGB_label_images: True
|
5 |
+
|
6 |
+
do:
|
7 |
+
check_for_illegal_filenames: False
|
8 |
+
check_for_corrupt_images_make_vertical: False
|
9 |
+
print:
|
10 |
+
verbose: True
|
11 |
+
optional_warnings: True
|
12 |
+
|
13 |
+
logging:
|
14 |
+
log_level: null
|
15 |
+
|
16 |
+
|
17 |
+
# Overall Project Input Settings
|
18 |
+
project:
|
19 |
+
# Image to Process
|
20 |
+
dir_images_local: 'D:\Dropbox\LM2_Env\VoucherVision_Datasets\2022_09_07_thru12_S3_jacortez_AllAsia' # 'D:/Dropbox/LM2_Env/VoucherVision_Datasets/Compare_Set_Easy_10imgs/imgs' #'D:\D_Desktop\Richie\Imgs' #'D:/Dropbox/LM2_Env/Image_Datasets/Acacia/Acacia_prickles_4-26-23_LANCZOS/images/short' #'D:\D_Desktop\Richie\Imgs' #'home/brlab/Dropbox/LM2_Env/Image_Datasets/Manuscript_Images' # 'D:\Dropbox\LM2_Env\Image_Datasets\SET_FieldPrism_Test\TESTING_OUTPUT\Images_Processed\REU_Field_QR-Code-Images\Cannon_Corrected\Images_Corrected' # 'F:\temp_3sppFamily' # 'D:/Dropbox/LM2_Env/Image_Datasets/GBIF_BroadSample_3SppPerFamily' # SET_Diospyros/images_short' # 'D:/Dropbox/LM2_Env/Image_Datasets/SET_Diospyros/images_short' #'D:\Dropbox\LM2_Env\Image_Datasets\GBIF_BroadSample_Herbarium' #'D:/Dropbox/LM2_Env/Image_Datasets/SET_Diospyros/images_short' # str | only for image_location:local | full path for directory containing images
# dir_images_local: 'D:/Dropbox/LM2_Env/VoucherVision_Datasets/Compare_Set_Easy_10imgs/imgs' #'D:\D_Desktop\Richie\Imgs' #'D:/Dropbox/LM2_Env/Image_Datasets/Acacia/Acacia_prickles_4-26-23_LANCZOS/images/short' #'D:\D_Desktop\Richie\Imgs' #'home/brlab/Dropbox/LM2_Env/Image_Datasets/Manuscript_Images' # 'D:\Dropbox\LM2_Env\Image_Datasets\SET_FieldPrism_Test\TESTING_OUTPUT\Images_Processed\REU_Field_QR-Code-Images\Cannon_Corrected\Images_Corrected' # 'F:\temp_3sppFamily' # 'D:/Dropbox/LM2_Env/Image_Datasets/GBIF_BroadSample_3SppPerFamily' # SET_Diospyros/images_short' # 'D:/Dropbox/LM2_Env/Image_Datasets/SET_Diospyros/images_short' #'D:\Dropbox\LM2_Env\Image_Datasets\GBIF_BroadSample_Herbarium' #'D:/Dropbox/LM2_Env/Image_Datasets/SET_Diospyros/images_short' # str | only for image_location:local | full path for directory containing images
image_location: 'local'

continue_run_from_partial_xlsx: 'D:\Dropbox\LM2_Env\VoucherVision_Datasets\POC_chatGPT__2022_09_07_thru12_S3_jacortez_AllAsia\2022_09_07_thru12_S3_jacortez_AllAsia\Transcription\transcribed.xlsx'
# continue_run_from_partial_xlsx: null

# Project Output Dir
dir_output: 'D:/Dropbox/LM2_Env/VoucherVision_Datasets/POC_chatGPT__2022_09_07_thru12_S3_jacortez_AllAsia' # 'D:/Dropbox/LM2_Env/Image_Datasets/TEST_LM2' # 'D:\D_Desktop\Richie\Richie_Out'
run_name: 'POC_chatGPT' #'images_short_TEST' #'images_short_landmark'

prefix_removal: 'MICH-V-'
suffix_removal: ''
catalog_numerical_only: True

# Embeddings and LLM
use_domain_knowledge: True
embeddings_database_name: 'EmbeddingsDB_all_asia_minimal_InRegion'
build_new_embeddings_database: False
path_to_domain_knowledge_xlsx: 'D:\Dropbox\LeafMachine2\leafmachine2\transcription\domain_knowledge/AllAsiaMinimalasof25May2023_2__InRegion.xlsx' #'D:/Dropbox/LeafMachine2/leafmachine2/transcription/domain_knowledge/AllAsiaMinimalasof25May2023_2__TRIMMEDtiny.xlsx'

batch_size: 500 #null # null = all
num_workers: 1 # int |DEFAULT| 4 # More is not always better. Most hardware loses performance after 4

modules:
  specimen_crop: True

LLM_version: 'chatGPT' # from 'chatGPT' OR 'PaLM'

cropped_components:
  # empty list for all, add to list to IGNORE, lowercase, comma separated
  # archival |FROM|
  #   ruler, barcode, colorcard, label, map, envelope, photo, attached_item, weights
  # plant |FROM|
  #   leaf_whole, leaf_partial, leaflet, seed_fruit_one, seed_fruit_many, flower_one, flower_many, bud, specimen, roots, wood
  do_save_cropped_annotations: True
  save_cropped_annotations: ['label','barcode'] # 'save_all' to save all classes
  save_per_image: False # creates a folder for each image, saves crops into class-names folders # TODO
  save_per_annotation_class: True # saves crops into class-names folders
  binarize_labels: False
  binarize_labels_skeletonize: False

data:
  save_json_rulers: False
  save_json_measurements: False
  save_individual_csv_files_rulers: False
  save_individual_csv_files_measurements: False
  include_darwin_core_data_from_combined_file: False
  do_apply_conversion_factor: False ###########################

overlay:
  save_overlay_to_pdf: True
  save_overlay_to_jpgs: True
  overlay_dpi: 300 # int |FROM| 100 to 300
  overlay_background_color: 'black' # str |FROM| 'white' or 'black'

  show_archival_detections: True
  ignore_archival_detections_classes: []
  show_plant_detections: True
  ignore_plant_detections_classes: ['leaf_whole', 'specimen'] #['leaf_whole', 'leaf_partial', 'specimen']
  show_segmentations: True
  show_landmarks: True
  ignore_landmark_classes: []

  line_width_archival: 2 # int
  line_width_plant: 6 # int
  line_width_seg: 12 # int # thick = 12
  line_width_efd: 6 # int # thick = 3
  alpha_transparency_archival: 0.3  # float between 0 and 1
  alpha_transparency_plant: 0
  alpha_transparency_seg_whole_leaf: 0.4
  alpha_transparency_seg_partial_leaf: 0.3

# Configure Archival Component Detector
archival_component_detector:
  # ./leafmachine2/component_detector/runs/train/detector_type/detector_version/detector_iteration/weights/detector_weights
  detector_type: 'Archival_Detector'
  detector_version: 'PREP_final'
  detector_iteration: 'PREP_final'
  detector_weights: 'best.pt'
  minimum_confidence_threshold: 0.5
  do_save_prediction_overlay_images: True
  ignore_objects_for_overlay: [] # list[str] # list of objects that can be excluded from the overlay # all = null
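Because the reference file is plain YAML, a run can be prepared by loading it, overriding a few fields, and writing it back out. The following is only an illustrative sketch and not part of the commit; the file paths and the flat key lookups are assumptions, so match them to the nesting used in your copy of the file.

    import yaml

    # Load the reference configuration (path is an assumption).
    with open('VoucherVision_Reference.yaml', 'r') as f:
        cfg = yaml.safe_load(f)

    # Point the run at a local image folder and a fresh output directory.
    # These keys appear in the fragment above; adjust the lookups if your
    # copy nests them under a parent section.
    cfg['dir_images_local'] = '/data/herbarium_batch_01'
    cfg['dir_output'] = '/data/vouchervision_runs'
    cfg['run_name'] = 'batch_01_test'

    with open('my_run.yaml', 'w') as f:
        yaml.safe_dump(cfg, f, sort_keys=False)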
__init__.py
ADDED
File without changes
api_cost/api_cost.yaml
ADDED
@@ -0,0 +1,9 @@
GPT_3_5:
  in: 0.0015
  out: 0.002
GPT_4:
  in: 0.03
  out: 0.06
PALM2:
  in: 0.0
  out: 0.0
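These rates are what the expense report uses to price each call; they line up with USD per 1,000 input ('in') and output ('out') tokens at the time of the commit. A minimal sketch of turning the table into a per-run cost estimate follows (the token counts are invented for illustration):

    import yaml

    def estimate_cost(model, tokens_in, tokens_out, path_api_cost='api_cost/api_cost.yaml'):
        # Assumes the rates in api_cost.yaml are USD per 1,000 tokens.
        with open(path_api_cost, 'r') as f:
            rates = yaml.safe_load(f)
        r = rates[model]
        return (tokens_in / 1000) * r['in'] + (tokens_out / 1000) * r['out']

    # Example: one herbarium label transcribed with GPT-4.
    print(f"${estimate_cost('GPT_4', tokens_in=2500, tokens_out=600):.4f}")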
app.py
ADDED
@@ -0,0 +1,1344 @@
import streamlit as st
import yaml, os, json, random, time, re
import matplotlib.pyplot as plt
import plotly.graph_objs as go
import numpy as np
from itertools import chain
from PIL import Image
import pandas as pd
from typing import Union
from streamlit_extras.let_it_rain import rain
from vouchervision.LeafMachine2_Config_Builder import write_config_file
from vouchervision.VoucherVision_Config_Builder import build_VV_config, run_demo_tests_GPT, run_demo_tests_Palm, TestOptionsGPT, TestOptionsPalm, check_if_usable, run_api_tests
from vouchervision.vouchervision_main import voucher_vision, voucher_vision_OCR_test
from vouchervision.general_utils import test_GPU, get_cfg_from_full_path, summarize_expense_report, create_google_ocr_yaml_config, validate_dir

PROMPTS_THAT_NEED_DOMAIN_KNOWLEDGE = ["Version 1","Version 1 PaLM 2"]
COLORS_EXPENSE_REPORT = {
    'GPT_4': '#8fff66',   # Bright Green
    'GPT_3_5': '#006400', # Dark Green
    'PALM2': '#66a8ff'    # Blue
}

class ProgressReport:
    def __init__(self, overall_bar, batch_bar, text_overall, text_batch):
        self.overall_bar = overall_bar
        self.batch_bar = batch_bar
        self.text_overall = text_overall
        self.text_batch = text_batch
        self.current_overall_step = 0
        self.total_overall_steps = 20  # number of major steps in machine function
        self.current_batch = 0
        self.total_batches = 20

    def update_overall(self, step_name=""):
        self.current_overall_step += 1
        self.overall_bar.progress(self.current_overall_step / self.total_overall_steps)
        self.text_overall.text(step_name)

    def update_batch(self, step_name=""):
        self.current_batch += 1
        self.batch_bar.progress(self.current_batch / self.total_batches)
        self.text_batch.text(step_name)

    def set_n_batches(self, n_batches):
        self.total_batches = n_batches

    def set_n_overall(self, total_overall_steps):
        self.total_overall_steps = total_overall_steps

    def reset_batch(self, step_name):
        self.current_batch = 0
        self.batch_bar.progress(0)
        self.text_batch.text(step_name)

    def reset_overall(self, step_name):
        self.current_overall_step = 0
        self.overall_bar.progress(0)
        self.text_overall.text(step_name)

    def get_n_images(self):
        return self.n_images

    def get_n_overall(self):
        return self.total_overall_steps
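For reference, ProgressReport only wraps the two st.progress bars and the two st.empty placeholders that content_header() creates further down. A minimal usage sketch, with step names and counts invented for illustration rather than taken from the app:

    overall_bar = st.progress(0)
    text_overall = st.empty()
    batch_bar = st.progress(0)
    text_batch = st.empty()

    report = ProgressReport(overall_bar, batch_bar, text_overall, text_batch)
    report.set_n_overall(3)                  # three major steps in this toy run
    report.update_overall("Loading images")

    report.set_n_batches(5)                  # five batches of specimens
    for i in range(5):
        report.update_batch(f"Transcribing batch {i + 1} of 5")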
def does_private_file_exist():
    dir_home = os.path.dirname(os.path.dirname(__file__))
    path_cfg_private = os.path.join(dir_home, 'PRIVATE_DATA.yaml')
    return os.path.exists(path_cfg_private)

def setup_streamlit_config(dir_home):
    # Define the directory path and filename
    dir_path = os.path.join(dir_home, ".streamlit")
    file_path = os.path.join(dir_path, "config.toml")

    # Check if directory exists, if not create it
    if not os.path.exists(dir_path):
        os.makedirs(dir_path)

    # Create or modify the file with the provided content
    config_content = f"""
[theme]
base = "dark"
primaryColor = "#00ff00"

[server]
enableStaticServing = false
runOnSave = true
port = 8524
"""

    with open(file_path, "w") as f:
        f.write(config_content.strip())

def display_scrollable_results(JSON_results, test_results, OPT2, OPT3):
    """
    Display the results from JSON_results in a scrollable container.
    """
    # Initialize the container
    con_results = st.empty()
    with con_results.container():

        # Start the custom container for all the results
        results_html = """<div class='scrollable-results-container'>"""

        for idx, (test_name, _) in enumerate(sorted(test_results.items())):
            _, ind_opt1, ind_opt2, ind_opt3 = test_name.split('__')
            opt2_readable = "Use LeafMachine2" if OPT2[int(ind_opt2.split('-')[1])] else "Don't use LeafMachine2"
            opt3_readable = f"{OPT3[int(ind_opt3.split('-')[1])]}"

            if JSON_results[idx] is None:
                results_html += f"<p>None</p>"
            else:
                formatted_json = json.dumps(JSON_results[idx], indent=4)
                results_html += f"<pre>[{opt2_readable}] + [{opt3_readable}]<br/>{formatted_json}</pre>"

        # End the custom container
        results_html += """</div>"""

        # The CSS to make this container scrollable
        css = """
        <style>
            .scrollable-results-container {
                overflow-y: auto;
                height: 600px;
                width: 100%;
                white-space: pre-wrap;  /* To wrap the content */
                font-family: monospace; /* To give the JSON a code-like appearance */
            }
        </style>
        """

        # Apply the CSS and then the results
        st.markdown(css, unsafe_allow_html=True)
        st.markdown(results_html, unsafe_allow_html=True)

def display_test_results(test_results, JSON_results, llm_version):
    if llm_version == 'gpt':
        OPT1, OPT2, OPT3 = TestOptionsGPT.get_options()
    elif llm_version == 'palm':
        OPT1, OPT2, OPT3 = TestOptionsPalm.get_options()
    else:
        raise ValueError(f"Unknown llm_version: {llm_version}")

    widths = [1] * (len(OPT1) + 2) + [2]
    columns = st.columns(widths)

    with columns[0]:
        st.write("LeafMachine2")
    with columns[1]:
        st.write("Prompt")
    with columns[len(OPT1) + 2]:
        st.write("Scroll to See Last Transcription in Each Test")

    already_written = set()

    for test_name, result in sorted(test_results.items()):
        _, ind_opt1, _, _ = test_name.split('__')
        option_value = OPT1[int(ind_opt1.split('-')[1])]

        if option_value not in already_written:
            with columns[int(ind_opt1.split('-')[1]) + 2]:
                st.write(option_value)
            already_written.add(option_value)

    printed_options = set()

    with columns[-1]:
        display_scrollable_results(JSON_results, test_results, OPT2, OPT3)

    # Close the custom container
    st.write('</div>', unsafe_allow_html=True)


    for idx, (test_name, result) in enumerate(sorted(test_results.items())):
        _, ind_opt1, ind_opt2, ind_opt3 = test_name.split('__')
        opt2_readable = "Use LeafMachine2" if OPT2[int(ind_opt2.split('-')[1])] else "Don't use LeafMachine2"
        opt3_readable = f"{OPT3[int(ind_opt3.split('-')[1])]}"

        if (opt2_readable, opt3_readable) not in printed_options:
            with columns[0]:
                st.info(f"{opt2_readable}")
                st.write('---')
            with columns[1]:
                st.info(f"{opt3_readable}")
                st.write('---')
            printed_options.add((opt2_readable, opt3_readable))

        with columns[int(ind_opt1.split('-')[1]) + 2]:
            if result:
                st.success(f"Test Passed")
            else:
                st.error(f"Test Failed")
            st.write('---')

    # success_count = sum(1 for result in test_results.values() if result)
    # failure_count = len(test_results) - success_count
    # proportional_rain("🥇", success_count, "💔", failure_count, font_size=72, falling_speed=5, animation_length="infinite")
    rain_emojis(test_results)

def add_emoji_delay():
    time.sleep(0.3)

def rain_emojis(test_results):
    # test_results = {
    #     'test1': True,   # Test passed
    #     'test2': True,   # Test passed
    #     'test3': True,   # Test passed
    #     'test4': False,  # Test failed
    #     'test5': False,  # Test failed
    #     'test6': False,  # Test failed
    #     'test7': False,  # Test failed
    #     'test8': False,  # Test failed
    #     'test9': False,  # Test failed
    #     'test10': False, # Test failed
    # }
    success_emojis = ["🥇", "🏆", "🍾", "🙌"]
    failure_emojis = ["💔", "😭"]

    success_count = sum(1 for result in test_results.values() if result)
    failure_count = len(test_results) - success_count

    chosen_emoji = random.choice(success_emojis)
    for _ in range(success_count):
        rain(
            emoji=chosen_emoji,
            font_size=72,
            falling_speed=4,
            animation_length=2,
        )
        add_emoji_delay()

    chosen_emoji = random.choice(failure_emojis)
    for _ in range(failure_count):
        rain(
            emoji=chosen_emoji,
            font_size=72,
            falling_speed=5,
            animation_length=1,
        )
        add_emoji_delay()

def get_prompt_versions(LLM_version):
    yaml_files = [f for f in os.listdir(os.path.join(st.session_state.dir_home, 'custom_prompts')) if f.endswith('.yaml')]

    if LLM_version in ["GPT 4", "GPT 3.5", "Azure GPT 4", "Azure GPT 3.5"]:
        versions = ["Version 1", "Version 1 No Domain Knowledge", "Version 2"]
        return (versions + yaml_files, "Version 2")
    elif LLM_version in ["PaLM 2",]:
        versions = ["Version 1 PaLM 2", "Version 1 PaLM 2 No Domain Knowledge", "Version 2 PaLM 2"]
        return (versions + yaml_files, "Version 2 PaLM 2")
    else:
        # Handle other cases or raise an error
        return (yaml_files, None)

def get_private_file():
    dir_home = os.path.dirname(os.path.dirname(__file__))
    path_cfg_private = os.path.join(dir_home, 'PRIVATE_DATA.yaml')
    return get_cfg_from_full_path(path_cfg_private)

def create_space_saver():
    st.subheader("Space Saving Options")
    col_ss_1, col_ss_2 = st.columns([2,2])
    with col_ss_1:
        st.write("Several folders are created and populated with data during the VoucherVision transcription process.")
        st.write("Below are several options that will allow you to automatically delete temporary files that you may not need for everyday operations.")
        st.write("VoucherVision creates the following folders. Folders marked with a :star: are required if you want to use VoucherVisionEditor for quality control.")
        st.write("`../[Run Name]/Archival_Components`")
        st.write("`../[Run Name]/Config_File`")
        st.write("`../[Run Name]/Cropped_Images` :star:")
        st.write("`../[Run Name]/Logs`")
        st.write("`../[Run Name]/Original_Images` :star:")
        st.write("`../[Run Name]/Transcription` :star:")
    with col_ss_2:
        st.session_state.config['leafmachine']['project']['delete_temps_keep_VVE'] = st.checkbox("Delete Temporary Files (KEEP files required for VoucherVisionEditor)", st.session_state.config['leafmachine']['project'].get('delete_temps_keep_VVE', False))
        st.session_state.config['leafmachine']['project']['delete_all_temps'] = st.checkbox("Keep only the final transcription file", st.session_state.config['leafmachine']['project'].get('delete_all_temps', False), help="*WARNING:* This limits your ability to do quality assurance. This will delete all folders created by VoucherVision, leaving only the `transcription.xlsx` file.")


# def create_private_file():
#     st.session_state.proceed_to_main = False

#     if st.session_state.private_file:
#         cfg_private = get_private_file()
#         create_private_file_0(cfg_private)
#     else:
#         st.title("VoucherVision")
#         create_private_file_0()

def create_private_file():
    st.session_state.proceed_to_main = False
    st.title("VoucherVision")
    col_private, _ = st.columns([12,2])

    if st.session_state.private_file:
        cfg_private = get_private_file()
    else:
        cfg_private = {}
        cfg_private['openai'] = {}
        cfg_private['openai']['OPENAI_API_KEY'] = ''

        cfg_private['openai_azure'] = {}
        cfg_private['openai_azure']['openai_api_key'] = ''
        cfg_private['openai_azure']['api_version'] = ''
        cfg_private['openai_azure']['openai_api_base'] = ''
        cfg_private['openai_azure']['openai_organization'] = ''
        cfg_private['openai_azure']['openai_api_type'] = ''

        cfg_private['google_cloud'] = {}
        cfg_private['google_cloud']['path_json_file'] = ''

        cfg_private['google_palm'] = {}
        cfg_private['google_palm']['google_palm_api'] = ''


    with col_private:
        st.header("Set API keys")
        st.info("***Note:*** There is a known bug with tabs in Streamlit. If you update an input field it may take you back to the 'Project Settings' tab. Changes that you made are saved, it's just an annoying glitch. We are aware of this issue and will fix it as soon as we can.")
        st.warning("To commit changes to API keys you must press the 'Set API Keys' button at the bottom of the page.")
        st.write("Before using VoucherVision you must set your API keys. All keys are stored locally on your computer and are never made public.")
        st.write("API keys are stored in `../VoucherVision/PRIVATE_DATA.yaml`.")
        st.write("Deleting this file will allow you to reset API keys. Alternatively, you can edit the keys in the user interface.")
        st.write("Leave keys blank if you do not intend to use that service.")

        st.write("---")
        st.subheader("Google Vision (*Required*)")
        st.markdown("VoucherVision currently uses [Google Vision API](https://cloud.google.com/vision/docs/ocr) for OCR. Generating an API key for this is more involved than the others. [Please carefully follow the instructions outlined here to create and set up your account.](https://cloud.google.com/vision/docs/setup)")
        st.markdown("""
        Once your account is created, [visit this page](https://console.cloud.google.com) and create a project. Then follow these instructions:

        - **Select your Project**: If you have multiple projects, ensure you select the one where you've enabled the Vision API.
        - **Open the Navigation Menu**: Click on the hamburger menu (three horizontal lines) in the top left corner.
        - **Go to IAM & Admin**: In the navigation pane, hover over "IAM & Admin" and then click on "Service accounts."
        - **Locate Your Service Account**: Find the service account for which you wish to download the JSON key. If you haven't created a service account yet, you'll need to do so by clicking the "CREATE SERVICE ACCOUNT" button at the top.
        - **Download the JSON Key**:
            - Click on the three dots (actions menu) on the right side of your service account name.
            - Select "Manage keys."
            - In the pop-up window, click on the "ADD KEY" button and select "JSON."
            - The JSON key file will automatically be downloaded to your computer.
        - **Store Safely**: This file contains sensitive data that can be used to authenticate and bill your Google Cloud account. Never commit it to public repositories or expose it in any way. Always keep it safe and secure.
        """)
        with st.container():
            c_in_ocr, c_button_ocr = st.columns([10,2])
            with c_in_ocr:
                google_vision = st.text_input(label='Full path to Google Cloud JSON API key file', value=cfg_private['google_cloud'].get('path_json_file', ''),
                                              placeholder='e.g. C:/Documents/Secret_Files/google_API/application_default_credentials.json',
                                              help="This API Key is in the form of a JSON file. Please save the JSON file in a safe directory. DO NOT store the JSON key inside of the VoucherVision directory.",
                                              type='password', key='924857298734590283750932809238')
            with c_button_ocr:
                st.empty()


        st.write("---")
        st.subheader("OpenAI")
        st.markdown("API key for first-party OpenAI API. Create an account with OpenAI [here](https://platform.openai.com/signup), then create an API key [here](https://platform.openai.com/account/api-keys).")
        with st.container():
            c_in_openai, c_button_openai = st.columns([10,2])
            with c_in_openai:
                openai_api_key = st.text_input("openai_api_key", cfg_private['openai'].get('OPENAI_API_KEY', ''),
                                               help='The actual API key. Likely to be a string of 2 characters, a dash, and then a 48-character string: sk-XXXXXXXX...',
                                               placeholder='e.g. sk-XXXXXXXX...',
                                               type='password')
            with c_button_openai:
                st.empty()

        st.write("---")
        st.subheader("OpenAI - Azure")
        st.markdown("This version of OpenAI relies on Azure servers directly and is intended for private enterprise instances of OpenAI's services, such as [UM-GPT](https://its.umich.edu/computing/ai). Administrators will provide you with the following information.")
        azure_openai_api_version = st.text_input("azure_openai_api_version", cfg_private['openai_azure'].get('api_version', ''),
                                                 help='API Version e.g. "2023-05-15"',
                                                 placeholder='e.g. 2023-05-15',
                                                 type='password')
        azure_openai_api_key = st.text_input("azure_openai_api_key", cfg_private['openai_azure'].get('openai_api_key', ''),
                                             help='The actual API key. Likely to be a 32-character string',
                                             placeholder='e.g. 12333333333333333333333333333332',
                                             type='password')
        azure_openai_api_base = st.text_input("azure_openai_api_base", cfg_private['openai_azure'].get('openai_api_base', ''),
                                              help='The base url for the API e.g. "https://api.umgpt.umich.edu/azure-openai-api"',
                                              placeholder='e.g. https://api.umgpt.umich.edu/azure-openai-api',
                                              type='password')
        azure_openai_organization = st.text_input("azure_openai_organization", cfg_private['openai_azure'].get('openai_organization', ''),
                                                  help='Your organization code. Likely a short string',
                                                  placeholder='e.g. 123456',
                                                  type='password')
        azure_openai_api_type = st.text_input("azure_openai_api_type", cfg_private['openai_azure'].get('openai_api_type', ''),
                                              help='The API type. Typically "azure"',
                                              placeholder='e.g. azure',
                                              type='password')
        with st.container():
            c_in_azure, c_button_azure = st.columns([10,2])
            with c_button_azure:
                st.empty()

        st.write("---")
        st.subheader("Google PaLM 2")
        st.markdown('Follow these [instructions](https://developers.generativeai.google/tutorials/setup) to generate an API key for PaLM 2. You may need to also activate an account with [MakerSuite](https://makersuite.google.com/app/apikey) and enable "early access."')
        with st.container():
            c_in_palm, c_button_palm = st.columns([10,2])
            with c_in_palm:
                google_palm = st.text_input("Google PaLM 2 API Key", cfg_private['google_palm'].get('google_palm_api', ''),
                                            help='The MakerSuite API key e.g. a 32-character string',
                                            placeholder='e.g. SATgthsykuE64FgrrrrEervr3S4455t_geyDeGq',
                                            type='password')

        with st.container():
            with c_button_ocr:
                st.write("##")
                st.button("Test OCR", on_click=test_API, args=['google_vision', c_in_ocr, cfg_private, openai_api_key, azure_openai_api_version, azure_openai_api_key,
                                                               azure_openai_api_base, azure_openai_organization, azure_openai_api_type, google_vision, google_palm])

        with st.container():
            with c_button_openai:
                st.write("##")
                st.button("Test OpenAI", on_click=test_API, args=['openai', c_in_openai, cfg_private, openai_api_key, azure_openai_api_version, azure_openai_api_key,
                                                                  azure_openai_api_base, azure_openai_organization, azure_openai_api_type, google_vision, google_palm])

        with st.container():
            with c_button_azure:
                st.write("##")
                st.button("Test Azure OpenAI", on_click=test_API, args=['azure_openai', c_in_azure, cfg_private, openai_api_key, azure_openai_api_version, azure_openai_api_key,
                                                                        azure_openai_api_base, azure_openai_organization, azure_openai_api_type, google_vision, google_palm])

        with st.container():
            with c_button_palm:
                st.write("##")
                st.button("Test PaLM 2", on_click=test_API, args=['palm', c_in_palm, cfg_private, openai_api_key, azure_openai_api_version, azure_openai_api_key,
                                                                  azure_openai_api_base, azure_openai_organization, azure_openai_api_type, google_vision, google_palm])


        st.button("Set API Keys", type='primary', on_click=save_changes_to_API_keys, args=[cfg_private, openai_api_key, azure_openai_api_version, azure_openai_api_key,
                                                                                            azure_openai_api_base, azure_openai_organization, azure_openai_api_type, google_vision, google_palm])
        if st.button('Proceed to VoucherVision'):
            st.session_state.proceed_to_private = False
            st.session_state.proceed_to_main = True

def test_API(api, message_loc, cfg_private, openai_api_key, azure_openai_api_version, azure_openai_api_key, azure_openai_api_base, azure_openai_organization, azure_openai_api_type, google_vision, google_palm):
    # Save the API keys
    save_changes_to_API_keys(cfg_private, openai_api_key, azure_openai_api_version, azure_openai_api_key, azure_openai_api_base, azure_openai_organization, azure_openai_api_type, google_vision, google_palm)

    with st.spinner('Performing validation checks...'):
        if api == 'google_vision':
            print("*** Google Vision OCR API Key ***")
            try:
                demo_config_path = os.path.join(st.session_state.dir_home, 'demo', 'validation_configs', 'google_vision_ocr_test.yaml')
                demo_images_path = os.path.join(st.session_state.dir_home, 'demo', 'demo_images')
                demo_out_path = os.path.join(st.session_state.dir_home, 'demo', 'demo_output', 'run_name')
                create_google_ocr_yaml_config(demo_config_path, demo_images_path, demo_out_path)
                voucher_vision_OCR_test(demo_config_path, st.session_state.dir_home, None, demo_images_path)
                with message_loc:
                    st.success("Google Vision OCR API Key Valid :white_check_mark:")
                return True
            except Exception as e:
                with message_loc:
                    st.error(f"Google Vision OCR API Key Failed! {e}")
                return False

        elif api == 'openai':
            print("*** OpenAI API Key ***")
            try:
                if run_api_tests('openai'):
                    with message_loc:
                        st.success("OpenAI API Key Valid :white_check_mark:")
                else:
                    with message_loc:
                        st.error("OpenAI API Key Failed:exclamation:")
                    return False
            except Exception as e:
                with message_loc:
                    st.error(f"OpenAI API Key Failed:exclamation: {e}")

        elif api == 'azure_openai':
            print("*** Azure OpenAI API Key ***")
            try:
                if run_api_tests('azure_openai'):
                    with message_loc:
                        st.success("Azure OpenAI API Key Valid :white_check_mark:")
                else:
                    with message_loc:
                        st.error("Azure OpenAI API Key Failed:exclamation:")
                    return False
            except Exception as e:
                with message_loc:
                    st.error(f"Azure OpenAI API Key Failed:exclamation: {e}")
        elif api == 'palm':
            print("*** Google PaLM 2 API Key ***")
            try:
                if run_api_tests('palm'):
                    with message_loc:
                        st.success("Google PaLM 2 API Key Valid :white_check_mark:")
                else:
                    with message_loc:
                        st.error("Google PaLM 2 API Key Failed:exclamation:")
                    return False
            except Exception as e:
                with message_loc:
                    st.error(f"Google PaLM 2 API Key Failed:exclamation: {e}")


def save_changes_to_API_keys(cfg_private, openai_api_key, azure_openai_api_version, azure_openai_api_key,
                             azure_openai_api_base, azure_openai_organization, azure_openai_api_type, google_vision, google_palm):
    # Update the configuration dictionary with the new values
    cfg_private['openai']['OPENAI_API_KEY'] = openai_api_key

    cfg_private['openai_azure']['api_version'] = azure_openai_api_version
    cfg_private['openai_azure']['openai_api_key'] = azure_openai_api_key
    cfg_private['openai_azure']['openai_api_base'] = azure_openai_api_base
    cfg_private['openai_azure']['openai_organization'] = azure_openai_organization
    cfg_private['openai_azure']['openai_api_type'] = azure_openai_api_type

    cfg_private['google_cloud']['path_json_file'] = google_vision

    cfg_private['google_palm']['google_palm_api'] = google_palm
    # Call the function to write the updated configuration to the YAML file
    write_config_file(cfg_private, st.session_state.dir_home, filename="PRIVATE_DATA.yaml")
    st.session_state.private_file = does_private_file_exist()
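save_changes_to_API_keys() persists everything through write_config_file() into PRIVATE_DATA.yaml at the repo root. For reference, a minimal sketch of reading those values back outside of Streamlit, assuming write_config_file() serializes the dictionary as plain YAML and that you run from the repo root; only keys the function above actually writes are shown:

    import yaml

    with open('PRIVATE_DATA.yaml', 'r') as f:
        cfg_private = yaml.safe_load(f)

    openai_key  = cfg_private['openai']['OPENAI_API_KEY']        # '' if never set
    palm_key    = cfg_private['google_palm']['google_palm_api']
    vision_json = cfg_private['google_cloud']['path_json_file']  # path to the service-account JSON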
# Function to load a YAML file and update session_state
def load_prompt_yaml(filename):
    with open(filename, 'r') as file:
        st.session_state['prompt_info'] = yaml.safe_load(file)
        st.session_state['instructions'] = st.session_state['prompt_info'].get('instructions', st.session_state['default_instructions'])
        st.session_state['json_formatting_instructions'] = st.session_state['prompt_info'].get('json_formatting_instructions', st.session_state['default_json_formatting_instructions'])
        st.session_state['rules'] = st.session_state['prompt_info'].get('rules', {})
        st.session_state['mapping'] = st.session_state['prompt_info'].get('mapping', {})
        st.session_state['LLM'] = st.session_state['prompt_info'].get('LLM', 'gpt')

        # Placeholder:
        st.session_state['assigned_columns'] = list(chain.from_iterable(st.session_state['mapping'].values()))

def save_prompt_yaml(filename):
    yaml_content = {
        'instructions': st.session_state['instructions'],
        'json_formatting_instructions': st.session_state['json_formatting_instructions'],
        'rules': st.session_state['rules'],
        'mapping': st.session_state['mapping'],
        'LLM': st.session_state['LLM']
    }

    dir_prompt = os.path.join(st.session_state.dir_home, 'custom_prompts')
    filepath = os.path.join(dir_prompt, f"{filename}.yaml")

    with open(filepath, 'w') as file:
        yaml.safe_dump(yaml_content, file)

    st.success(f"Prompt saved as '{filename}.yaml'.")
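save_prompt_yaml() writes five top-level keys. A toy example of producing such a file in `custom_prompts/` with the same yaml.safe_dump call; all field values below are invented placeholders, not defaults shipped with the app:

    example_prompt = {
        'LLM': 'gpt',
        'instructions': '1. Refactor the unstructured OCR text into a dictionary ...',
        'json_formatting_instructions': 'The next section of instructions outlines how to format the JSON dictionary ...',
        'rules': {'Dictionary': {'catalog_number': {'format': 'verbatim transcription',
                                                    'null_value': '',
                                                    'description': 'The barcode identifier'}},
                  'SpeciesName': {'taxonomy': ['Genus_species']}},
        'mapping': {'TAXONOMY': [], 'GEOGRAPHY': [], 'LOCALITY': [], 'COLLECTING': [], 'MISCELLANEOUS': []},
    }
    with open('custom_prompts/my_prompt_name.yaml', 'w') as f:
        yaml.safe_dump(example_prompt, f)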
def check_unique_mapping_assignments():
    if len(st.session_state['assigned_columns']) != len(set(st.session_state['assigned_columns'])):
        st.error("Each column name must be assigned to only one category.")
        return False
    else:
        st.success("Mapping confirmed.")
        return True

def check_prompt_yaml_filename(fname):
    # Check if the filename only contains letters, numbers, underscores, and dashes
    pattern = r'^[\w-]+$'

    # The \w matches any alphanumeric character and is equivalent to the character class [a-zA-Z0-9_].
    # The hyphen - is literally matched.

    if re.match(pattern, fname):
        return True
    else:
        return False
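For example, under that pattern:

    check_prompt_yaml_filename('my_prompt_name')   # True
    check_prompt_yaml_filename('my-prompt-2')      # True
    check_prompt_yaml_filename('my prompt name')   # False - spaces are rejected
    check_prompt_yaml_filename('prompt.v2')        # False - periods are rejected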
564 |
+
|
565 |
+
def btn_load_prompt(selected_yaml_file, dir_prompt):
|
566 |
+
if selected_yaml_file:
|
567 |
+
yaml_file_path = os.path.join(dir_prompt, selected_yaml_file)
|
568 |
+
load_prompt_yaml(yaml_file_path)
|
569 |
+
elif not selected_yaml_file:
|
570 |
+
# Directly assigning default values since no file is selected
|
571 |
+
st.session_state['prompt_info'] = {}
|
572 |
+
st.session_state['instructions'] = st.session_state['default_instructions']
|
573 |
+
st.session_state['json_formatting_instructions'] = st.session_state['default_json_formatting_instructions']
|
574 |
+
st.session_state['rules'] = {}
|
575 |
+
st.session_state['LLM'] = 'gpt'
|
576 |
+
|
577 |
+
st.session_state['assigned_columns'] = []
|
578 |
+
|
579 |
+
st.session_state['prompt_info'] = {
|
580 |
+
'instructions': st.session_state['instructions'],
|
581 |
+
'json_formatting_instructions': st.session_state['json_formatting_instructions'],
|
582 |
+
'rules': st.session_state['rules'],
|
583 |
+
'mapping': st.session_state['mapping'],
|
584 |
+
'LLM': st.session_state['LLM']
|
585 |
+
}
|
586 |
+
|
587 |
+
def build_LLM_prompt_config():
|
588 |
+
st.session_state['assigned_columns'] = []
|
589 |
+
st.session_state['default_instructions'] = """1. Refactor the unstructured OCR text into a dictionary based on the JSON structure outlined below.
|
590 |
+
2. You should map the unstructured OCR text to the appropriate JSON key and then populate the field based on its rules.
|
591 |
+
3. Some JSON key fields are permitted to remain empty if the corresponding information is not found in the unstructured OCR text.
|
592 |
+
4. Ignore any information in the OCR text that doesn't fit into the defined JSON structure.
|
593 |
+
5. Duplicate dictionary fields are not allowed.
|
594 |
+
6. Ensure that all JSON keys are in lowercase.
|
595 |
+
7. Ensure that new JSON field values follow sentence case capitalization.
|
596 |
+
8. Ensure all key-value pairs in the JSON dictionary strictly adhere to the format and data types specified in the template.
|
597 |
+
9. Ensure the output JSON string is valid JSON format. It should not have trailing commas or unquoted keys.
|
598 |
+
10. Only return a JSON dictionary represented as a string. You should not explain your answer."""
|
599 |
+
st.session_state['default_json_formatting_instructions'] = """The next section of instructions outlines how to format the JSON dictionary. The keys are the same as those of the final formatted JSON object.
|
600 |
+
For each key there is a format requirement that specifies how to transcribe the information for that key.
|
601 |
+
The possible formatting options are:
|
602 |
+
1. "verbatim transcription" - field is populated with verbatim text from the unformatted OCR.
|
603 |
+
2. "spell check transcription" - field is populated with spelling corrected text from the unformatted OCR.
|
604 |
+
3. "boolean yes no" - field is populated with only yes or no.
|
605 |
+
4. "boolean 1 0" - field is populated with only 1 or 0.
|
606 |
+
5. "integer" - field is populated with only an integer.
|
607 |
+
6. "[list]" - field is populated from one of the values in the list.
|
608 |
+
7. "yyyy-mm-dd" - field is populated with a date in the format year-month-day.
|
609 |
+
The desired null value is also given. Populate the field with the null value of the information for that key is not present in the unformatted OCR text."""
|
610 |
+
|
611 |
+
# Start building the Streamlit app
|
612 |
+
col_prompt_main_left, ___, col_prompt_main_right = st.columns([6,1,3])
|
613 |
+
|
614 |
+
|
615 |
+
with col_prompt_main_left:
|
616 |
+
|
617 |
+
st.title("Custom LLM Prompt Builder")
|
618 |
+
st.subheader('About')
|
619 |
+
st.write("This form allows you to craft a prompt for your specific task.")
|
620 |
+
st.subheader('How it works')
|
621 |
+
st.write("1. Edit this page until you are happy with your instructions. We recommend looking at the basic structure, writing down your prompt inforamtion in a Word document so that it does not randomly disappear, and then copying and pasting that info into this form once your whole prompt structure is defined.")
|
622 |
+
st.write("2. After you enter all of your prompt instructions, click 'Save' and give your file a name.")
|
623 |
+
st.write("3. This file will be saved as a yaml configuration file in the `..VoucherVision/custom_prompts` folder.")
|
624 |
+
st.write("4. When you go back the main VoucherVision page you will now see your custom prompt available in the 'Prompt Version' dropdown menu.")
|
625 |
+
st.write("5. Select your custom prompt. Note, your prompt will only be available for the LLM that you set when filling out the form below.")
|
626 |
+
|
627 |
+
|
628 |
+
dir_prompt = os.path.join(st.session_state.dir_home, 'custom_prompts')
|
629 |
+
yaml_files = [f for f in os.listdir(dir_prompt) if f.endswith('.yaml')]
|
630 |
+
col_load_text, col_load_btn = st.columns([8,2])
|
631 |
+
with col_load_text:
|
632 |
+
# Dropdown for selecting a YAML file
|
633 |
+
selected_yaml_file = st.selectbox('Select a prompt YAML file to load:', [''] + yaml_files)
|
634 |
+
with col_load_btn:
|
635 |
+
st.write('##')
|
636 |
+
# Button to load the selected prompt
|
637 |
+
st.button('Load Prompt', on_click=btn_load_prompt, args=[selected_yaml_file, dir_prompt])
|
638 |
+
|
639 |
+
|
640 |
+
|
641 |
+
# Define the options for the dropdown
|
642 |
+
llm_options = ['gpt', 'palm']
|
643 |
+
# Create the dropdown and set the value to session_state['LLM']
|
644 |
+
st.session_state['LLM'] = st.selectbox('Set LLM:', llm_options, index=llm_options.index(st.session_state.get('LLM', 'gpt')))
|
645 |
+
|
646 |
+
|
647 |
+
|
648 |
+
# Instructions Section
|
649 |
+
st.header("Instructions")
|
650 |
+
st.write("These are the general instructions that guide the LLM through the transcription task. We recommend using the default instructions unless you have a specific reason to change them.")
|
651 |
+
|
652 |
+
st.session_state['instructions'] = st.text_area("Enter instructions:", value=st.session_state['default_instructions'].strip(), height=350, disabled=True)
|
653 |
+
|
654 |
+
st.write('---')
|
655 |
+
|
656 |
+
# Column Instructions Section
|
657 |
+
st.header("JSON Formatting Instructions")
|
658 |
+
st.write("The following section tells the LLM how we want to structure the JSON dictionary. We do not recommend changing this section because it would likely result in unstable and inconsistent behavior.")
|
659 |
+
st.session_state['json_formatting_instructions'] = st.text_area("Enter column instructions:", value=st.session_state['default_json_formatting_instructions'], height=350, disabled=True)
|
660 |
+
|
661 |
+
|
662 |
+
|
663 |
+
|
664 |
+
|
665 |
+
st.write('---')
|
666 |
+
col_left, col_right = st.columns([6,4])
|
667 |
+
with col_left:
|
668 |
+
st.subheader('Add/Edit Columns')
|
669 |
+
|
670 |
+
# Initialize rules in session state if not already present
|
671 |
+
if 'rules' not in st.session_state or not st.session_state['rules']:
|
672 |
+
st.session_state['rules']['Dictionary'] = {
|
673 |
+
"catalog_number": {
|
674 |
+
"format": "verbatim transcription",
|
675 |
+
"null_value": "",
|
676 |
+
"description": "The barcode identifier, typically a number with at least 6 digits, but fewer than 30 digits."
|
677 |
+
}
|
678 |
+
}
|
679 |
+
st.session_state['rules']['SpeciesName'] = {
|
680 |
+
"taxonomy": ["Genus_species"]
|
681 |
+
}
|
682 |
+
|
683 |
+
# Layout for adding a new column name
|
684 |
+
# col_text, col_textbtn = st.columns([8, 2])
|
685 |
+
# with col_text:
|
686 |
+
new_column_name = st.text_input("Enter a new column name:")
|
687 |
+
# with col_textbtn:
|
688 |
+
# st.write('##')
|
689 |
+
if st.button("Add New Column") and new_column_name:
|
690 |
+
if new_column_name not in st.session_state['rules']['Dictionary']:
|
691 |
+
st.session_state['rules']['Dictionary'][new_column_name] = {"format": "", "null_value": "", "description": ""}
|
692 |
+
st.success(f"New column '{new_column_name}' added. Now you can edit its properties.")
|
693 |
+
else:
|
694 |
+
st.error("Column name already exists. Please enter a unique column name.")
|
695 |
+
|
696 |
+
# Get columns excluding the protected "catalog_number"
|
697 |
+
st.write('#')
|
698 |
+
editable_columns = [col for col in st.session_state['rules']['Dictionary'] if col != "catalog_number"]
|
699 |
+
column_name = st.selectbox("Select a column to edit:", [""] + editable_columns)
|
700 |
+
|
701 |
+
# Handle rules editing
|
702 |
+
current_rule = st.session_state['rules']['Dictionary'].get(column_name, {
|
703 |
+
"format": "",
|
704 |
+
"null_value": "",
|
705 |
+
"description": ""
|
706 |
+
})
|
707 |
+
|
708 |
+
if 'selected_column' not in st.session_state:
|
709 |
+
st.session_state['selected_column'] = column_name
|
710 |
+
|
711 |
+
|
712 |
+
|
713 |
+
|
714 |
+
# Form for input fields
|
715 |
+
with st.form(key='rule_form'):
|
716 |
+
format_options = ["verbatim transcription", "spell check transcription", "boolean yes no", "boolean 1 0", "integer", "[list]", "yyyy-mm-dd"]
|
717 |
+
current_rule["format"] = st.selectbox("Format:", format_options, index=format_options.index(current_rule["format"]) if current_rule["format"] else 0)
|
718 |
+
current_rule["null_value"] = st.text_input("Null value:", value=current_rule["null_value"])
|
719 |
+
current_rule["description"] = st.text_area("Description:", value=current_rule["description"])
|
720 |
+
commit_button = st.form_submit_button("Commit Column")
|
721 |
+
|
722 |
+
default_rule = {
|
723 |
+
"format": format_options[0], # default format
|
724 |
+
"null_value": "", # default null value
|
725 |
+
"description": "", # default description
|
726 |
+
}
|
727 |
+
if st.session_state['selected_column'] != column_name:
|
728 |
+
# Column has changed. Update the session_state selected column.
|
729 |
+
st.session_state['selected_column'] = column_name
|
730 |
+
# Reset the current rule to the default for this new column, or a blank rule if not set.
|
731 |
+
current_rule = st.session_state['rules']['Dictionary'].get(column_name, default_rule.copy())
|
732 |
+
|
733 |
+
# Handle commit action
|
734 |
+
if commit_button and column_name:
|
735 |
+
# Commit the rules to the session state.
|
736 |
+
st.session_state['rules']['Dictionary'][column_name] = current_rule.copy()
|
737 |
+
st.success(f"Column '{column_name}' added/updated in rules.")
|
738 |
+
|
739 |
+
# Force the form to reset by clearing the fields from the session state
|
740 |
+
st.session_state.pop('selected_column', None) # Clear the selected column to force reset
|
741 |
+
|
742 |
+
# st.session_state['rules'][column_name] = current_rule
|
743 |
+
# st.success(f"Column '{column_name}' added/updated in rules.")
|
744 |
+
|
745 |
+
# # Reset current_rule to default values for the next input
|
746 |
+
# current_rule["format"] = default_rule["format"]
|
747 |
+
# current_rule["null_value"] = default_rule["null_value"]
|
748 |
+
# current_rule["description"] = default_rule["description"]
|
749 |
+
|
750 |
+
# # To ensure that the form fields are reset, we can clear them from the session state
|
751 |
+
# for key in current_rule.keys():
|
752 |
+
# st.session_state[key] = default_rule[key]
|
753 |
+
|
754 |
+
# Layout for removing an existing column
|
755 |
+
# del_col, del_colbtn = st.columns([8, 2])
|
756 |
+
# with del_col:
|
757 |
+
delete_column_name = st.selectbox("Select a column to delete:", [""] + editable_columns, key='delete_column')
|
758 |
+
# with del_colbtn:
|
759 |
+
# st.write('##')
|
760 |
+
if st.button("Delete Column") and delete_column_name:
|
761 |
+
del st.session_state['rules'][delete_column_name]
|
762 |
+
st.success(f"Column '{delete_column_name}' removed from rules.")
|
763 |
+
|
764 |
+
|
765 |
+
|
766 |
+
|
767 |
+
with col_right:
|
768 |
+
# Display the current state of the JSON rules
|
769 |
+
st.subheader('Formatted Columns')
|
770 |
+
st.json(st.session_state['rules']['Dictionary'])
|
771 |
+
|
772 |
+
# st.subheader('All Prompt Info')
|
773 |
+
# st.json(st.session_state['prompt_info'])
|
774 |
+
|
775 |
+
|
776 |
+
st.write('---')
|
777 |
+
|
778 |
+
|
779 |
+
col_left_mapping, col_right_mapping = st.columns([6,4])
|
780 |
+
with col_left_mapping:
|
781 |
+
st.header("Mapping")
|
782 |
+
st.write("Assign each column name to a single category.")
|
783 |
+
st.session_state['refresh_mapping'] = False
|
784 |
+
|
785 |
+
# Dynamically create a list of all column names that can be assigned
|
786 |
+
# This assumes that the column names are the keys in the dictionary under 'rules'
|
787 |
+
all_column_names = list(st.session_state['rules']['Dictionary'].keys())
|
788 |
+
|
789 |
+
categories = ['TAXONOMY', 'GEOGRAPHY', 'LOCALITY', 'COLLECTING', 'MISCELLANEOUS']
|
790 |
+
if ('mapping' not in st.session_state) or (st.session_state['mapping'] == {}):
|
791 |
+
st.session_state['mapping'] = {category: [] for category in categories}
|
792 |
+
for category in categories:
|
793 |
+
# Filter out the already assigned columns
|
794 |
+
available_columns = [col for col in all_column_names if col not in st.session_state['assigned_columns'] or col in st.session_state['mapping'].get(category, [])]
|
795 |
+
|
796 |
+
# Ensure the current mapping is a subset of the available options
|
797 |
+
current_mapping = [col for col in st.session_state['mapping'].get(category, []) if col in available_columns]
|
798 |
+
|
799 |
+
# Provide a safe default if the current mapping is empty or contains invalid options
|
800 |
+
safe_default = current_mapping if all(col in available_columns for col in current_mapping) else []
|
801 |
+
|
802 |
+
# Create a multi-select widget for the category with a safe default
|
803 |
+
selected_columns = st.multiselect(
|
804 |
+
f"Select columns for {category}:",
|
805 |
+
available_columns,
|
806 |
+
default=safe_default,
|
807 |
+
key=f"mapping_{category}"
|
808 |
+
)
|
809 |
+
# Update the assigned_columns based on the selections
|
810 |
+
for col in current_mapping:
|
811 |
+
if col not in selected_columns and col in st.session_state['assigned_columns']:
|
812 |
+
st.session_state['assigned_columns'].remove(col)
|
813 |
+
st.session_state['refresh_mapping'] = True
|
814 |
+
|
815 |
+
for col in selected_columns:
|
816 |
+
if col not in st.session_state['assigned_columns']:
|
817 |
+
st.session_state['assigned_columns'].append(col)
|
818 |
+
st.session_state['refresh_mapping'] = True
|
819 |
+
|
820 |
+
# Update the mapping in session state when there's a change
|
821 |
+
st.session_state['mapping'][category] = selected_columns
|
822 |
+
if st.session_state['refresh_mapping']:
|
823 |
+
st.session_state['refresh_mapping'] = False
|
824 |
+
|
825 |
+
# Button to confirm and save the mapping configuration
|
826 |
+
if st.button('Confirm Mapping'):
|
827 |
+
if check_unique_mapping_assignments():
|
828 |
+
# Proceed with further actions since the mapping is confirmed and unique
|
829 |
+
pass
|
830 |
+
|
831 |
+
with col_right_mapping:
|
832 |
+
# Display the current state of the JSON rules
|
833 |
+
st.subheader('Formatted Column Maps')
|
834 |
+
st.json(st.session_state['mapping'])
|
835 |
+
|
836 |
+
|
837 |
+
col_left_save, col_right_save = st.columns([6,4])
|
838 |
+
with col_left_save:
|
839 |
+
# Input for new file name
|
840 |
+
new_filename = st.text_input("Enter filename to save your prompt as a configuration YAML:",placeholder='my_prompt_name')
|
841 |
+
# Button to save the new YAML file
|
842 |
+
if st.button('Save YAML', type='primary'):
|
843 |
+
if new_filename:
|
844 |
+
if check_unique_mapping_assignments():
|
845 |
+
if check_prompt_yaml_filename(new_filename):
|
846 |
+
save_prompt_yaml(new_filename)
|
847 |
+
else:
|
848 |
+
st.error("File name can only contain letters, numbers, underscores, and dashes. Cannot contain spaces.")
|
849 |
+
else:
|
850 |
+
st.error("Mapping contains an error. Make sure that each column is assigned to only ***one*** category.")
|
851 |
+
else:
|
852 |
+
st.error("Please enter a filename.")
|
853 |
+
|
854 |
+
if st.button('Exit'):
|
855 |
+
st.session_state.proceed_to_build_llm_prompt = False
|
856 |
+
st.session_state.proceed_to_main = True
|
857 |
+
st.rerun()
|
858 |
+
with col_prompt_main_right:
|
859 |
+
st.subheader('All Prompt Components')
|
860 |
+
st.session_state['prompt_info'] = {
|
861 |
+
'instructions': st.session_state['instructions'],
|
862 |
+
'json_formatting_instructions': st.session_state['json_formatting_instructions'],
|
863 |
+
'rules': st.session_state['rules'],
|
864 |
+
'mapping': st.session_state['mapping'],
|
865 |
+
'LLM': st.session_state['LLM']
|
866 |
+
}
|
867 |
+
st.json(st.session_state['prompt_info'])
|
868 |
+
|
869 |
+
def save_yaml(content, filename="rules_config.yaml"):
|
870 |
+
with open(filename, 'w') as file:
|
871 |
+
yaml.dump(content, file)
|
872 |
+
|
873 |
+
def show_header_welcome():
|
874 |
+
st.session_state.logo_path = os.path.join(st.session_state.dir_home, 'img','logo.png')
|
875 |
+
st.session_state.logo = Image.open(st.session_state.logo_path)
|
876 |
+
st.image(st.session_state.logo, width=250)
|
877 |
+
|
878 |
+
def content_header():
|
879 |
+
col_run_1, col_run_2, col_run_3 = st.columns([4,2,2])
|
880 |
+
col_test = st.container()
|
881 |
+
|
882 |
+
st.write("")
|
883 |
+
st.write("")
|
884 |
+
st.write("")
|
885 |
+
st.write("")
|
886 |
+
st.subheader("Overall Progress")
|
887 |
+
col_run_info_1 = st.columns([1])[0]
|
888 |
+
st.write("")
|
889 |
+
st.write("")
|
890 |
+
st.write("")
|
891 |
+
st.write("")
|
892 |
+
st.header("Configuration Settings")
|
893 |
+
|
894 |
+
with col_run_info_1:
|
895 |
+
# Progress
|
896 |
+
# Progress
|
897 |
+
# st.subheader('Project')
|
898 |
+
# bar = st.progress(0)
|
899 |
+
# new_text = st.empty() # Placeholder for current step name
|
900 |
+
# progress_report = ProgressReportVV(bar, new_text, n_images=10)
|
901 |
+
|
902 |
+
# Progress
|
903 |
+
overall_progress_bar = st.progress(0)
|
904 |
+
text_overall = st.empty() # Placeholder for current step name
|
905 |
+
st.subheader('Transcription Progress')
|
906 |
+
batch_progress_bar = st.progress(0)
|
907 |
+
text_batch = st.empty() # Placeholder for current step name
|
908 |
+
progress_report = ProgressReport(overall_progress_bar, batch_progress_bar, text_overall, text_batch)
|
909 |
+
st.info("***Note:*** There is a known bug with tabs in Streamlit. If you update an input field it may take you back to the 'Project Settings' tab. Changes that you made are saved, it's just an annoying glitch. We are aware of this issue and will fix it as soon as we can.")
|
910 |
+
st.write("If you use VoucherVision frequently, you can change the default values that are auto-populated in the form below. In a text editor or IDE, edit the first few rows in the file `../VoucherVision/vouchervision/VoucherVision_Config_Builder.py`")
|
911 |
+
|
912 |
+
|
913 |
+
with col_run_1:
|
914 |
+
show_header_welcome()
|
915 |
+
st.subheader('Run VoucherVision')
|
916 |
+
if check_if_usable():
|
917 |
+
if st.button("Start Processing", type='primary'):
|
918 |
+
|
919 |
+
# First, write the config file.
|
920 |
+
write_config_file(st.session_state.config, st.session_state.dir_home, filename="VoucherVision.yaml")
|
921 |
+
|
922 |
+
path_custom_prompts = os.path.join(st.session_state.dir_home,'custom_prompts',st.session_state.config['leafmachine']['project']['prompt_version'])
|
923 |
+
# Call the machine function.
|
924 |
+
last_JSON_response, total_cost = voucher_vision(None, st.session_state.dir_home, path_custom_prompts, None, progress_report,path_api_cost=os.path.join(st.session_state.dir_home,'api_cost','api_cost.yaml'))
|
925 |
+
|
926 |
+
if total_cost:
|
927 |
+
st.success(f":money_with_wings: This run cost :heavy_dollar_sign:{total_cost:.4f}")
|
928 |
+
|
929 |
+
# Format the JSON string for display.
|
930 |
+
if last_JSON_response is None:
|
931 |
+
st.markdown(f"Last JSON object in the batch: NONE")
|
932 |
+
else:
|
933 |
+
try:
|
934 |
+
formatted_json = json.dumps(json.loads(last_JSON_response), indent=4)
|
935 |
+
except:
|
936 |
+
formatted_json = json.dumps(last_JSON_response, indent=4)
|
937 |
+
st.markdown(f"Last JSON object in the batch:\n```\n{formatted_json}\n```")
|
938 |
+
st.balloons()
|
939 |
+
|
940 |
+
else:
|
941 |
+
st.button("Start Processing", type='primary', disabled=True)
|
942 |
+
st.error(":heavy_exclamation_mark: Required API keys not set. Please visit the 'API Keys' tab and set the Google Vision OCR API key and at least one LLM key.")
|
943 |
+
|
944 |
+
with col_run_2:
|
945 |
+
st.subheader('Run Tests', help="")
|
946 |
+
st.write('We include a single image for testing. If you want to test all of the available prompts and LLMs on a different set of images, copy your images into `../VoucherVision/demo/demo_images`.')
|
947 |
+
if st.button("Test GPT"):
|
948 |
+
progress_report.set_n_overall(TestOptionsGPT.get_length())
|
949 |
+
test_results, JSON_results = run_demo_tests_GPT(progress_report)
|
950 |
+
with col_test:
|
951 |
+
display_test_results(test_results, JSON_results, 'gpt')
|
952 |
+
st.balloons()
|
953 |
+
|
954 |
+
if st.button("Test PaLM2"):
|
955 |
+
progress_report.set_n_overall(TestOptionsPalm.get_length())
|
956 |
+
test_results, JSON_results = run_demo_tests_Palm(progress_report)
|
957 |
+
with col_test:
|
958 |
+
display_test_results(test_results, JSON_results, 'palm')
|
959 |
+
st.balloons()
|
960 |
+
|
961 |
+
with col_run_3:
|
962 |
+
st.subheader('Check GPU')
|
963 |
+
if st.button("GPU"):
|
964 |
+
success, info = test_GPU()
|
965 |
+
|
966 |
+
if success:
|
967 |
+
st.balloons()
|
968 |
+
for message in info:
|
969 |
+
st.success(message)
|
970 |
+
else:
|
971 |
+
for message in info:
|
972 |
+
st.error(message)
|
973 |
+
|
974 |
+
def content_tab_settings():
|
975 |
+
st.header('Project')
|
976 |
+
col_project_1, col_project_2 = st.columns([4,2])
|
977 |
+
|
978 |
+
st.write("---")
|
979 |
+
st.header('Input Images')
|
980 |
+
col_local_1, col_local_2 = st.columns([4,2])
|
981 |
+
|
982 |
+
# st.write("---")
|
983 |
+
# st.header('Modules')
|
984 |
+
# col_m1, col_m2 = st.columns(2)
|
985 |
+
|
986 |
+
st.write("---")
|
987 |
+
st.header('Cropped Components')
|
988 |
+
col_cropped_1, col_cropped_2 = st.columns([4,4])
|
989 |
+
|
990 |
+
|
991 |
+
### Project
|
992 |
+
with col_project_1:
|
993 |
+
st.session_state.config['leafmachine']['project']['run_name'] = st.text_input("Run name", st.session_state.config['leafmachine']['project'].get('run_name', ''))
|
994 |
+
st.session_state.config['leafmachine']['project']['dir_output'] = st.text_input("Output directory", st.session_state.config['leafmachine']['project'].get('dir_output', ''))
|
995 |
+
|
996 |
+
### Input Images Local
|
997 |
+
with col_local_1:
|
998 |
+
st.session_state.config['leafmachine']['project']['dir_images_local'] = st.text_input("Input images directory", st.session_state.config['leafmachine']['project'].get('dir_images_local', ''))
|
999 |
+
st.session_state.config['leafmachine']['project']['continue_run_from_partial_xlsx'] = st.text_input("Continue run from partially completed project XLSX", st.session_state.config['leafmachine']['project'].get('continue_run_from_partial_xlsx', ''), disabled=True)
|
1000 |
+
st.write("---")
|
1001 |
+
st.subheader('LLM Version')
|
1002 |
+
st.markdown(
|
1003 |
+
"""
|
1004 |
+
***Note:*** GPT-4 is 20x more expensive than GPT-3.5
|
1005 |
+
"""
|
1006 |
+
)
|
1007 |
+
st.session_state.config['leafmachine']['LLM_version'] = st.selectbox("LLM version", ["GPT 4", "GPT 3.5", "Azure GPT 4", "Azure GPT 3.5", "PaLM 2"], index=["GPT 4", "GPT 3.5", "Azure GPT 4", "Azure GPT 3.5", "PaLM 2"].index(st.session_state.config['leafmachine'].get('LLM_version', 'Azure GPT 4')))
|
1008 |
+
|
1009 |
+
st.write("---")
|
1010 |
+
st.subheader('Prompt Version')
|
1011 |
+
versions, default_version = get_prompt_versions(st.session_state.config['leafmachine']['LLM_version'])
|
1012 |
+
|
1013 |
+
if versions:
|
1014 |
+
selected_version = st.session_state.config['leafmachine']['project'].get('prompt_version', default_version)
|
1015 |
+
if selected_version not in versions:
|
1016 |
+
selected_version = default_version
|
1017 |
+
st.session_state.config['leafmachine']['project']['prompt_version'] = st.selectbox("Prompt Version", versions, index=versions.index(selected_version))
|
1018 |
+
|
1019 |
+
# if st.session_state.config['leafmachine']['LLM_version'] in ["GPT 4", "GPT 3.5", "Azure GPT 4", "Azure GPT 3.5",]:
|
1020 |
+
# st.session_state.config['leafmachine']['project']['prompt_version'] = st.selectbox("Prompt Version", ["Version 1", "Version 1 No Domain Knowledge", "Version 2"], index=["Version 1", "Version 1 No Domain Knowledge", "Version 2"].index(st.session_state.config['leafmachine']['project'].get('prompt_version', "Version 2")))
|
1021 |
+
# elif st.session_state.config['leafmachine']['LLM_version'] in ["PaLM 2",]:
|
1022 |
+
# st.session_state.config['leafmachine']['project']['prompt_version'] = st.selectbox("Prompt Version", ["Version 1 PaLM 2", "Version 1 PaLM 2 No Domain Knowledge", "Version 2 PaLM 2"], index=["Version 1 PaLM 2", "Version 1 PaLM 2 No Domain Knowledge", "Version 2 PaLM 2"].index(st.session_state.config['leafmachine']['project'].get('prompt_version', "Version 2 PaLM 2")))
|
1023 |
+
|
1024 |
+
### Modules
|
1025 |
+
# with col_m1:
|
1026 |
+
# st.session_state.config['leafmachine']['modules']['specimen_crop'] = st.checkbox("Specimen Close-up", st.session_state.config['leafmachine']['modules'].get('specimen_crop', True),disabled=True)
|
1027 |
+
|
1028 |
+
### cropped_components
|
1029 |
+
# with col_cropped_1:
|
1030 |
+
# st.session_state.config['leafmachine']['cropped_components']['do_save_cropped_annotations'] = st.checkbox("Save cropped components as images", st.session_state.config['leafmachine']['cropped_components'].get('do_save_cropped_annotations', True), disabled=True)
|
1031 |
+
# st.session_state.config['leafmachine']['cropped_components']['save_per_image'] = st.checkbox("Save cropped components grouped by specimen", st.session_state.config['leafmachine']['cropped_components'].get('save_per_image', False), disabled=True)
|
1032 |
+
# st.session_state.config['leafmachine']['cropped_components']['save_per_annotation_class'] = st.checkbox("Save cropped components grouped by type", st.session_state.config['leafmachine']['cropped_components'].get('save_per_annotation_class', True), disabled=True)
|
1033 |
+
# st.session_state.config['leafmachine']['cropped_components']['binarize_labels'] = st.checkbox("Binarize labels", st.session_state.config['leafmachine']['cropped_components'].get('binarize_labels', False), disabled=True)
|
1034 |
+
# st.session_state.config['leafmachine']['cropped_components']['binarize_labels_skeletonize'] = st.checkbox("Binarize and skeletonize labels", st.session_state.config['leafmachine']['cropped_components'].get('binarize_labels_skeletonize', False), disabled=True)
|
1035 |
+
|
1036 |
+
with col_cropped_1:
|
1037 |
+
default_crops = st.session_state.config['leafmachine']['cropped_components'].get('save_cropped_annotations', ['leaf_whole'])
|
1038 |
+
st.write("Prior to transcription, use LeafMachine2 to crop all labels from input images to create label collages for each specimen image. (Requires GPU)")
|
1039 |
+
st.session_state.config['leafmachine']['use_RGB_label_images'] = st.checkbox("Use LeafMachine2 label collage for transcriptions", st.session_state.config['leafmachine'].get('use_RGB_label_images', False))
|
1040 |
+
|
1041 |
+
st.session_state.config['leafmachine']['cropped_components']['save_cropped_annotations'] = st.multiselect("Components to crop",
|
1042 |
+
['ruler', 'barcode','label', 'colorcard','map','envelope','photo','attached_item','weights',
|
1043 |
+
'leaf_whole', 'leaf_partial', 'leaflet', 'seed_fruit_one', 'seed_fruit_many', 'flower_one', 'flower_many', 'bud','specimen','roots','wood'],default=default_crops)
|
1044 |
+
with col_cropped_2:
|
1045 |
+
ba = os.path.join(st.session_state.dir_home,'demo', 'ba','ba2.png')
|
1046 |
+
image = Image.open(ba)
|
1047 |
+
st.image(image, caption='LeafMachine2 Collage', output_format = "PNG")
|
1048 |
+
|
1049 |
+
def content_tab_component():
|
1050 |
+
st.header('Archival Components')
|
1051 |
+
ACD_version = st.selectbox("Archival Component Detector (ACD) Version", ["Version 2.1", "Version 2.2"])
|
1052 |
+
|
1053 |
+
ACD_confidence_default = int(st.session_state.config['leafmachine']['archival_component_detector']['minimum_confidence_threshold'] * 100)
|
1054 |
+
ACD_confidence = st.number_input("ACD Confidence Threshold (%)", min_value=0, max_value=100,value=ACD_confidence_default)
|
1055 |
+
st.session_state.config['leafmachine']['archival_component_detector']['minimum_confidence_threshold'] = float(ACD_confidence/100)
|
1056 |
+
|
1057 |
+
st.session_state.config['leafmachine']['archival_component_detector']['do_save_prediction_overlay_images'] = st.checkbox("Save Archival Prediction Overlay Images", st.session_state.config['leafmachine']['archival_component_detector'].get('do_save_prediction_overlay_images', True))
|
1058 |
+
|
1059 |
+
st.session_state.config['leafmachine']['archival_component_detector']['ignore_objects_for_overlay'] = st.multiselect("Hide Archival Components in Prediction Overlay Images",
|
1060 |
+
['ruler', 'barcode','label', 'colorcard','map','envelope','photo','attached_item','weights',],
|
1061 |
+
default=[])
|
1062 |
+
|
1063 |
+
# Depending on the selected version, set the configuration
|
1064 |
+
if ACD_version == "Version 2.1":
|
1065 |
+
st.session_state.config['leafmachine']['archival_component_detector']['detector_type'] = 'Archival_Detector'
|
1066 |
+
st.session_state.config['leafmachine']['archival_component_detector']['detector_version'] = 'PREP_final'
|
1067 |
+
st.session_state.config['leafmachine']['archival_component_detector']['detector_iteration'] = 'PREP_final'
|
1068 |
+
st.session_state.config['leafmachine']['archival_component_detector']['detector_weights'] = 'best.pt'
|
1069 |
+
elif ACD_version == "Version 2.2": #TODO update this to version 2.2
|
1070 |
+
st.session_state.config['leafmachine']['archival_component_detector']['detector_type'] = 'Archival_Detector'
|
1071 |
+
st.session_state.config['leafmachine']['archival_component_detector']['detector_version'] = 'PREP_final'
|
1072 |
+
st.session_state.config['leafmachine']['archival_component_detector']['detector_iteration'] = 'PREP_final'
|
1073 |
+
st.session_state.config['leafmachine']['archival_component_detector']['detector_weights'] = 'best.pt'
|
1074 |
+
|
1075 |
+
|
1076 |
+
def content_tab_processing():
|
1077 |
+
st.header('Processing Options')
|
1078 |
+
col_processing_1, col_processing_2 = st.columns([2,2,])
|
1079 |
+
with col_processing_1:
|
1080 |
+
st.subheader('Compute Options')
|
1081 |
+
st.session_state.config['leafmachine']['project']['num_workers'] = st.number_input("Number of CPU workers", value=st.session_state.config['leafmachine']['project'].get('num_workers', 1), disabled=True)
|
1082 |
+
st.session_state.config['leafmachine']['project']['batch_size'] = st.number_input("Batch size", value=st.session_state.config['leafmachine']['project'].get('batch_size', 500), help='Sets the batch size for the LeafMachine2 cropping. If computer RAM is filled, lower this value to ~100.')
|
1083 |
+
with col_processing_2:
|
1084 |
+
st.subheader('Misc')
|
1085 |
+
st.session_state.config['leafmachine']['project']['prefix_removal'] = st.text_input("Remove prefix from catalog number", st.session_state.config['leafmachine']['project'].get('prefix_removal', ''))
|
1086 |
+
st.session_state.config['leafmachine']['project']['suffix_removal'] = st.text_input("Remove suffix from catalog number", st.session_state.config['leafmachine']['project'].get('suffix_removal', ''))
|
1087 |
+
st.session_state.config['leafmachine']['project']['catalog_numerical_only'] = st.checkbox("Require 'Catalog Number' to be numerical only", st.session_state.config['leafmachine']['project'].get('catalog_numerical_only', True))
|
1088 |
+
|
1089 |
+
### Logging and Image Validation - col_v1
|
1090 |
+
st.header('Logging and Image Validation')
|
1091 |
+
col_v1, col_v2 = st.columns(2)
|
1092 |
+
with col_v1:
|
1093 |
+
st.session_state.config['leafmachine']['do']['check_for_illegal_filenames'] = st.checkbox("Check for illegal filenames", st.session_state.config['leafmachine']['do'].get('check_for_illegal_filenames', True))
|
1094 |
+
st.session_state.config['leafmachine']['do']['check_for_corrupt_images_make_vertical'] = st.checkbox("Check for corrupt images", st.session_state.config['leafmachine']['do'].get('check_for_corrupt_images_make_vertical', True))
|
1095 |
+
|
1096 |
+
st.session_state.config['leafmachine']['print']['verbose'] = st.checkbox("Print verbose", st.session_state.config['leafmachine']['print'].get('verbose', True))
|
1097 |
+
st.session_state.config['leafmachine']['print']['optional_warnings'] = st.checkbox("Show optional warnings", st.session_state.config['leafmachine']['print'].get('optional_warnings', True))
|
1098 |
+
|
1099 |
+
with col_v2:
|
1100 |
+
log_level = st.session_state.config['leafmachine']['logging'].get('log_level', None)
|
1101 |
+
log_level_display = log_level if log_level is not None else 'default'
|
1102 |
+
selected_log_level = st.selectbox("Logging Level", ['default', 'DEBUG', 'INFO', 'WARNING', 'ERROR'], index=['default', 'DEBUG', 'INFO', 'WARNING', 'ERROR'].index(log_level_display))
|
1103 |
+
|
1104 |
+
if selected_log_level == 'default':
|
1105 |
+
st.session_state.config['leafmachine']['logging']['log_level'] = None
|
1106 |
+
else:
|
1107 |
+
st.session_state.config['leafmachine']['logging']['log_level'] = selected_log_level
|
1108 |
+
|
1109 |
+
def content_tab_domain():
|
1110 |
+
st.header('Embeddings Database')
|
1111 |
+
col_emb_1, col_emb_2 = st.columns([4,2])
|
1112 |
+
with col_emb_1:
|
1113 |
+
st.markdown(
|
1114 |
+
"""
|
1115 |
+
VoucherVision includes the option of using domain knowledge inside the dynamically generated prompts. The OCR text is queried against a database of existing label transcriptions. The most similar existing transcriptions act as examples of what the LLM should emulate and are shown to the LLM as JSON objects. VoucherVision uses cosine similarity search to return the most similar existing transcription.
|
1116 |
+
- Note: Using domain knowledge may increase the chance that foreign text is included in the final transcription
|
1117 |
+
- Disabling this feature will show the LLM multiple examples of an empty JSON skeleton structure instead
|
1118 |
+
- Enabling this option requires a GPU with at least 8GB of VRAM
|
1119 |
+
- The domain knowledge files are located in the directory "../VoucherVision/domain_knowledge". On the first run the embeddings database must be created, which takes time. If the database creation runs every time you use VoucherVision, then something is wrong.
|
1120 |
+
"""
|
1121 |
+
)
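# Illustrative sketch only (not the retrieval code VoucherVision ships): the cosine similarity
# lookup described in the text above amounts to something like
#     query_vec = embed(ocr_text)                      # `embed` is a placeholder embedding model
#     scores = db_vectors @ query_vec / (np.linalg.norm(db_vectors, axis=1) * np.linalg.norm(query_vec))
#     best_example = transcriptions[int(scores.argmax())]
# where `db_vectors` and `transcriptions` stand in for the stored label-transcription embeddings
# and their source records (np is numpy).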
|
1122 |
+
|
1123 |
+
st.write(f"Domain Knowledge is only available for the following prompts:")
|
1124 |
+
for available_prompts in PROMPTS_THAT_NEED_DOMAIN_KNOWLEDGE:
|
1125 |
+
st.markdown(f"- {available_prompts}")
|
1126 |
+
|
1127 |
+
if st.session_state.config['leafmachine']['project']['prompt_version'] in PROMPTS_THAT_NEED_DOMAIN_KNOWLEDGE:
|
1128 |
+
st.session_state.config['leafmachine']['project']['use_domain_knowledge'] = st.checkbox("Use domain knowledge", True, disabled=True)
|
1129 |
+
else:
|
1130 |
+
st.session_state.config['leafmachine']['project']['use_domain_knowledge'] = st.checkbox("Use domain knowledge", False, disabled=True)
|
1131 |
+
|
1132 |
+
st.write("")
|
1133 |
+
if st.session_state.config['leafmachine']['project']['use_domain_knowledge']:
|
1134 |
+
st.session_state.config['leafmachine']['project']['embeddings_database_name'] = st.text_input("Embeddings database name (only use underscores)", st.session_state.config['leafmachine']['project'].get('embeddings_database_name', ''))
|
1135 |
+
st.session_state.config['leafmachine']['project']['build_new_embeddings_database'] = st.checkbox("Build *new* embeddings database", st.session_state.config['leafmachine']['project'].get('build_new_embeddings_database', False))
|
1136 |
+
st.session_state.config['leafmachine']['project']['path_to_domain_knowledge_xlsx'] = st.text_input("Path to domain knowledge CSV file (will be used to create new embeddings database)", st.session_state.config['leafmachine']['project'].get('path_to_domain_knowledge_xlsx', ''))
|
1137 |
+
else:
|
1138 |
+
st.session_state.config['leafmachine']['project']['embeddings_database_name'] = st.text_input("Embeddings database name (only use underscores)", st.session_state.config['leafmachine']['project'].get('embeddings_database_name', ''), disabled=True)
|
1139 |
+
st.session_state.config['leafmachine']['project']['build_new_embeddings_database'] = st.checkbox("Build *new* embeddings database", st.session_state.config['leafmachine']['project'].get('build_new_embeddings_database', False), disabled=True)
|
1140 |
+
st.session_state.config['leafmachine']['project']['path_to_domain_knowledge_xlsx'] = st.text_input("Path to domain knowledge CSV file (will be used to create new embeddings database)", st.session_state.config['leafmachine']['project'].get('path_to_domain_knowledge_xlsx', ''), disabled=True)
|
1141 |
+
|
1142 |
+
def render_expense_report_summary():
|
1143 |
+
expense_summary = st.session_state.expense_summary
|
1144 |
+
expense_report = st.session_state.expense_report
|
1145 |
+
st.header('Expense Report Summary')
|
1146 |
+
|
1147 |
+
if expense_summary:
|
1148 |
+
st.metric(label="Total Cost", value=f"${round(expense_summary['total_cost_sum'], 4):,}")
|
1149 |
+
col1, col2 = st.columns(2)
|
1150 |
+
|
1151 |
+
# Run count and total costs
|
1152 |
+
with col1:
|
1153 |
+
st.metric(label="Run Count", value=expense_summary['run_count'])
|
1154 |
+
st.metric(label="Tokens In", value=f"{expense_summary['tokens_in_sum']:,}")
|
1155 |
+
|
1156 |
+
# Token information
|
1157 |
+
with col2:
|
1158 |
+
st.metric(label="Total Images", value=expense_summary['n_images_sum'])
|
1159 |
+
st.metric(label="Tokens Out", value=f"{expense_summary['tokens_out_sum']:,}")
|
1160 |
+
|
1161 |
+
|
1162 |
+
# Calculate cost proportion per image for each API version
|
1163 |
+
st.subheader('Average Cost per Image by API Version')
|
1164 |
+
cost_labels = []
|
1165 |
+
cost_values = []
|
1166 |
+
total_images = 0
|
1167 |
+
cost_per_image_dict = {}
|
1168 |
+
# Iterate through the expense report to accumulate costs and image counts
|
1169 |
+
for index, row in expense_report.iterrows():
|
1170 |
+
api_version = row['api_version']
|
1171 |
+
total_cost = row['total_cost']
|
1172 |
+
n_images = row['n_images']
|
1173 |
+
total_images += n_images # Keep track of total images processed
|
1174 |
+
if api_version not in cost_per_image_dict:
|
1175 |
+
cost_per_image_dict[api_version] = {'total_cost': 0, 'n_images': 0}
|
1176 |
+
cost_per_image_dict[api_version]['total_cost'] += total_cost
|
1177 |
+
cost_per_image_dict[api_version]['n_images'] += n_images
|
1178 |
+
|
1179 |
+
api_versions = list(cost_per_image_dict.keys())
|
1180 |
+
colors = [COLORS_EXPENSE_REPORT[version] if version in COLORS_EXPENSE_REPORT else '#DDDDDD' for version in api_versions]
|
1181 |
+
|
1182 |
+
# Calculate the cost per image for each API version
|
1183 |
+
for version, cost_data in cost_per_image_dict.items():
|
1184 |
+
total_cost = cost_data['total_cost']
|
1185 |
+
n_images = cost_data['n_images']
|
1186 |
+
# Calculate the cost per image for this version
|
1187 |
+
cost_per_image = total_cost / n_images if n_images > 0 else 0
|
1188 |
+
cost_labels.append(version)
|
1189 |
+
cost_values.append(cost_per_image)
|
1190 |
+
# Generate the pie chart
|
1191 |
+
cost_pie_chart = go.Figure(data=[go.Pie(labels=cost_labels, values=cost_values, hole=.3)])
|
1192 |
+
# Update traces for custom text in hoverinfo, displaying cost with a dollar sign and two decimal places
|
1193 |
+
cost_pie_chart.update_traces(
|
1194 |
+
marker=dict(colors=colors),
|
1195 |
+
text=[f"${value:.2f}" for value in cost_values], # Formats the cost as a string with a dollar sign and two decimals
|
1196 |
+
textinfo='percent+label',
|
1197 |
+
hoverinfo='label+percent+text' # Adds custom text (formatted cost) to the hover information
|
1198 |
+
)
|
1199 |
+
st.plotly_chart(cost_pie_chart, use_container_width=True)
|
1200 |
+
|
1201 |
+
|
1202 |
+
|
1203 |
+
st.subheader('Proportion of Total Cost by API Version')
|
1204 |
+
cost_labels = []
|
1205 |
+
cost_proportions = []
|
1206 |
+
total_cost_by_version = {}
|
1207 |
+
# Sum the total cost for each API version
|
1208 |
+
for index, row in expense_report.iterrows():
|
1209 |
+
api_version = row['api_version']
|
1210 |
+
total_cost = row['total_cost']
|
1211 |
+
if api_version not in total_cost_by_version:
|
1212 |
+
total_cost_by_version[api_version] = 0
|
1213 |
+
total_cost_by_version[api_version] += total_cost
|
1214 |
+
# Calculate the combined total cost for all versions
|
1215 |
+
combined_total_cost = sum(total_cost_by_version.values())
|
1216 |
+
# Calculate the proportion of total cost for each API version
|
1217 |
+
for version, total_cost in total_cost_by_version.items():
|
1218 |
+
proportion = (total_cost / combined_total_cost) * 100 if combined_total_cost > 0 else 0
|
1219 |
+
cost_labels.append(version)
|
1220 |
+
cost_proportions.append(proportion)
|
1221 |
+
# Generate the pie chart
|
1222 |
+
cost_pie_chart = go.Figure(data=[go.Pie(labels=cost_labels, values=cost_proportions, hole=.3)])
|
1223 |
+
# Update traces for custom text in hoverinfo
|
1224 |
+
cost_pie_chart.update_traces(
|
1225 |
+
marker=dict(colors=colors),
|
1226 |
+
text=[f"${cost:.2f}" for cost in total_cost_by_version.values()], # This will format the cost to 2 decimal places
|
1227 |
+
textinfo='percent+label',
|
1228 |
+
hoverinfo='label+percent+text' # This tells Plotly to show the label, percent, and custom text (cost) on hover
|
1229 |
+
)
|
1230 |
+
st.plotly_chart(cost_pie_chart, use_container_width=True)
|
1231 |
+
|
1232 |
+
# API version usage percentages pie chart
|
1233 |
+
st.subheader('Runs by API Version')
|
1234 |
+
api_versions = list(expense_summary['api_version_percentages'].keys())
|
1235 |
+
percentages = [expense_summary['api_version_percentages'][version] for version in api_versions]
|
1236 |
+
pie_chart = go.Figure(data=[go.Pie(labels=api_versions, values=percentages, hole=.3)])
|
1237 |
+
pie_chart.update_layout(margin=dict(t=0, b=0, l=0, r=0))
|
1238 |
+
pie_chart.update_traces(marker=dict(colors=colors),)
|
1239 |
+
st.plotly_chart(pie_chart, use_container_width=True)
|
1240 |
+
|
1241 |
+
else:
|
1242 |
+
st.error('No expense report data available.')
|
1243 |
+
|
1244 |
+
def sidebar_content():
|
1245 |
+
try:
|
1246 |
+
validate_dir(os.path.join(st.session_state.dir_home,'expense_report'))
|
1247 |
+
st.session_state.expense_summary, st.session_state.expense_report = summarize_expense_report(os.path.join(st.session_state.dir_home,'expense_report','expense_report.csv'))
|
1248 |
+
render_expense_report_summary()
|
1249 |
+
except:
|
1250 |
+
st.header('Expense Report Summary')
|
1251 |
+
st.write('Available after first run...')
|
1252 |
+
|
1253 |
+
# # Check if the expense summary is available in the session state
|
1254 |
+
# if 'expense' not in st.session_state or st.session_state.expense is None:
|
1255 |
+
# st.sidebar.write('No expense report data available.')
|
1256 |
+
# return
|
1257 |
+
|
1258 |
+
# # Retrieve the expense report summary
|
1259 |
+
# expense_summary = st.session_state.expense
|
1260 |
+
|
1261 |
+
# # Display the expense report summary
|
1262 |
+
# st.sidebar.markdown('**Run Count**: ' + str(expense_summary['run_count']))
|
1263 |
+
|
1264 |
+
# # API version usage percentages
|
1265 |
+
# st.sidebar.markdown('**API Version Usage**:')
|
1266 |
+
# for version, percentage in expense_summary['api_version_percentages'].items():
|
1267 |
+
# st.sidebar.markdown(f'- {version}: {percentage:.2f}%')
|
1268 |
+
|
1269 |
+
# # Summary of costs and tokens
|
1270 |
+
# st.sidebar.markdown('**Total Cost**: $' + str(round(expense_summary['total_cost_sum'], 4)))
|
1271 |
+
# st.sidebar.markdown('**Tokens In**: ' + str(expense_summary['tokens_in_sum']))
|
1272 |
+
# st.sidebar.markdown('**Tokens Out**: ' + str(expense_summary['tokens_out_sum']))
|
1273 |
+
# # st.sidebar.markdown('**Rate In**: $' + str(round(expense_summary['rate_in_sum'], 2)) + ' per 1000 tokens')
|
1274 |
+
# # st.sidebar.markdown('**Rate Out**: $' + str(round(expense_summary['rate_out_sum'], 2)) + ' per 1000 tokens')
|
1275 |
+
# st.sidebar.markdown('**Cost In**: $' + str(round(expense_summary['cost_in_sum'], 4)))
|
1276 |
+
# st.sidebar.markdown('**Cost Out**: $' + str(round(expense_summary['cost_out_sum'], 4)))
|
1277 |
+
|
1278 |
+
def main():
|
1279 |
+
with st.sidebar:
|
1280 |
+
sidebar_content()
|
1281 |
+
# Main App
|
1282 |
+
content_header()
|
1283 |
+
|
1284 |
+
tab_settings, tab_prompt, tab_domain, tab_component, tab_processing, tab_private, tab_delete = st.tabs(["Project Settings", "Prompt Builder", "Domain Knowledge","Component Detector", "Processing Options", "API Keys", "Space-Saver"])
|
1285 |
+
|
1286 |
+
with tab_settings:
|
1287 |
+
content_tab_settings()
|
1288 |
+
|
1289 |
+
with tab_prompt:
|
1290 |
+
if st.button("Build Custom LLM Prompt"):
|
1291 |
+
st.session_state.proceed_to_build_llm_prompt = True
|
1292 |
+
st.rerun()
|
1293 |
+
|
1294 |
+
with tab_component:
|
1295 |
+
content_tab_component()
|
1296 |
+
|
1297 |
+
with tab_domain:
|
1298 |
+
content_tab_domain()
|
1299 |
+
|
1300 |
+
with tab_processing:
|
1301 |
+
content_tab_processing()
|
1302 |
+
|
1303 |
+
with tab_private:
|
1304 |
+
if st.button("Edit API Keys"):
|
1305 |
+
st.session_state.proceed_to_private = True
|
1306 |
+
st.rerun()
|
1307 |
+
|
1308 |
+
with tab_delete:
|
1309 |
+
create_space_saver()
|
1310 |
+
|
1311 |
+
st.set_page_config(layout="wide", page_icon='img/icon.ico', page_title='VoucherVision')
|
1312 |
+
|
1313 |
+
# Default YAML file path
|
1314 |
+
if 'config' not in st.session_state:
|
1315 |
+
st.session_state.config, st.session_state.dir_home = build_VV_config()
|
1316 |
+
setup_streamlit_config(st.session_state.dir_home)
|
1317 |
+
|
1318 |
+
if 'proceed_to_main' not in st.session_state:
|
1319 |
+
st.session_state.proceed_to_main = False # New state variable to control the flow
|
1320 |
+
|
1321 |
+
if 'proceed_to_build_llm_prompt' not in st.session_state:
|
1322 |
+
st.session_state.proceed_to_build_llm_prompt = False # New state variable to control the flow
|
1323 |
+
if 'proceed_to_private' not in st.session_state:
|
1324 |
+
st.session_state.proceed_to_private = False # New state variable to control the flow
|
1325 |
+
|
1326 |
+
if 'private_file' not in st.session_state:
|
1327 |
+
st.session_state.private_file = does_private_file_exist()
|
1328 |
+
if st.session_state.private_file:
|
1329 |
+
st.session_state.proceed_to_main = True
|
1330 |
+
|
1331 |
+
# Initialize session_state variables if they don't exist
|
1332 |
+
if 'prompt_info' not in st.session_state:
|
1333 |
+
st.session_state['prompt_info'] = {}
|
1334 |
+
if 'rules' not in st.session_state:
|
1335 |
+
st.session_state['rules'] = {}
|
1336 |
+
|
1337 |
+
if not st.session_state.private_file:
|
1338 |
+
create_private_file()
|
1339 |
+
elif st.session_state.proceed_to_build_llm_prompt:
|
1340 |
+
build_LLM_prompt_config()
|
1341 |
+
elif st.session_state.proceed_to_private:
|
1342 |
+
create_private_file()
|
1343 |
+
elif st.session_state.proceed_to_main:
|
1344 |
+
main()
|
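Usage sketch (inferred from the Streamlit code above, not stated in the commit): with the packages from requirements.txt installed, app.py can be launched directly with

streamlit run app.py

The create_desktop_shortcut.py script added below instead launches VoucherVision through `python run_VoucherVision.py`.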
bin/version.yml
ADDED
@@ -0,0 +1,2 @@
last_update: '2023-10-24'
version: v-2-1
create_desktop_shortcut.py
ADDED
@@ -0,0 +1,69 @@
import os, sys
import win32com.client
import tkinter as tk
from tkinter import filedialog
from PIL import Image, ImageEnhance

def create_shortcut():
    # Request user's confirmation
    confirmation = input("Do you want to create a shortcut for VoucherVision? (y/n): ")

    if confirmation.lower() != "y":
        print("Okay, no shortcut will be created.")
        return

    # Get the script path
    script_path = os.path.abspath(__file__)
    # Get the directory of the script
    script_dir = os.path.dirname(script_path)

    # Path to the icon file: boost the saturation of icon.jpg and save it as a .ico
    icon_path = os.path.join(script_dir, 'img', 'icon.jpg')
    img = Image.open(icon_path)
    enhancer = ImageEnhance.Color(img)
    img_enhanced = enhancer.enhance(1.5)
    img_enhanced.save(os.path.join(script_dir, 'img', 'icon.ico'), format='ICO', sizes=[(256, 256)])
    icon_path_ico = os.path.join(script_dir, 'img', 'icon.ico')

    # Construct the path to the static folder
    static_dir = os.path.join(script_dir, "static")

    # Name of the shortcut
    shortcut_name = "Voucher Vision"

    root = tk.Tk()
    root.withdraw()  # Hide the main window
    root.update()    # Ensures that the dialog appears on top

    folder_path = filedialog.askdirectory(title="Choose location to save the shortcut")
    print(f"Shortcut will be saved to {folder_path}")

    venv_path = filedialog.askdirectory(title="Choose the location of your Python virtual environment")
    print(f"Using virtual environment located at {venv_path}")

    # Path to the activate script in the venv
    activate_path = os.path.join(venv_path, "Scripts")

    shortcut_path = os.path.join(folder_path, f'{shortcut_name}.lnk')

    shell = win32com.client.Dispatch("WScript.Shell")
    shortcut = shell.CreateShortCut(shortcut_path)
    # Raw string so the backslashes are not treated as escape sequences
    shortcut.Targetpath = r"%windir%\System32\cmd.exe"
    # The command activates the venv, navigates to the script's directory, then runs the script
    # shortcut.Arguments = f'/K "{activate_path} & cd /D {os.path.dirname(script_path)} & streamlit run VoucherVisionEditor.py"'
    # shortcut.Arguments = f'/K "{activate_path} & cd /D {static_dir} & start cmd /c python -m http.server & cd /D {script_dir} & streamlit run VoucherVisionEditor.py"'
    streamlit_exe = os.path.join(venv_path, "Scripts", "streamlit")
    print(script_dir)
    print(streamlit_exe)
    activate_path = os.path.join(script_dir, "venv_VV", "Scripts")
    print(activate_path)
    shortcut.Arguments = f'/K cd /D ""{activate_path}"" && activate && cd /D ""{script_dir}"" && python run_VoucherVision.py'
    # Set the icon of the shortcut
    shortcut.IconLocation = icon_path_ico

    shortcut.save()

    print(f"Shortcut created with the name '{shortcut_name}' in the chosen directory.")

if __name__ == "__main__":
    create_shortcut()
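A usage note (inferred from the code, not stated in the commit): this script is Windows-only (it drives WScript.Shell through pywin32) and also needs Pillow for the icon conversion. It is run once from the repository root, e.g. `python create_desktop_shortcut.py`, and then prompts for the shortcut location and the virtual environment to use.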
custom_prompts/required_structure.yaml
ADDED
@@ -0,0 +1,62 @@
LLM: gpt
instructions: '1. Refactor the unstructured OCR text into a dictionary based on the
  JSON structure outlined below.

  2. You should map the unstructured OCR text to the appropriate JSON key and then
  populate the field based on its rules.

  3. Some JSON key fields are permitted to remain empty if the corresponding information
  is not found in the unstructured OCR text.

  4. Ignore any information in the OCR text that doesn''t fit into the defined JSON
  structure.

  5. Duplicate dictionary fields are not allowed.

  6. Ensure that all JSON keys are in lowercase.

  7. Ensure that new JSON field values follow sentence case capitalization.

  8. Ensure all key-value pairs in the JSON dictionary strictly adhere to the format
  and data types specified in the template.

  9. Ensure the output JSON string is valid JSON format. It should not have trailing
  commas or unquoted keys.

  10. Only return a JSON dictionary represented as a string. You should not explain
  your answer.'
json_formatting_instructions: "The next section of instructions outlines how to format\
  \ the JSON dictionary. The keys are the same as those of the final formatted JSON\
  \ object.\nFor each key there is a format requirement that specifies how to transcribe\
  \ the information for that key. \nThe possible formatting options are:\n1. \"verbatim\
  \ transcription\" - field is populated with verbatim text from the unformatted OCR.\n\
  2. \"spell check transcription\" - field is populated with spelling corrected text\
  \ from the unformatted OCR.\n3. \"boolean yes no\" - field is populated with only\
  \ yes or no.\n4. \"boolean 1 0\" - field is populated with only 1 or 0.\n5. \"integer\"\
  \ - field is populated with only an integer.\n6. \"[list]\" - field is populated\
  \ from one of the values in the list.\n7. \"yyyy-mm-dd\" - field is populated with\
  \ a date in the format year-month-day.\nThe desired null value is also given. Populate\
  \ the field with the null value if the information for that key is not present in\
  \ the unformatted OCR text."
mapping:
  # Add column names to the desired category. This is used to map the VV Editor.
  COLLECTING: []
  GEOGRAPHY: []
  LOCALITY: []
  MISCELLANEOUS: []
  TAXONOMY:
  - catalog_number
rules:
  Dictionary:
    # Manually add rows here. You MUST keep 'catalog_number' unchanged. Use 'catalog_number' as a guide for adding more columns.
    # The only values allowed in the 'format' key are those outlined above in the 'json_formatting_instructions' section.
    # If you want an empty cell by default, use '' for the 'null_value'.
    catalog_number:
      description: The barcode identifier, typically a number with at least 6 digits,
        but fewer than 30 digits.
      format: verbatim transcription
      null_value: ''
  # Do not change or remove below. This is required for some LLMs
  SpeciesName:
    taxonomy:
    - Genus_species
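A minimal loading sketch (illustrative only, not VoucherVision's own loader; the path and keys are taken from the YAML above):

import yaml

with open('custom_prompts/required_structure.yaml', 'r', encoding='utf-8') as f:
    prompt = yaml.safe_load(f)

print(prompt['LLM'])  # 'gpt'
for column, rule in prompt['rules']['Dictionary'].items():
    # Each column carries a description, a format requirement, and a null value.
    print(column, rule['format'], repr(rule['null_value']))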
custom_prompts/version_2.yaml
ADDED
@@ -0,0 +1,229 @@
1 |
+
LLM: gpt
|
2 |
+
instructions: '1. Refactor the unstructured OCR text into a dictionary based on the
|
3 |
+
JSON structure outlined below.
|
4 |
+
|
5 |
+
2. You should map the unstructured OCR text to the appropriate JSON key and then
|
6 |
+
populate the field based on its rules.
|
7 |
+
|
8 |
+
3. Some JSON key fields are permitted to remain empty if the corresponding information
|
9 |
+
is not found in the unstructured OCR text.
|
10 |
+
|
11 |
+
4. Ignore any information in the OCR text that doesn''t fit into the defined JSON
|
12 |
+
structure.
|
13 |
+
|
14 |
+
5. Duplicate dictionary fields are not allowed.
|
15 |
+
|
16 |
+
6. Ensure that all JSON keys are in lowercase.
|
17 |
+
|
18 |
+
7. Ensure that new JSON field values follow sentence case capitalization.
|
19 |
+
|
20 |
+
8. Ensure all key-value pairs in the JSON dictionary strictly adhere to the format
|
21 |
+
and data types specified in the template.
|
22 |
+
|
23 |
+
9. Ensure the output JSON string is valid JSON format. It should not have trailing
|
24 |
+
commas or unquoted keys.
|
25 |
+
|
26 |
+
10. Only return a JSON dictionary represented as a string. You should not explain
|
27 |
+
your answer.'
|
28 |
+
json_formatting_instructions: "The next section of instructions outlines how to format\
|
29 |
+
\ the JSON dictionary. The keys are the same as those of the final formatted JSON\
|
30 |
+
\ object.\nFor each key there is a format requirement that specifies how to transcribe\
|
31 |
+
\ the information for that key. \nThe possible formatting options are:\n1. \"verbatim\
|
32 |
+
\ transcription\" - field is populated with verbatim text from the unformatted OCR.\n\
|
33 |
+
2. \"spell check transcription\" - field is populated with spelling corrected text\
|
34 |
+
\ from the unformatted OCR.\n3. \"boolean yes no\" - field is populated with only\
|
35 |
+
\ yes or no.\n4. \"boolean 1 0\" - field is populated with only 1 or 0.\n5. \"integer\"\
|
36 |
+
\ - field is populated with only an integer.\n6. \"[list]\" - field is populated\
|
37 |
+
\ from one of the values in the list.\n7. \"yyyy-mm-dd\" - field is populated with\
|
38 |
+
\ a date in the format year-month-day.\nThe desired null value is also given. Populate\
|
39 |
+
\ the field with the null value if the information for that key is not present in\
|
40 |
+
\ the unformatted OCR text."
|
41 |
+
mapping:
|
42 |
+
COLLECTING:
|
43 |
+
- collectors
|
44 |
+
- collector_number
|
45 |
+
- determined_by
|
46 |
+
- multiple_names
|
47 |
+
- verbatim_date
|
48 |
+
- date
|
49 |
+
- end_date
|
50 |
+
GEOGRAPHY:
|
51 |
+
- country
|
52 |
+
- state
|
53 |
+
- county
|
54 |
+
- min_elevation
|
55 |
+
- max_elevation
|
56 |
+
- elevation_units
|
57 |
+
LOCALITY:
|
58 |
+
- locality_name
|
59 |
+
- verbatim_coordinates
|
60 |
+
- decimal_coordinates
|
61 |
+
- datum
|
62 |
+
- plant_description
|
63 |
+
- cultivated
|
64 |
+
- habitat
|
65 |
+
MISCELLANEOUS: []
|
66 |
+
TAXONOMY:
|
67 |
+
- catalog_number
|
68 |
+
- genus
|
69 |
+
- species
|
70 |
+
- subspecies
|
71 |
+
- variety
|
72 |
+
- forma
|
73 |
+
rules:
|
74 |
+
Dictionary:
|
75 |
+
catalog_number:
|
76 |
+
description: The barcode identifier, typically a number with at least 6 digits,
|
77 |
+
but fewer than 30 digits.
|
78 |
+
format: verbatim transcription
|
79 |
+
null_value: ''
|
80 |
+
collector_number:
|
81 |
+
description: Unique identifier or number that denotes the specific collecting
|
82 |
+
event and associated with the collector.
|
83 |
+
format: verbatim transcription
|
84 |
+
null_value: s.n.
|
85 |
+
collectors:
|
86 |
+
description: Full name(s) of the individual(s) responsible for collecting the
|
87 |
+
specimen. When multiple collectors are involved, their names should be separated
|
88 |
+
by commas.
|
89 |
+
format: verbatim transcription
|
90 |
+
null_value: not present
|
91 |
+
country:
|
92 |
+
description: Country that corresponds to the current geographic location of
|
93 |
+
collection. Capitalize first letter of each word. If abbreviation is given
|
94 |
+
populate field with the full spelling of the country's name.
|
95 |
+
format: spell check transcription
|
96 |
+
null_value: ''
|
97 |
+
county:
|
98 |
+
description: Administrative division 2 that corresponds to the current geographic
|
99 |
+
location of collection; capitalize first letter of each word. Administrative
|
100 |
+
division 2 is equivalent to a U.S. county, parish, borough.
|
101 |
+
format: spell check transcription
|
102 |
+
null_value: ''
|
103 |
+
cultivated:
|
104 |
+
description: Cultivated plants are intentionally grown by humans. In text descriptions,
|
105 |
+
look for planting dates, garden locations, ornamental, cultivar names, garden,
|
106 |
+
or farm to indicate cultivated plant.
|
107 |
+
format: boolean yes no
|
108 |
+
null_value: ''
|
109 |
+
date:
|
110 |
+
description: 'Date the specimen was collected formatted as year-month-day. If
|
111 |
+
specific components of the date are unknown, they should be replaced with
|
112 |
+
zeros. Examples: ''0000-00-00'' if the entire date is unknown, ''YYYY-00-00''
|
113 |
+
if only the year is known, and ''YYYY-MM-00'' if year and month are known
|
114 |
+
but day is not.'
|
115 |
+
format: yyyy-mm-dd
|
116 |
+
null_value: ''
|
117 |
+
datum:
|
118 |
+
description: Datum of location coordinates. Possible values are included in the
|
119 |
+
format list. Leave field blank if unclear. [WGS84, WGS72, WGS66, WGS60, NAD83,
|
120 |
+
NAD27, OSGB36, ETRS89, ED50, GDA94, JGD2011, Tokyo97, KGD2002, TWD67, TWD97,
|
121 |
+
BJS54, XAS80, GCJ-02, BD-09, PZ-90.11, GTRF, CGCS2000, ITRF88, ITRF89, ITRF90,
|
122 |
+
ITRF91, ITRF92, ITRF93, ITRF94, ITRF96, ITRF97, ITRF2000, ITRF2005, ITRF2008,
|
123 |
+
ITRF2014, Hong Kong Principal Datum, SAD69]
|
124 |
+
format: '[list]'
|
125 |
+
null_value: ''
|
126 |
+
decimal_coordinates:
|
127 |
+
description: Correct and convert the verbatim location coordinates to conform
|
128 |
+
with the decimal degrees GPS coordinate format.
|
129 |
+
format: spell check transcription
|
130 |
+
null_value: ''
|
131 |
+
determined_by:
|
132 |
+
description: Full name of the individual responsible for determining the taxonomic
|
133 |
+
name of the specimen. Sometimes the name will be near to the characters 'det'
|
134 |
+
to denote determination. This name may be isolated from other names in the
|
135 |
+
unformatted OCR text.
|
136 |
+
format: verbatim transcription
|
137 |
+
null_value: ''
|
138 |
+
elevation_units:
|
139 |
+
description: 'Elevation units must be meters. If min_elevation field is populated,
|
140 |
+
then elevation_units: ''m''. Otherwise elevation_units: ''''.'
|
141 |
+
format: spell check transcription
|
142 |
+
null_value: ''
|
143 |
+
end_date:
|
144 |
+
description: 'If a date range is provided, this represents the later or ending
|
145 |
+
date of the collection period, formatted as year-month-day. If specific components
|
146 |
+
of the date are unknown, they should be replaced with zeros. Examples: ''0000-00-00''
|
147 |
+
if the entire end date is unknown, ''YYYY-00-00'' if only the year of the
|
148 |
+
end date is known, and ''YYYY-MM-00'' if year and month of the end date are
|
149 |
+
known but the day is not.'
|
150 |
+
format: yyyy-mm-dd
|
151 |
+
null_value: ''
|
152 |
+
forma:
|
153 |
+
description: Taxonomic determination to form (f.).
|
154 |
+
format: verbatim transcription
|
155 |
+
null_value: ''
|
156 |
+
genus:
|
157 |
+
description: Taxonomic determination to genus. Genus must be capitalized. If
|
158 |
+
genus is not present use the taxonomic family name followed by the word 'indet'.
|
159 |
+
format: verbatim transcription
|
160 |
+
null_value: ''
|
161 |
+
habitat:
|
162 |
+
description: Description of a plant's habitat or the location where the specimen
|
163 |
+
was collected. Ignore descriptions of the plant itself.
|
164 |
+
format: verbatim transcription
|
165 |
+
null_value: ''
|
166 |
+
locality_name:
|
167 |
+
description: Description of geographic location, landscape, landmarks, regional
|
168 |
+
features, nearby places, or any contextual information aiding in pinpointing
|
169 |
+
the exact origin or site of the specimen.
|
170 |
+
format: verbatim transcription
|
171 |
+
null_value: ''
|
172 |
+
max_elevation:
|
173 |
+
description: Maximum elevation or altitude in meters. If only one elevation
|
174 |
+
is present, then max_elevation should be set to the null_value. Only if units
|
175 |
+
are explicit then convert from feet ('ft' or 'ft.' or 'feet') to meters ('m'
|
176 |
+
or 'm.' or 'meters'). Round to integer.
|
177 |
+
format: integer
|
178 |
+
null_value: ''
|
179 |
+
min_elevation:
|
180 |
+
description: Minimum elevation or altitude in meters. Only if units are explicit
|
181 |
+
then convert from feet ('ft' or 'ft.' or 'feet') to meters ('m' or 'm.' or
|
182 |
+
'meters'). Round to integer.
|
183 |
+
format: integer
|
184 |
+
null_value: ''
|
185 |
+
multiple_names:
|
186 |
+
description: Indicate whether multiple people or collector names are present
|
187 |
+
in the unformatted OCR text. If you see more than one person's name the value
|
188 |
+
is 'yes'; otherwise the value is 'no'.
|
189 |
+
format: boolean yes no
|
190 |
+
null_value: ''
|
191 |
+
plant_description:
|
192 |
+
description: Description of plant features such as leaf shape, size, color,
|
193 |
+
stem texture, height, flower structure, scent, fruit or seed characteristics,
|
194 |
+
root system type, overall growth habit and form, any notable aroma or secretions,
|
195 |
+
presence of hairs or bristles, and any other distinguishing morphological
|
196 |
+
or physiological characteristics.
|
197 |
+
format: verbatim transcription
|
198 |
+
null_value: ''
|
199 |
+
species:
|
200 |
+
description: Taxonomic determination to species, do not capitalize species.
|
201 |
+
format: verbatim transcription
|
202 |
+
null_value: ''
|
203 |
+
state:
|
204 |
+
description: Administrative division 1 that corresponds to the current geographic
|
205 |
+
location of collection. Capitalize first letter of each word. Administrative
|
206 |
+
division 1 is equivalent to a U.S. State.
|
207 |
+
format: spell check transcription
|
208 |
+
null_value: ''
|
209 |
+
subspecies:
|
210 |
+
description: Taxonomic determination to subspecies (subsp.).
|
211 |
+
format: verbatim transcription
|
212 |
+
null_value: ''
|
213 |
+
variety:
|
214 |
+
description: Taxonomic determination to variety (var.).
|
215 |
+
format: verbatim transcription
|
216 |
+
null_value: ''
|
217 |
+
verbatim_coordinates:
|
218 |
+
description: Verbatim location coordinates as they appear on the label. Do not
|
219 |
+
convert formats. Possible coordinate types are one of [Lat, Long, UTM, TRS].
|
220 |
+
format: verbatim transcription
|
221 |
+
null_value: ''
|
222 |
+
verbatim_date:
|
223 |
+
description: Date of collection exactly as it appears on the label. Do not change
|
224 |
+
the format or correct typos.
|
225 |
+
format: verbatim transcription
|
226 |
+
null_value: s.d.
|
227 |
+
SpeciesName:
|
228 |
+
taxonomy:
|
229 |
+
- Genus_species
|
custom_prompts/version_2_OSU.yaml
ADDED
@@ -0,0 +1,230 @@
1 |
+
LLM: gpt
|
2 |
+
instructions: '1. Refactor the unstructured OCR text into a dictionary based on the
|
3 |
+
JSON structure outlined below.
|
4 |
+
|
5 |
+
2. You should map the unstructured OCR text to the appropriate JSON key and then
|
6 |
+
populate the field based on its rules.
|
7 |
+
|
8 |
+
3. Some JSON key fields are permitted to remain empty if the corresponding information
|
9 |
+
is not found in the unstructured OCR text.
|
10 |
+
|
11 |
+
4. Ignore any information in the OCR text that doesn''t fit into the defined JSON
|
12 |
+
structure.
|
13 |
+
|
14 |
+
5. Duplicate dictionary fields are not allowed.
|
15 |
+
|
16 |
+
6. Ensure that all JSON keys are in lowercase.
|
17 |
+
|
18 |
+
7. Ensure that new JSON field values follow sentence case capitalization.
|
19 |
+
|
20 |
+
8. Ensure all key-value pairs in the JSON dictionary strictly adhere to the format
|
21 |
+
and data types specified in the template.
|
22 |
+
|
23 |
+
9. Ensure the output JSON string is valid JSON format. It should not have trailing
|
24 |
+
commas or unquoted keys.
|
25 |
+
|
26 |
+
10. Only return a JSON dictionary represented as a string. You should not explain
|
27 |
+
your answer.'
|
28 |
+
json_formatting_instructions: "The next section of instructions outlines how to format\
|
29 |
+
\ the JSON dictionary. The keys are the same as those of the final formatted JSON\
|
30 |
+
\ object.\nFor each key there is a format requirement that specifies how to transcribe\
|
31 |
+
\ the information for that key. \nThe possible formatting options are:\n1. \"verbatim\
|
32 |
+
\ transcription\" - field is populated with verbatim text from the unformatted OCR.\n\
|
33 |
+
2. \"spell check transcription\" - field is populated with spelling corrected text\
|
34 |
+
\ from the unformatted OCR.\n3. \"boolean yes no\" - field is populated with only\
|
35 |
+
\ yes or no.\n4. \"boolean 1 0\" - field is populated with only 1 or 0.\n5. \"integer\"\
|
36 |
+
\ - field is populated with only an integer.\n6. \"[list]\" - field is populated\
|
37 |
+
\ from one of the values in the list.\n7. \"yyyy-mm-dd\" - field is populated with\
|
38 |
+
\ a date in the format year-month-day.\nThe desired null value is also given. Populate\
|
39 |
+
\ the field with the null value if the information for that key is not present in\
|
40 |
+
\ the unformatted OCR text."
|
41 |
+
mapping:
|
42 |
+
COLLECTING:
|
43 |
+
- collectors
|
44 |
+
- collector_number
|
45 |
+
- determined_by
|
46 |
+
- multiple_names
|
47 |
+
- verbatim_date
|
48 |
+
- date
|
49 |
+
- end_date
|
50 |
+
GEOGRAPHY:
|
51 |
+
- country
|
52 |
+
- state
|
53 |
+
- county
|
54 |
+
- min_elevation
|
55 |
+
- max_elevation
|
56 |
+
- elevation_units
|
57 |
+
LOCALITY:
|
58 |
+
- locality_name
|
59 |
+
- verbatim_coordinates
|
60 |
+
- decimal_coordinates
|
61 |
+
- datum
|
62 |
+
- plant_description
|
63 |
+
- cultivated
|
64 |
+
- habitat
|
65 |
+
MISCELLANEOUS: []
|
66 |
+
TAXONOMY:
|
67 |
+
- catalog_number
|
68 |
+
- genus
|
69 |
+
- species
|
70 |
+
- subspecies
|
71 |
+
- variety
|
72 |
+
- forma
|
73 |
+
rules:
|
74 |
+
Dictionary:
|
75 |
+
catalog_number:
|
76 |
+
description: The barcode identifier, typically a number with at least 6 digits,
|
77 |
+
but fewer than 30 digits.
|
78 |
+
format: verbatim transcription
|
79 |
+
null_value: ''
|
80 |
+
collector_number:
|
81 |
+
description: Unique identifier or number that denotes the specific collecting
|
82 |
+
event and associated with the collector.
|
83 |
+
format: verbatim transcription
|
84 |
+
null_value: s.n.
|
85 |
+
collectors:
|
86 |
+
description: Full name(s) of the individual(s) responsible for collecting the
|
87 |
+
specimen. When multiple collectors are involved, their names should be separated
|
88 |
+
by commas.
|
89 |
+
format: verbatim transcription
|
90 |
+
null_value: not present
|
91 |
+
country:
|
92 |
+
description: Country that corresponds to the current geographic location of
|
93 |
+
collection. Capitalize first letter of each word. If abbreviation is given
|
94 |
+
populate field with the full spelling of the country's name.
|
95 |
+
format: spell check transcription
|
96 |
+
null_value: ''
|
97 |
+
county:
|
98 |
+
description: Administrative division 2 that corresponds to the current geographic
|
99 |
+
location of collection; capitalize first letter of each word. Administrative
|
100 |
+
division 2 is equivalent to a U.S. county, parish, borough.
|
101 |
+
format: spell check transcription
|
102 |
+
null_value: ''
|
103 |
+
cultivated:
|
104 |
+
description: Cultivated plants are intentionally grown by humans. In text descriptions,
|
105 |
+
look for planting dates, garden locations, ornamental, cultivar names, garden,
|
106 |
+
or farm to indicate cultivated plant. The value 1 indicates that the specimen
|
107 |
+
was cultivated, and 0 otherwise.
|
108 |
+
format: boolean 1 0
|
109 |
+
null_value: '0'
|
110 |
+
date:
|
111 |
+
description: 'Date the specimen was collected formatted as year-month-day. If
|
112 |
+
specific components of the date are unknown, they should be replaced with
|
113 |
+
zeros. Examples: ''0000-00-00'' if the entire date is unknown, ''YYYY-00-00''
|
114 |
+
if only the year is known, and ''YYYY-MM-00'' if year and month are known
|
115 |
+
but day is not.'
|
116 |
+
format: yyyy-mm-dd
|
117 |
+
null_value: ''
|
118 |
+
datum:
|
119 |
+
description: Datum of location coordinates. Possible values are included in the
|
120 |
+
format list. Leave field blank if unclear. [WGS84, WGS72, WGS66, WGS60, NAD83,
|
121 |
+
NAD27, OSGB36, ETRS89, ED50, GDA94, JGD2011, Tokyo97, KGD2002, TWD67, TWD97,
|
122 |
+
BJS54, XAS80, GCJ-02, BD-09, PZ-90.11, GTRF, CGCS2000, ITRF88, ITRF89, ITRF90,
|
123 |
+
ITRF91, ITRF92, ITRF93, ITRF94, ITRF96, ITRF97, ITRF2000, ITRF2005, ITRF2008,
|
124 |
+
ITRF2014, Hong Kong Principal Datum, SAD69]
|
125 |
+
format: '[list]'
|
126 |
+
null_value: ''
|
127 |
+
decimal_coordinates:
|
128 |
+
description: Correct and convert the verbatim location coordinates to conform
|
129 |
+
with the decimal degrees GPS coordinate format.
|
130 |
+
format: spell check transcription
|
131 |
+
null_value: ''
|
132 |
+
determined_by:
|
133 |
+
description: Full name of the individual responsible for determining the taxonomic
+        name of the specimen. Sometimes the name will be near to the characters 'det'
+        to denote determination. This name may be isolated from other names in the
+        unformatted OCR text.
+      format: verbatim transcription
+      null_value: ''
+    elevation_units:
+      description: 'Elevation units must be meters. If min_elevation field is populated,
+        then elevation_units: ''m''. Otherwise elevation_units: ''''.'
+      format: spell check transcription
+      null_value: ''
+    end_date:
+      description: 'If a date range is provided, this represents the later or ending
+        date of the collection period, formatted as year-month-day. If specific components
+        of the date are unknown, they should be replaced with zeros. Examples: ''0000-00-00''
+        if the entire end date is unknown, ''YYYY-00-00'' if only the year of the
+        end date is known, and ''YYYY-MM-00'' if year and month of the end date are
+        known but the day is not.'
+      format: yyyy-mm-dd
+      null_value: ''
+    forma:
+      description: Taxonomic determination to form (f.).
+      format: verbatim transcription
+      null_value: ''
+    genus:
+      description: Taxonomic determination to genus. Genus must be capitalized. If
+        genus is not present use the taxonomic family name followed by the word 'indet'.
+      format: verbatim transcription
+      null_value: ''
+    habitat:
+      description: Description of a plant's habitat or the location where the specimen
+        was collected. Ignore descriptions of the plant itself.
+      format: verbatim transcription
+      null_value: ''
+    locality_name:
+      description: Description of geographic location, landscape, landmarks, regional
+        features, nearby places, or any contextual information aiding in pinpointing
+        the exact origin or site of the specimen.
+      format: verbatim transcription
+      null_value: ''
+    max_elevation:
+      description: Maximum elevation or altitude in meters. If only one elevation
+        is present, then max_elevation should be set to the null_value. Only if units
+        are explicit then convert from feet ('ft' or 'ft.' or 'feet') to meters ('m'
+        or 'm.' or 'meters'). Round to integer.
+      format: integer
+      null_value: ''
+    min_elevation:
+      description: Minimum elevation or altitude in meters. Only if units are explicit
+        then convert from feet ('ft' or 'ft.' or 'feet') to meters ('m' or 'm.' or
+        'meters'). Round to integer.
+      format: integer
+      null_value: ''
+    multiple_names:
+      description: Indicate whether multiple people or collector names are present
+        in the unformatted OCR text. If you see more than one person's name the value
+        is 'yes'; otherwise the value is 'no'.
+      format: boolean yes no
+      null_value: ''
+    plant_description:
+      description: Description of plant features such as leaf shape, size, color,
+        stem texture, height, flower structure, scent, fruit or seed characteristics,
+        root system type, overall growth habit and form, any notable aroma or secretions,
+        presence of hairs or bristles, and any other distinguishing morphological
+        or physiological characteristics.
+      format: verbatim transcription
+      null_value: ''
+    species:
+      description: Taxonomic determination to species, do not capitalize species.
+      format: verbatim transcription
+      null_value: ''
+    state:
+      description: Administrative division 1 that corresponds to the current geographic
+        location of collection. Capitalize first letter of each word. Administrative
+        division 1 is equivalent to a U.S. State.
+      format: spell check transcription
+      null_value: ''
+    subspecies:
+      description: Taxonomic determination to subspecies (subsp.).
+      format: verbatim transcription
+      null_value: ''
+    variety:
+      description: Taxonomic determination to variety (var).
+      format: verbatim transcription
+      null_value: ''
+    verbatim_coordinates:
+      description: Verbatim location coordinates as they appear on the label. Do not
+        convert formats. Possible coordinate types are one of [Lat, Long, UTM, TRS].
+      format: verbatim transcription
+      null_value: ''
+    verbatim_date:
+      description: Date of collection exactly as it appears on the label. Do not change
+        the format or correct typos.
+      format: verbatim transcription
+      null_value: s.d.
+  SpeciesName:
+    taxonomy:
+    - Genus_species
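As a quick illustration of the stricter field rules above (zero-padding unknown end_date components and converting explicit feet values to meters for min_elevation/max_elevation), a downstream post-processing pass could look like the sketch below. The helper names and example values are invented for illustration and are not part of this commit.

import re

def normalize_end_date(raw):
    # Pad unknown date components with zeros: '0000-00-00', 'YYYY-00-00', or 'YYYY-MM-00'.
    parts = re.findall(r'\d+', raw or '')
    year = parts[0].zfill(4) if len(parts) > 0 else '0000'
    month = parts[1].zfill(2) if len(parts) > 1 else '00'
    day = parts[2].zfill(2) if len(parts) > 2 else '00'
    return f'{year}-{month}-{day}'

def elevation_to_meters(value, units):
    # Convert only when units are explicitly feet; otherwise assume meters. Round to integer.
    if units.strip().lower() in ('ft', 'ft.', 'feet'):
        return round(float(value) * 0.3048)
    return round(float(value))

print(normalize_end_date('1987-6'))        # -> 1987-06-00
print(elevation_to_meters('1200', 'ft'))   # -> 366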
demo/ba/ba.jpg ADDED (Git LFS)
demo/ba/ba.png ADDED (Git LFS)
demo/ba/ba2.png ADDED (Git LFS)
demo/demo_gallery/NY_1928185102_Heliotropiaceae_Heliotropium_indicum.jpg ADDED (Git LFS)
demo/demo_gallery/SMF_3046042583_Ebenaceae_Diospyros_mespiliformis.jpg ADDED (Git LFS)
demo/demo_gallery/UM_1807475718_Monimiaceae_Hedycarya_parvifolia.jpg ADDED (Git LFS)
demo/demo_gallery/UM_1915455196_Cardiopteridaceae_Citronella_sarmentosa.jpg ADDED (Git LFS)
demo/demo_images/UM_1807464860_Phellinaceae_Phelline_dumbeensis.jpg ADDED (Git LFS)
demo/img/expense_report.PNG ADDED (Git LFS)
demo/img/prompt_1.PNG ADDED (Git LFS)
demo/img/prompt_2.PNG ADDED (Git LFS)
demo/img/prompt_3.PNG ADDED (Git LFS)
demo/img/prompt_4.PNG ADDED (Git LFS)
demo/img/prompt_5.PNG ADDED (Git LFS)
demo/img/validation_1.PNG ADDED (Git LFS)
demo/img/validation_gpt.PNG ADDED (Git LFS)
demo/img/validation_gpu.PNG ADDED (Git LFS)
demo/img/validation_palm.PNG ADDED (Git LFS)
domain_knowledge/SLTP_UM_AllAsiaMinimalInRegion.xlsx ADDED (binary file, 600 kB)
img/icon.ico ADDED
img/icon.jpg ADDED (Git LFS)
img/icon2.ico ADDED
img/logo.png ADDED (Git LFS)
requirements.txt
ADDED
@@ -0,0 +1,34 @@
+--extra-index-url https://download.pytorch.org/whl/cpu
+
+torch==2.0.1
+torchvision==0.15.2
+torchaudio==2.0.2
+wheel
+streamlit
+streamlit-extras
+plotly
+pyyaml
+Pillow
+pandas
+matplotlib
+matplotlib-inline
+tqdm
+openai
+langchain
+tiktoken
+openpyxl
+google-generativeai
+google-cloud-storage
+google-cloud-vision
+opencv-python
+chromadb
+chroma-migrate
+InstructorEmbedding
+transformers
+sentence-transformers
+seaborn
+dask
+psutil
+py-cpuinfo
+azureml-sdk
+azure-identity
run_VoucherVision.py
ADDED
@@ -0,0 +1,31 @@
+import streamlit.web.cli as stcli
+import os, sys
+
+# Insert a file uploader that accepts multiple files at a time:
+# import streamlit as st
+# uploaded_files = st.file_uploader("Choose a CSV file", accept_multiple_files=True)
+# for uploaded_file in uploaded_files:
+#     bytes_data = uploaded_file.read()
+#     st.write("filename:", uploaded_file.name)
+#     st.write(bytes_data)
+
+
+def resolve_path(path):
+    resolved_path = os.path.abspath(os.path.join(os.getcwd(), path))
+    return resolved_path
+
+
+if __name__ == "__main__":
+    dir_home = os.path.dirname(__file__)
+
+    # pip install protobuf==3.20.0
+
+    sys.argv = [
+        "streamlit",
+        "run",
+        resolve_path(os.path.join(dir_home, "vouchervision", "VoucherVision_GUI.py")),
+        "--global.developmentMode=false",
+        "--server.port=8525",
+
+    ]
+    sys.exit(stcli.main())
vouchervision/LLM_Falcon.py
ADDED
@@ -0,0 +1,112 @@
+import os, sys, inspect, json, time
+
+# currentdir = os.path.dirname(os.path.abspath(
+#     inspect.getfile(inspect.currentframe())))
+# parentdir = os.path.dirname(currentdir)
+# sys.path.append(parentdir)
+
+# from prompts import PROMPT_PaLM_UMICH_skeleton_all_asia, PROMPT_PaLM_OCR_Organized, PROMPT_PaLM_Redo
+# from LLM_PaLM import create_OCR_analog_for_input, num_tokens_from_string
+
+'''
+https://docs.ai21.com/docs/python-sdk-with-amazon-bedrock
+
+
+https://techcommunity.microsoft.com/t5/ai-machine-learning-blog/falcon-llms-in-azure-machine-learning/ba-p/3876847
+https://github.com/Azure/azureml-examples/blob/main/sdk/python/foundation-models/huggingface/inference/text-generation-streaming/text-generation-streaming-online-endpoint.ipynb
+https://ml.azure.com/registries/HuggingFace/models/tiiuae-falcon-40b-instruct/version/12?tid=e66e77b4-5724-44d7-8721-06df160450ce#overview
+https://azure.microsoft.com/en-us/products/machine-learning/
+'''
+
+
+
+# from azure.ai.ml import MLClient
+# from azure.identity import (
+#     DefaultAzureCredential,
+#     InteractiveBrowserCredential,
+#     ClientSecretCredential,
+# )
+# from azure.ai.ml.entities import AmlCompute
+
+# try:
+#     credential = DefaultAzureCredential()
+#     credential.get_token("https://management.azure.com/.default")
+# except Exception as ex:
+#     credential = InteractiveBrowserCredential()
+
+# # connect to a workspace
+# workspace_ml_client = None
+# try:
+#     workspace_ml_client = MLClient.from_config(credential)
+#     subscription_id = workspace_ml_client.subscription_id
+#     workspace = workspace_ml_client.workspace_name
+#     resource_group = workspace_ml_client.resource_group_name
+# except Exception as ex:
+#     print(ex)
+#     # Enter details of your workspace
+#     subscription_id = "<SUBSCRIPTION_ID>"
+#     resource_group = "<RESOURCE_GROUP>"
+#     workspace = "<AML_WORKSPACE_NAME>"
+#     workspace_ml_client = MLClient(
+#         credential, subscription_id, resource_group, workspace
+#     )
+# # Connect to the HuggingFaceHub registry
+# registry_ml_client = MLClient(credential, registry_name="HuggingFace")
+# print(registry_ml_client)
+
+'''
+def OCR_to_dict_Falcon(logger, OCR, VVE):
+    # Find a similar example from the domain knowledge
+    domain_knowledge_example = VVE.query_db(OCR, 4)
+    similarity = VVE.get_similarity()
+    domain_knowledge_example_string = json.dumps(domain_knowledge_example)
+
+    try:
+        logger.info(f'Length of OCR raw -- {len(OCR)}')
+    except:
+        print(f'Length of OCR raw -- {len(OCR)}')
+
+    # Create input: output: for Falcon
+    # Assuming Falcon requires a similar structure as PaLM
+    in_list, out_list = create_OCR_analog_for_input(domain_knowledge_example)
+
+    # Construct the prompt for Falcon
+    # Adjust this based on Falcon's requirements
+    # prompt = PROMPT_Falcon_skeleton(OCR, in_list, out_list)
+    prompt = PROMPT_PaLM_UMICH_skeleton_all_asia(OCR, in_list, out_list)  # must provide examples to PaLM differently than for chatGPT, at least 2 examples
+
+
+    nt = num_tokens_from_string(prompt, "falcon_model_name")  # Replace "falcon_model_name" with the appropriate model name for Falcon
+    try:
+        logger.info(f'Prompt token length --- {nt}')
+    except:
+        print(f'Prompt token length --- {nt}')
+
+    # Assuming Falcon has a similar API structure as PaLM
+    # Adjust the settings based on Falcon's requirements
+    Falcon_settings = {
+        'model': 'models/falcon_model_name',  # Replace with the appropriate model name for Falcon
+        'temperature': 0,
+        'candidate_count': 1,
+        'top_k': 40,
+        'top_p': 0.95,
+        'max_output_tokens': 8000,
+        'stop_sequences': [],
+        # Add any other required settings for Falcon
+    }
+
+    # Send the prompt to Falcon for inference
+    # Adjust the API call based on Falcon's requirements
+    response = falcon.generate_text(**Falcon_settings, prompt=prompt)
+
+    # Process the response from Falcon
+    if response and response.result:
+        if isinstance(response.result, (str, bytes)):
+            response_valid = check_and_redo_JSON(response, Falcon_settings, logger)
+        else:
+            response_valid = {}
+    else:
+        response_valid = {}
+
+    return response_valid
+'''
vouchervision/LLM_PaLM.py
ADDED
@@ -0,0 +1,209 @@
1 |
+
import os
|
2 |
+
import sys
|
3 |
+
import inspect
|
4 |
+
import json
|
5 |
+
from json import JSONDecodeError
|
6 |
+
import tiktoken
|
7 |
+
import random
|
8 |
+
import google.generativeai as palm
|
9 |
+
|
10 |
+
currentdir = os.path.dirname(os.path.abspath(
|
11 |
+
inspect.getfile(inspect.currentframe())))
|
12 |
+
parentdir = os.path.dirname(currentdir)
|
13 |
+
sys.path.append(parentdir)
|
14 |
+
|
15 |
+
from prompt_catalog import PromptCatalog
|
16 |
+
from general_utils import num_tokens_from_string
|
17 |
+
|
18 |
+
"""
|
19 |
+
DEPRECATED:
|
20 |
+
Safety setting regularly block a response, so set to 4 to disable
|
21 |
+
|
22 |
+
class HarmBlockThreshold(Enum):
|
23 |
+
HARM_BLOCK_THRESHOLD_UNSPECIFIED = 0
|
24 |
+
BLOCK_LOW_AND_ABOVE = 1
|
25 |
+
BLOCK_MEDIUM_AND_ABOVE = 2
|
26 |
+
BLOCK_ONLY_HIGH = 3
|
27 |
+
BLOCK_NONE = 4
|
28 |
+
"""
|
29 |
+
|
30 |
+
SAFETY_SETTINGS = [
|
31 |
+
{
|
32 |
+
"category": "HARM_CATEGORY_DEROGATORY",
|
33 |
+
"threshold": "BLOCK_NONE",
|
34 |
+
},
|
35 |
+
{
|
36 |
+
"category": "HARM_CATEGORY_TOXICITY",
|
37 |
+
"threshold": "BLOCK_NONE",
|
38 |
+
},
|
39 |
+
{
|
40 |
+
"category": "HARM_CATEGORY_VIOLENCE",
|
41 |
+
"threshold": "BLOCK_NONE",
|
42 |
+
},
|
43 |
+
{
|
44 |
+
"category": "HARM_CATEGORY_SEXUAL",
|
45 |
+
"threshold": "BLOCK_NONE",
|
46 |
+
},
|
47 |
+
{
|
48 |
+
"category": "HARM_CATEGORY_MEDICAL",
|
49 |
+
"threshold": "BLOCK_NONE",
|
50 |
+
},
|
51 |
+
{
|
52 |
+
"category": "HARM_CATEGORY_DANGEROUS",
|
53 |
+
"threshold": "BLOCK_NONE",
|
54 |
+
},
|
55 |
+
]
|
56 |
+
|
57 |
+
PALM_SETTINGS = {
|
58 |
+
'model': 'models/text-bison-001',
|
59 |
+
'temperature': 0,
|
60 |
+
'candidate_count': 1,
|
61 |
+
'top_k': 40,
|
62 |
+
'top_p': 0.95,
|
63 |
+
'max_output_tokens': 8000,
|
64 |
+
'stop_sequences': [],
|
65 |
+
'safety_settings': SAFETY_SETTINGS,
|
66 |
+
}
|
67 |
+
|
68 |
+
PALM_SETTINGS_REDO = {
|
69 |
+
'model': 'models/text-bison-001',
|
70 |
+
'temperature': 0.05,
|
71 |
+
'candidate_count': 1,
|
72 |
+
'top_k': 40,
|
73 |
+
'top_p': 0.95,
|
74 |
+
'max_output_tokens': 8000,
|
75 |
+
'stop_sequences': [],
|
76 |
+
'safety_settings': SAFETY_SETTINGS,
|
77 |
+
}
|
78 |
+
|
79 |
+
def OCR_to_dict_PaLM(logger, OCR, prompt_version, VVE):
|
80 |
+
try:
|
81 |
+
logger.info(f'Length of OCR raw -- {len(OCR)}')
|
82 |
+
except:
|
83 |
+
print(f'Length of OCR raw -- {len(OCR)}')
|
84 |
+
|
85 |
+
# prompt = PROMPT_PaLM_UMICH_skeleton_all_asia(OCR, in_list, out_list) # must provide examples to PaLM differently than for chatGPT, at least 2 examples
|
86 |
+
Prompt = PromptCatalog(OCR)
|
87 |
+
if prompt_version in ['prompt_v2_palm2']:
|
88 |
+
version = 'v2'
|
89 |
+
prompt = Prompt.prompt_v2_palm2(OCR)
|
90 |
+
|
91 |
+
elif prompt_version in ['prompt_v1_palm2',]:
|
92 |
+
version = 'v1'
|
93 |
+
# create input: output: for PaLM
|
94 |
+
# Find a similar example from the domain knowledge
|
95 |
+
domain_knowledge_example = VVE.query_db(OCR, 4)
|
96 |
+
similarity= VVE.get_similarity()
|
97 |
+
domain_knowledge_example_string = json.dumps(domain_knowledge_example)
|
98 |
+
in_list, out_list = create_OCR_analog_for_input(domain_knowledge_example)
|
99 |
+
prompt = Prompt.prompt_v1_palm2(in_list, out_list, OCR)
|
100 |
+
|
101 |
+
elif prompt_version in ['prompt_v1_palm2_noDomainKnowledge',]:
|
102 |
+
version = 'v1'
|
103 |
+
prompt = Prompt.prompt_v1_palm2_noDomainKnowledge(OCR)
|
104 |
+
else:
|
105 |
+
version = 'custom'
|
106 |
+
prompt, n_fields, xlsx_headers = Prompt.prompt_v2_custom(prompt_version, OCR=OCR, is_palm=True)
|
107 |
+
# raise
|
108 |
+
|
109 |
+
nt = num_tokens_from_string(prompt, "cl100k_base")
|
110 |
+
# try:
|
111 |
+
logger.info(f'Prompt token length --- {nt}')
|
112 |
+
# except:
|
113 |
+
# print(f'Prompt token length --- {nt}')
|
114 |
+
|
115 |
+
do_use_SOP = False ########
|
116 |
+
|
117 |
+
if do_use_SOP:
|
118 |
+
'''TODO: Check back later to see if LangChain will support PaLM'''
|
119 |
+
# logger.info(f'Waiting for PaLM API call --- Using StructuredOutputParser')
|
120 |
+
# response = structured_output_parser(OCR, prompt, logger)
|
121 |
+
# return response['Dictionary']
|
122 |
+
pass
|
123 |
+
|
124 |
+
else:
|
125 |
+
# try:
|
126 |
+
logger.info(f'Waiting for PaLM 2 API call')
|
127 |
+
# except:
|
128 |
+
# print(f'Waiting for PaLM 2 API call --- Content')
|
129 |
+
|
130 |
+
# safety_thresh = 4
|
131 |
+
# PaLM_settings = {'model': 'models/text-bison-001','temperature': 0,'candidate_count': 1,'top_k': 40,'top_p': 0.95,'max_output_tokens': 8000,'stop_sequences': [],
|
132 |
+
# 'safety_settings': [{"category":"HARM_CATEGORY_DEROGATORY","threshold":safety_thresh},{"category":"HARM_CATEGORY_TOXICITY","threshold":safety_thresh},{"category":"HARM_CATEGORY_VIOLENCE","threshold":safety_thresh},{"category":"HARM_CATEGORY_SEXUAL","threshold":safety_thresh},{"category":"HARM_CATEGORY_MEDICAL","threshold":safety_thresh},{"category":"HARM_CATEGORY_DANGEROUS","threshold":safety_thresh}],}
|
133 |
+
response = palm.generate_text(prompt=prompt, **PALM_SETTINGS)
|
134 |
+
|
135 |
+
|
136 |
+
if response and response.result:
|
137 |
+
if isinstance(response.result, (str, bytes)):
|
138 |
+
response_valid = check_and_redo_JSON(response, logger, version)
|
139 |
+
else:
|
140 |
+
response_valid = {}
|
141 |
+
else:
|
142 |
+
response_valid = {}
|
143 |
+
|
144 |
+
logger.info(f'Candidate JSON\n{response.result}')
|
145 |
+
return response_valid, nt
|
146 |
+
|
147 |
+
def check_and_redo_JSON(response, logger, version):
|
148 |
+
try:
|
149 |
+
response_valid = json.loads(response.result)
|
150 |
+
logger.info(f'Response --- First call passed')
|
151 |
+
return response_valid
|
152 |
+
except JSONDecodeError:
|
153 |
+
|
154 |
+
try:
|
155 |
+
response_valid = json.loads(response.result.strip('```').replace('json\n', '', 1).replace('json', '', 1))
|
156 |
+
logger.info(f'Response --- Manual removal of ```json succeeded')
|
157 |
+
return response_valid
|
158 |
+
except:
|
159 |
+
logger.info(f'Response --- First call failed. Redo...')
|
160 |
+
Prompt = PromptCatalog()
|
161 |
+
if version == 'v1':
|
162 |
+
prompt_redo = Prompt.prompt_palm_redo_v1(response.result)
|
163 |
+
elif version == 'v2':
|
164 |
+
prompt_redo = Prompt.prompt_palm_redo_v2(response.result)
|
165 |
+
elif version == 'custom':
|
166 |
+
prompt_redo = Prompt.prompt_v2_custom_redo(response.result, is_palm=True)
|
167 |
+
|
168 |
+
|
169 |
+
# prompt_redo = PROMPT_PaLM_Redo(response.result)
|
170 |
+
try:
|
171 |
+
response = palm.generate_text(prompt=prompt_redo, **PALM_SETTINGS)
|
172 |
+
response_valid = json.loads(response.result)
|
173 |
+
logger.info(f'Response --- Second call passed')
|
174 |
+
return response_valid
|
175 |
+
except JSONDecodeError:
|
176 |
+
logger.info(f'Response --- Second call failed. Final redo. Temperature changed to 0.05')
|
177 |
+
try:
|
178 |
+
response = palm.generate_text(prompt=prompt_redo, **PALM_SETTINGS_REDO)
|
179 |
+
response_valid = json.loads(response.result)
|
180 |
+
logger.info(f'Response --- Third call passed')
|
181 |
+
return response_valid
|
182 |
+
except JSONDecodeError:
|
183 |
+
return None
|
184 |
+
|
185 |
+
|
186 |
+
def create_OCR_analog_for_input(domain_knowledge_example):
|
187 |
+
in_list = []
|
188 |
+
out_list = []
|
189 |
+
# Iterate over the domain_knowledge_example (list of dictionaries)
|
190 |
+
for row_dict in domain_knowledge_example:
|
191 |
+
# Convert the dictionary to a JSON string and add it to the out_list
|
192 |
+
domain_knowledge_example_string = json.dumps(row_dict)
|
193 |
+
out_list.append(domain_knowledge_example_string)
|
194 |
+
|
195 |
+
# Create a single string from all values in the row_dict
|
196 |
+
row_text = '||'.join(str(v) for v in row_dict.values())
|
197 |
+
|
198 |
+
# Split the row text by '||', shuffle the parts, and then re-join with a single space
|
199 |
+
parts = row_text.split('||')
|
200 |
+
random.shuffle(parts)
|
201 |
+
shuffled_text = ' '.join(parts)
|
202 |
+
|
203 |
+
# Add the shuffled_text to the in_list
|
204 |
+
in_list.append(shuffled_text)
|
205 |
+
return in_list, out_list
|
206 |
+
|
207 |
+
|
208 |
+
def strip_problematic_chars(s):
|
209 |
+
return ''.join(c for c in s if c.isprintable())
|
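To make the PaLM helper above easier to follow, here is a minimal sketch of what create_OCR_analog_for_input does with one domain-knowledge record; the record itself is invented for illustration.

import json, random

domain_knowledge_example = [
    {"Genus": "Quercus", "Species": "alba", "Country": "USA", "Collectors": "J. Smith"},
]

in_list, out_list = [], []
for row_dict in domain_knowledge_example:
    out_list.append(json.dumps(row_dict))                          # output example: the structured JSON record
    parts = '||'.join(str(v) for v in row_dict.values()).split('||')
    random.shuffle(parts)                                          # mimic the unordered nature of raw OCR text
    in_list.append(' '.join(parts))                                # input example: an OCR-like jumble of the values

print(in_list[0])    # e.g. 'J. Smith alba USA Quercus'
print(out_list[0])   # '{"Genus": "Quercus", "Species": "alba", "Country": "USA", "Collectors": "J. Smith"}'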
vouchervision/LLM_chatGPT_3_5.py
ADDED
@@ -0,0 +1,420 @@
1 |
+
import openai
|
2 |
+
import os, json, sys, inspect, time, requests
|
3 |
+
from langchain.output_parsers import StructuredOutputParser, ResponseSchema
|
4 |
+
from langchain.prompts import PromptTemplate, ChatPromptTemplate, HumanMessagePromptTemplate
|
5 |
+
from langchain.llms import OpenAI
|
6 |
+
from langchain.chat_models import ChatOpenAI, AzureChatOpenAI
|
7 |
+
from langchain.schema import HumanMessage
|
8 |
+
from general_utils import num_tokens_from_string
|
9 |
+
|
10 |
+
currentdir = os.path.dirname(os.path.abspath(
|
11 |
+
inspect.getfile(inspect.currentframe())))
|
12 |
+
parentdir = os.path.dirname(currentdir)
|
13 |
+
sys.path.append(parentdir)
|
14 |
+
|
15 |
+
from prompts import PROMPT_UMICH_skeleton_all_asia, PROMPT_OCR_Organized, PROMPT_UMICH_skeleton_all_asia_GPT4, PROMPT_OCR_Organized_GPT4, PROMPT_JSON
|
16 |
+
from prompt_catalog import PromptCatalog
|
17 |
+
|
18 |
+
RETRY_DELAY = 61 # Wait 60 seconds before retrying
|
19 |
+
MAX_RETRIES = 5 # Maximum number of retries
|
20 |
+
|
21 |
+
|
22 |
+
def azure_call(model, messages):
|
23 |
+
response = model(messages=messages)
|
24 |
+
return response
|
25 |
+
|
26 |
+
def OCR_to_dict(is_azure, logger, MODEL, prompt, llm, prompt_version):
|
27 |
+
for i in range(MAX_RETRIES):
|
28 |
+
try:
|
29 |
+
do_use_SOP = True
|
30 |
+
|
31 |
+
if do_use_SOP:
|
32 |
+
logger.info(f'Waiting for {MODEL} API call --- Using StructuredOutputParser')
|
33 |
+
response = structured_output_parser(is_azure, MODEL, llm, prompt, logger, prompt_version)
|
34 |
+
if response is None:
|
35 |
+
return None
|
36 |
+
else:
|
37 |
+
return response['Dictionary']
|
38 |
+
|
39 |
+
else:
|
40 |
+
### Direct GPT ###
|
41 |
+
logger.info(f'Waiting for {MODEL} API call')
|
42 |
+
if not is_azure:
|
43 |
+
response = openai.ChatCompletion.create(
|
44 |
+
model=MODEL,
|
45 |
+
temperature = 0,
|
46 |
+
messages=[
|
47 |
+
{"role": "system", "content": "You are a helpful assistant acting as a transcription expert and your job is to transcribe herbarium specimen labels based on OCR data and reformat it to meet Darwin Core Archive Standards into a Python dictionary based on certain rules."},
|
48 |
+
{"role": "user", "content": prompt},
|
49 |
+
],
|
50 |
+
max_tokens=4096,
|
51 |
+
)
|
52 |
+
# print the model's response
|
53 |
+
return response.choices[0].message['content']
|
54 |
+
else:
|
55 |
+
msg = HumanMessage(
|
56 |
+
content=prompt
|
57 |
+
)
|
58 |
+
response = azure_call(llm, [msg])
|
59 |
+
return response.content
|
60 |
+
except Exception as e:
|
61 |
+
logger.error(f'{e}')
|
62 |
+
if i < MAX_RETRIES - 1: # No delay needed after the last try
|
63 |
+
time.sleep(RETRY_DELAY)
|
64 |
+
else:
|
65 |
+
raise
|
66 |
+
|
67 |
+
# def OCR_to_dict(logger, MODEL, prompt, OCR, BASE_URL, HEADERS):
|
68 |
+
# for i in range(MAX_RETRIES):
|
69 |
+
# try:
|
70 |
+
# do_use_SOP = False
|
71 |
+
|
72 |
+
# if do_use_SOP:
|
73 |
+
# logger.info(f'Waiting for {MODEL} API call --- Using StructuredOutputParser -- Content')
|
74 |
+
# response = structured_output_parser(MODEL, OCR, prompt, logger)
|
75 |
+
# if response is None:
|
76 |
+
# return None
|
77 |
+
# else:
|
78 |
+
# return response['Dictionary']
|
79 |
+
|
80 |
+
# else:
|
81 |
+
# ### Direct GPT through Azure ###
|
82 |
+
# logger.info(f'Waiting for {MODEL} API call')
|
83 |
+
# response = azure_gpt_request(prompt, BASE_URL, HEADERS, model_name=MODEL)
|
84 |
+
|
85 |
+
# # Handle the response data. Note: You might need to adjust the following line based on the exact response format of the Azure API.
|
86 |
+
# content = response.get("choices", [{}])[0].get("message", {}).get("content", "")
|
87 |
+
# return content
|
88 |
+
# except requests.exceptions.RequestException as e: # Replace openai.error.APIError with requests exception.
|
89 |
+
# # Handle HTTP exceptions. You can adjust this based on the Azure API's error responses.
|
90 |
+
# if e.response.status_code == 502:
|
91 |
+
# logger.info(f' *** 502 error was encountered, wait and try again ***')
|
92 |
+
# if i < MAX_RETRIES - 1:
|
93 |
+
# time.sleep(RETRY_DELAY)
|
94 |
+
# else:
|
95 |
+
# raise
|
96 |
+
|
97 |
+
|
98 |
+
def OCR_to_dict_16k(is_azure, logger, MODEL, prompt, llm, prompt_version):
|
99 |
+
for i in range(MAX_RETRIES):
|
100 |
+
try:
|
101 |
+
fs = FunctionSchema()
|
102 |
+
response = openai.ChatCompletion.create(
|
103 |
+
model=MODEL,
|
104 |
+
temperature = 0,
|
105 |
+
messages=[
|
106 |
+
{"role": "system", "content": "You are a helpful assistant acting as a transcription expert and your job is to transcribe herbarium specimen labels based on OCR data and reformat it to meet Darwin Core Archive Standards into a Python dictionary based on certain rules."},
|
107 |
+
{"role": "user", "content": prompt},
|
108 |
+
],
|
109 |
+
max_tokens=8000,
|
110 |
+
function_call= "none",
|
111 |
+
functions= fs.format_C21_AA_V1()
|
112 |
+
|
113 |
+
)
|
114 |
+
# Try to parse the response into JSON
|
115 |
+
call_failed = False
|
116 |
+
try:
|
117 |
+
response_string = response.choices[0].message['content']
|
118 |
+
except:
|
119 |
+
call_failed = True
|
120 |
+
response_string = prompt
|
121 |
+
|
122 |
+
if not call_failed:
|
123 |
+
try:
|
124 |
+
# Try to parse the response into JSON
|
125 |
+
response_dict = json.loads(response_string)
|
126 |
+
return response_dict['Dictionary']
|
127 |
+
except json.JSONDecodeError:
|
128 |
+
# If the response is not a valid JSON, call the structured_output_parser_for_function_calls_fail function
|
129 |
+
logger.info(f'Invalid JSON response, calling structured_output_parser_for_function_calls_fail function')
|
130 |
+
logger.info(f'Waiting for {MODEL} API call --- Using StructuredOutputParser --- JSON Fixer')
|
131 |
+
response_sop = structured_output_parser_for_function_calls_fail(is_azure, MODEL, response_string, logger, llm, prompt_version, is_helper=False)
|
132 |
+
if response_sop is None:
|
133 |
+
return None
|
134 |
+
else:
|
135 |
+
return response_sop['Dictionary']
|
136 |
+
else:
|
137 |
+
try:
|
138 |
+
logger.info(f'Call Failed. Attempting fallback JSON parse without guidance')
|
139 |
+
logger.info(f'Waiting for {MODEL} API call --- Using StructuredOutputParser --- JSON Fixer')
|
140 |
+
response_sop = structured_output_parser_for_function_calls_fail(is_azure, MODEL, response_string, logger, llm, prompt_version, is_helper=False)
|
141 |
+
if response_sop is None:
|
142 |
+
return None
|
143 |
+
else:
|
144 |
+
return response_sop['Dictionary']
|
145 |
+
except:
|
146 |
+
return None
|
147 |
+
except Exception as e:
|
148 |
+
# if e.status_code == 401: # or you can check the error message
|
149 |
+
logger.info(f' *** 401 error was encountered, wait and try again ***')
|
150 |
+
# If a 401 error was encountered, wait and try again
|
151 |
+
if i < MAX_RETRIES - 1: # No delay needed after the last try
|
152 |
+
time.sleep(RETRY_DELAY)
|
153 |
+
else:
|
154 |
+
# If it was a different error, re-raise it
|
155 |
+
raise
|
156 |
+
|
157 |
+
def structured_output_parser(is_azure, MODEL, llm, prompt_template, logger, prompt_version, is_helper=False):
|
158 |
+
if not is_helper:
|
159 |
+
response_schemas = [
|
160 |
+
ResponseSchema(name="SpeciesName", description="Taxonomic determination, genus_species"),
|
161 |
+
ResponseSchema(name="Dictionary", description='Formatted JSON object'),]#prompt_template),]
|
162 |
+
elif is_helper:
|
163 |
+
response_schemas = [
|
164 |
+
ResponseSchema(name="Dictionary", description='Formatted JSON object'),#prompt_template),
|
165 |
+
ResponseSchema(name="Summary", description="A one sentence summary of the content"),]
|
166 |
+
|
167 |
+
output_parser = StructuredOutputParser.from_response_schemas(response_schemas)
|
168 |
+
|
169 |
+
format_instructions = output_parser.get_format_instructions()
|
170 |
+
|
171 |
+
prompt = ChatPromptTemplate(
|
172 |
+
messages=[
|
173 |
+
HumanMessagePromptTemplate.from_template("Parse the OCR text into the correct structured format.\n{format_instructions}\n{question}")
|
174 |
+
],
|
175 |
+
input_variables=["question"],
|
176 |
+
partial_variables={"format_instructions": format_instructions}
|
177 |
+
)
|
178 |
+
|
179 |
+
# Handle Azure vs OpenAI implementation
|
180 |
+
if is_azure:
|
181 |
+
_input = prompt.format_prompt(question=prompt_template)
|
182 |
+
msg = HumanMessage(content=_input.to_string())
|
183 |
+
output = azure_call(llm, [msg])
|
184 |
+
else:
|
185 |
+
chat_model = ChatOpenAI(temperature=0, model=MODEL)
|
186 |
+
_input = prompt.format_prompt(question=prompt_template)
|
187 |
+
output = chat_model(_input.to_messages())
|
188 |
+
|
189 |
+
# Log token length if running with Gradio
|
190 |
+
try:
|
191 |
+
nt = num_tokens_from_string(_input.to_string(), "cl100k_base")
|
192 |
+
logger.info(f'Prompt token length --- {nt}')
|
193 |
+
except:
|
194 |
+
pass
|
195 |
+
|
196 |
+
# Parse the output
|
197 |
+
try:
|
198 |
+
# Check if output is of type 'ai' and parse accordingly
|
199 |
+
if output.type == 'ai':
|
200 |
+
parsed_content = output.content
|
201 |
+
logger.info(f'Formatted JSON\n{parsed_content}')
|
202 |
+
else:
|
203 |
+
# If not 'ai', log and set parsed_content to None or a default value
|
204 |
+
logger.error('Output type is not "ai". Unable to parse.')
|
205 |
+
return None
|
206 |
+
|
207 |
+
# Clean up the parsed content
|
208 |
+
parsed_content = parsed_content.replace('\n', "").replace('\t', "").replace('|', "")
|
209 |
+
|
210 |
+
# Attempt to parse the cleaned content
|
211 |
+
try:
|
212 |
+
refined_response = output_parser.parse(parsed_content)
|
213 |
+
return refined_response
|
214 |
+
except Exception as parse_error:
|
215 |
+
# Handle parsing errors specifically
|
216 |
+
logger.error(f'Parsing Error: {parse_error}')
|
217 |
+
return structured_output_parser_for_function_calls_fail(is_azure, MODEL, parsed_content, logger, llm, prompt_version, is_helper)
|
218 |
+
|
219 |
+
except Exception as e:
|
220 |
+
# Handle any other exceptions that might occur
|
221 |
+
logger.error(f'Unexpected Error: {e}')
|
222 |
+
return None
|
223 |
+
|
224 |
+
def structured_output_parser_for_function_calls_fail(is_azure, MODEL, failed_response, logger, llm, prompt_version, is_helper=False, try_ind=0):
|
225 |
+
if try_ind > 5:
|
226 |
+
return None
|
227 |
+
|
228 |
+
# prompt_redo = PROMPT_JSON('helper' if is_helper else 'dict', failed_response)
|
229 |
+
Prompt = PromptCatalog()
|
230 |
+
if prompt_version in ['prompt_v1_verbose', 'prompt_v1_verbose_noDomainKnowledge']:
|
231 |
+
prompt_redo = Prompt.prompt_gpt_redo_v1(failed_response)
|
232 |
+
elif prompt_version in ['prompt_v2_json_rules']:
|
233 |
+
prompt_redo = Prompt.prompt_gpt_redo_v2(failed_response)
|
234 |
+
else:
|
235 |
+
prompt_redo = Prompt.prompt_v2_custom_redo(failed_response, is_palm=False)
|
236 |
+
|
237 |
+
response_schemas = [
|
238 |
+
ResponseSchema(name="Summary", description="A one sentence summary of the content"),
|
239 |
+
ResponseSchema(name="Dictionary", description='Formatted JSON object')
|
240 |
+
]
|
241 |
+
|
242 |
+
output_parser = StructuredOutputParser.from_response_schemas(response_schemas)
|
243 |
+
format_instructions = output_parser.get_format_instructions()
|
244 |
+
|
245 |
+
prompt = ChatPromptTemplate(
|
246 |
+
messages=[
|
247 |
+
HumanMessagePromptTemplate.from_template("The following text contains JSON formatted text, but there is an error that you need to correct.\n{format_instructions}\n{question}")
|
248 |
+
],
|
249 |
+
input_variables=["question"],
|
250 |
+
partial_variables={"format_instructions": format_instructions}
|
251 |
+
)
|
252 |
+
|
253 |
+
_input = prompt.format_prompt(question=prompt_redo)
|
254 |
+
|
255 |
+
# Log token length if running with Gradio
|
256 |
+
try:
|
257 |
+
nt = num_tokens_from_string(_input.to_string(), "cl100k_base")
|
258 |
+
logger.info(f'Prompt Redo token length --- {nt}')
|
259 |
+
except:
|
260 |
+
pass
|
261 |
+
|
262 |
+
if is_azure:
|
263 |
+
msg = HumanMessage(content=_input.to_string())
|
264 |
+
output = azure_call(llm, [msg])
|
265 |
+
else:
|
266 |
+
chat_model = ChatOpenAI(temperature=0, model=MODEL)
|
267 |
+
output = chat_model(_input.to_messages())
|
268 |
+
|
269 |
+
try:
|
270 |
+
refined_response = output_parser.parse(output.content)
|
271 |
+
except json.decoder.JSONDecodeError as e:
|
272 |
+
try_ind += 1
|
273 |
+
error_message = str(e)
|
274 |
+
redo_content = f'The error messsage is: {error_message}\nThe broken JSON object is: {output.content}'
|
275 |
+
logger.info(f'[Failed JSON Object]\n{output.content}')
|
276 |
+
refined_response = structured_output_parser_for_function_calls_fail(is_azure, MODEL, redo_content, logger, llm, prompt_version, is_helper, try_ind)
|
277 |
+
except:
|
278 |
+
try_ind += 1
|
279 |
+
logger.info(f'[Failed JSON Object]\n{output.content}')
|
280 |
+
refined_response = structured_output_parser_for_function_calls_fail(is_azure, MODEL, output.content, logger, llm, prompt_version, is_helper, try_ind)
|
281 |
+
|
282 |
+
return refined_response
|
283 |
+
|
284 |
+
|
285 |
+
|
286 |
+
|
287 |
+
class FunctionSchema:
|
288 |
+
def __init__(self):
|
289 |
+
pass
|
290 |
+
|
291 |
+
def format_C21_AA_V1(self):
|
292 |
+
return [
|
293 |
+
{
|
294 |
+
"name": "format_C21_AA_V1",
|
295 |
+
"description": "Format the given data into a specific dictionary",
|
296 |
+
"parameters": {
|
297 |
+
"type": "object",
|
298 |
+
"properties": {}, # specify parameters here if your function requires any
|
299 |
+
"required": [] # list of required parameters
|
300 |
+
},
|
301 |
+
"output_type": "json",
|
302 |
+
"output_schema": {
|
303 |
+
"type": "object",
|
304 |
+
"properties": {
|
305 |
+
"Dictionary": {
|
306 |
+
"type": "object",
|
307 |
+
"properties": {
|
308 |
+
"Catalog Number": {"type": "array", "items": {"type": "string"}},
|
309 |
+
"Genus": {"type": "array", "items": {"type": "string"}},
|
310 |
+
"Species": {"type": "array", "items": {"type": "string"}},
|
311 |
+
"subspecies": {"type": "array", "items": {"type": "string"}},
|
312 |
+
"variety": {"type": "array", "items": {"type": "string"}},
|
313 |
+
"forma": {"type": "array", "items": {"type": "string"}},
|
314 |
+
"Country": {"type": "array", "items": {"type": "string"}},
|
315 |
+
"State": {"type": "array", "items": {"type": "string"}},
|
316 |
+
"County": {"type": "array", "items": {"type": "string"}},
|
317 |
+
"Locality Name": {"type": "array", "items": {"type": "string"}},
|
318 |
+
"Min Elevation": {"type": "array", "items": {"type": "string"}},
|
319 |
+
"Max Elevation": {"type": "array", "items": {"type": "string"}},
|
320 |
+
"Elevation Units": {"type": "array", "items": {"type": "string"}},
|
321 |
+
"Verbatim Coordinates": {"type": "array", "items": {"type": "string"}},
|
322 |
+
"Datum": {"type": "array", "items": {"type": "string"}},
|
323 |
+
"Cultivated": {"type": "array", "items": {"type": "string"}},
|
324 |
+
"Habitat": {"type": "array", "items": {"type": "string"}},
|
325 |
+
"Collectors": {"type": "array", "items": {"type": "string"}},
|
326 |
+
"Collector Number": {"type": "array", "items": {"type": "string"}},
|
327 |
+
"Verbatim Date": {"type": "array", "items": {"type": "string"}},
|
328 |
+
"Date": {"type": "array", "items": {"type": "string"}},
|
329 |
+
"End Date": {"type": "array", "items": {"type": "string"}}
|
330 |
+
}
|
331 |
+
},
|
332 |
+
"SpeciesName": {
|
333 |
+
"type": "object",
|
334 |
+
"properties": {
|
335 |
+
"taxonomy": {"type": "array", "items": {"type": "string"}}
|
336 |
+
}
|
337 |
+
}
|
338 |
+
}
|
339 |
+
}
|
340 |
+
}
|
341 |
+
]
|
342 |
+
|
343 |
+
def format_C21_AA_V1_helper(self):
|
344 |
+
return [
|
345 |
+
{
|
346 |
+
"name": "format_C21_AA_V1_helper",
|
347 |
+
"description": "Helper function for format_C21_AA_V1 to further format the given data",
|
348 |
+
"parameters": {
|
349 |
+
"type": "object",
|
350 |
+
"properties": {}, # specify parameters here if your function requires any
|
351 |
+
"required": [] # list of required parameters
|
352 |
+
},
|
353 |
+
"output_type": "json",
|
354 |
+
"output_schema": {
|
355 |
+
"type": "object",
|
356 |
+
"properties": {
|
357 |
+
"Dictionary": {
|
358 |
+
"type": "object",
|
359 |
+
"properties": {
|
360 |
+
"TAXONOMY": {
|
361 |
+
"type": "object",
|
362 |
+
"properties": {
|
363 |
+
"Order": {"type": "array", "items": {"type": "string"}},
|
364 |
+
"Family": {"type": "array", "items": {"type": "string"}},
|
365 |
+
"Genus":{"type": "array", "items": {"type": "string"}},
|
366 |
+
"Species": {"type": "array", "items": {"type": "string"}},
|
367 |
+
"Subspecies": {"type": "array", "items": {"type": "string"}},
|
368 |
+
"Variety": {"type": "array", "items": {"type": "string"}},
|
369 |
+
"Forma": {"type": "array", "items": {"type": "string"}},
|
370 |
+
}
|
371 |
+
},
|
372 |
+
"GEOGRAPHY": {
|
373 |
+
"type": "object",
|
374 |
+
"properties": {
|
375 |
+
"Country": {"type": "array", "items": {"type": "string"}},
|
376 |
+
"State": {"type": "array", "items": {"type": "string"}},
|
377 |
+
"Prefecture": {"type": "array", "items": {"type": "string"}},
|
378 |
+
"Province": {"type": "array", "items": {"type": "string"}},
|
379 |
+
"District": {"type": "array", "items": {"type": "string"}},
|
380 |
+
"County": {"type": "array", "items": {"type": "string"}},
|
381 |
+
"City": {"type": "array", "items": {"type": "string"}},
|
382 |
+
"Administrative Division": {"type": "array", "items": {"type": "string"}},
|
383 |
+
}
|
384 |
+
},
|
385 |
+
"LOCALITY": {
|
386 |
+
"type": "object",
|
387 |
+
"properties": {
|
388 |
+
"Landscape": {"type": "array", "items": {"type": "string"}},
|
389 |
+
"Nearby Places": {"type": "array", "items": {"type": "string"}},
|
390 |
+
}
|
391 |
+
},
|
392 |
+
"COLLECTING": {
|
393 |
+
"type": "object",
|
394 |
+
"properties": {
|
395 |
+
"Collector": {"type": "array", "items": {"type": "string"}},
|
396 |
+
"Collector's Number": {"type": "array", "items": {"type": "string"}},
|
397 |
+
"Verbatim Date": {"type": "array", "items": {"type": "string"}},
|
398 |
+
"Formatted Date": {"type": "array", "items": {"type": "string"}},
|
399 |
+
"Cultivation Status": {"type": "array", "items": {"type": "string"}},
|
400 |
+
"Habitat Description": {"type": "array", "items": {"type": "string"}},
|
401 |
+
}
|
402 |
+
},
|
403 |
+
"MISCELLANEOUS": {
|
404 |
+
"type": "object",
|
405 |
+
"properties": {
|
406 |
+
"Additional Information": {"type": "array", "items": {"type": "string"}},
|
407 |
+
}
|
408 |
+
}
|
409 |
+
}
|
410 |
+
},
|
411 |
+
"Summary": {
|
412 |
+
"type": "object",
|
413 |
+
"properties": {
|
414 |
+
"Content Summary": {"type": "array", "items": {"type": "string"}}
|
415 |
+
}
|
416 |
+
}
|
417 |
+
}
|
418 |
+
}
|
419 |
+
}
|
420 |
+
]
|
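For reference, an abridged example of an object that would satisfy the format_C21_AA_V1 output_schema defined above; all values are placeholders invented for illustration.

example_response = {
    "Dictionary": {
        "Catalog Number": ["1234567"],
        "Genus": ["Quercus"],
        "Species": ["alba"],
        "Country": ["USA"],
        "State": ["Michigan"],
        "Collectors": ["J. Smith"],
        "Verbatim Date": ["12 May 1987"],
        "Date": ["1987-05-12"],
    },
    "SpeciesName": {
        "taxonomy": ["Quercus_alba"],
    },
}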
vouchervision/LM2_logger.py
ADDED
@@ -0,0 +1,117 @@
1 |
+
import logging, os, psutil, torch, platform, cpuinfo, yaml #py-cpuinfo
|
2 |
+
from vouchervision.general_utils import get_datetime, print_main_warn, print_main_info
|
3 |
+
|
4 |
+
def start_logging(Dirs, cfg):
|
5 |
+
run_name = cfg['leafmachine']['project']['run_name']
|
6 |
+
path_log = os.path.join(Dirs.path_log, '__'.join(['LM2-log',str(get_datetime()), run_name])+'.log')
|
7 |
+
|
8 |
+
# Disable default StreamHandler
|
9 |
+
logging.getLogger().handlers = []
|
10 |
+
|
11 |
+
# create logger
|
12 |
+
logger = logging.getLogger('Hardware Components')
|
13 |
+
logger.setLevel(logging.DEBUG)
|
14 |
+
|
15 |
+
# create file handler and set level to debug
|
16 |
+
fh = logging.FileHandler(path_log)
|
17 |
+
fh.setLevel(logging.DEBUG)
|
18 |
+
|
19 |
+
# create console handler and set level to debug
|
20 |
+
ch = logging.StreamHandler()
|
21 |
+
ch.setLevel(logging.DEBUG)
|
22 |
+
|
23 |
+
# create formatter
|
24 |
+
formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(name)s - %(message)s')
|
25 |
+
|
26 |
+
# add formatter to handlers
|
27 |
+
fh.setFormatter(formatter)
|
28 |
+
ch.setFormatter(formatter)
|
29 |
+
|
30 |
+
# add handlers to logger
|
31 |
+
logger.addHandler(fh)
|
32 |
+
logger.addHandler(ch)
|
33 |
+
|
34 |
+
# Create a logger for the file handler
|
35 |
+
file_logger = logging.getLogger('file_logger')
|
36 |
+
file_logger.setLevel(logging.DEBUG)
|
37 |
+
file_formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
|
38 |
+
file_handler = logging.FileHandler(path_log)
|
39 |
+
file_handler.setLevel(logging.DEBUG)
|
40 |
+
file_handler.setFormatter(file_formatter)
|
41 |
+
file_logger.addHandler(file_handler)
|
42 |
+
# Disable propagation of log messages to the root logger
|
43 |
+
file_logger.propagate = False
|
44 |
+
|
45 |
+
# 'application' code
|
46 |
+
# logger.debug('debug message')
|
47 |
+
# logger.info('info message')
|
48 |
+
# logger.warning('warn message')
|
49 |
+
# logger.error('error message')
|
50 |
+
# logger.critical('critical message')
|
51 |
+
|
52 |
+
# Get CPU information
|
53 |
+
logger.info(f"CPU: {find_cpu_info()}")
|
54 |
+
|
55 |
+
# Get GPU information (using PyTorch)
|
56 |
+
if torch.cuda.is_available():
|
57 |
+
num_gpus = torch.cuda.device_count()
|
58 |
+
if num_gpus == 1:
|
59 |
+
gpu = torch.cuda.get_device_properties(0)
|
60 |
+
logger.info(f"GPU: {gpu.name} ({gpu.total_memory // (1024 * 1024)} MB)")
|
61 |
+
else:
|
62 |
+
for i in range(num_gpus):
|
63 |
+
gpu = torch.cuda.get_device_properties(i)
|
64 |
+
logger.info(f"GPU {i}: {gpu.name} ({gpu.total_memory // (1024 * 1024)} MB)")
|
65 |
+
else:
|
66 |
+
logger.info("No GPU found")
|
67 |
+
logger.info("LeafMachine2 image cropping and embedding search will be extremely slow or not possible.")
|
68 |
+
print_main_info("No GPU found!")
|
69 |
+
print_main_info("LeafMachine2 image cropping and embedding search will be extremely slow or not possible.")
|
70 |
+
|
71 |
+
# Get memory information
|
72 |
+
mem_info = psutil.virtual_memory()
|
73 |
+
logger.info(f"Memory: {mem_info.total // (1024 * 1024)} MB")
|
74 |
+
logger.info(LM2_banner())
|
75 |
+
logger.info(f"Config added to log file")
|
76 |
+
file_logger.info('Config:\n{}'.format(yaml.dump(cfg)))
|
77 |
+
|
78 |
+
|
79 |
+
return logger
|
80 |
+
|
81 |
+
def find_cpu_info():
|
82 |
+
cpu_info = []
|
83 |
+
cpu_info.append(platform.processor())
|
84 |
+
try:
|
85 |
+
|
86 |
+
with open('/proc/cpuinfo') as f:
|
87 |
+
for line in f:
|
88 |
+
if line.startswith('model name'):
|
89 |
+
cpu_info.append(line.split(':')[1].strip())
|
90 |
+
break
|
91 |
+
return ' / '.join(cpu_info)
|
92 |
+
except:
|
93 |
+
try:
|
94 |
+
info = cpuinfo.get_cpu_info()
|
95 |
+
cpu_info = []
|
96 |
+
cpu_info.append(info['brand_raw'])
|
97 |
+
cpu_info.append(f"{info['hz_actual_friendly']}")
|
98 |
+
return ' / '.join(cpu_info)
|
99 |
+
except:
|
100 |
+
return "CPU: UNKNOWN"
|
101 |
+
|
102 |
+
|
103 |
+
def LM2_banner():
|
104 |
+
logo = """
|
105 |
+
_ __ __ __ _ _ ___
|
106 |
+
| | / _| \/ | | | (_) |__ \
|
107 |
+
| | ___ __ _| |_| \ / | __ _ ___| |__ _ _ __ ___ ) |
|
108 |
+
| | / _ \/ _` | _| |\/| |/ _` |/ __| '_ \| | '_ \ / _ \ / /
|
109 |
+
| |___| __/ (_| | | | | | | (_| | (__| | | | | | | | __// /_
|
110 |
+
|______\___|\__,_|_| |_| |_|\__,_|\___|_| |_|_|_| |_|\___|____|
|
111 |
+
__ __ _ _| |_ __ ___ _
|
112 |
+
\ \ / / | | |_ _| \ \ / (_) (_)
|
113 |
+
\ \ / /__ _ _ ___| |__ |_|_ _ _\ \ / / _ ___ _ ___ _ __
|
114 |
+
\ \/ / _ \| | | |/ __| '_ \ / _ \ '__\ \/ / | / __| |/ _ \| '_ \
|
115 |
+
\ / (_) | |_| | (__| | | | __/ | \ / | \__ \ | (_) | | | |
|
116 |
+
\/ \___/ \__,_|\___|_| |_|\___|_| \/ |_|___/_|\___/|_| |_|"""
|
117 |
+
return logo
|
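A minimal sketch of how start_logging might be called; the Dirs stand-in and the cfg dictionary below are invented for illustration and only carry the fields the function actually reads (Dirs.path_log and cfg['leafmachine']['project']['run_name']).

import os
from types import SimpleNamespace
from vouchervision.LM2_logger import start_logging

Dirs = SimpleNamespace(path_log='logs')                     # real runs pass the project's Dirs object
cfg = {'leafmachine': {'project': {'run_name': 'test'}}}    # only the key start_logging reads
os.makedirs(Dirs.path_log, exist_ok=True)

logger = start_logging(Dirs, cfg)
logger.info('VoucherVision run started')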
vouchervision/LeafMachine2_Config_Builder.py
ADDED
@@ -0,0 +1,246 @@
1 |
+
import os, yaml, platform
|
2 |
+
|
3 |
+
def get_default_download_folder():
|
4 |
+
system_platform = platform.system() # Gets the system platform, e.g., 'Linux', 'Windows', 'Darwin'
|
5 |
+
|
6 |
+
if system_platform == "Windows":
|
7 |
+
# Typically, the Downloads folder for Windows is in the user's profile folder
|
8 |
+
default_output_folder = os.path.join(os.getenv('USERPROFILE'), 'Downloads')
|
9 |
+
elif system_platform == "Darwin":
|
10 |
+
# Typically, the Downloads folder for macOS is in the user's home directory
|
11 |
+
default_output_folder = os.path.join(os.path.expanduser("~"), 'Downloads')
|
12 |
+
elif system_platform == "Linux":
|
13 |
+
# Typically, the Downloads folder for Linux is in the user's home directory
|
14 |
+
default_output_folder = os.path.join(os.path.expanduser("~"), 'Downloads')
|
15 |
+
else:
|
16 |
+
default_output_folder = "set/path/to/downloads/folder"
|
17 |
+
print("Please manually set the output folder")
|
18 |
+
return default_output_folder
|
19 |
+
|
20 |
+
def build_LM2_config():
|
21 |
+
dir_home = os.path.dirname(os.path.dirname(os.path.dirname(__file__)))
|
22 |
+
|
23 |
+
|
24 |
+
# Initialize the base structure
|
25 |
+
config_data = {
|
26 |
+
'leafmachine': {}
|
27 |
+
}
|
28 |
+
|
29 |
+
# Modular sections to be added to 'leafmachine'
|
30 |
+
do_section = {
|
31 |
+
'check_for_illegal_filenames': True,
|
32 |
+
'check_for_corrupt_images_make_vertical': True,
|
33 |
+
'run_leaf_processing': True
|
34 |
+
}
|
35 |
+
|
36 |
+
print_section = {
|
37 |
+
'verbose': True,
|
38 |
+
'optional_warnings': True
|
39 |
+
}
|
40 |
+
|
41 |
+
logging_section = {
|
42 |
+
'log_level': None
|
43 |
+
}
|
44 |
+
|
45 |
+
default_output_folder = get_default_download_folder()
|
46 |
+
project_section = {
|
47 |
+
'dir_output': default_output_folder,
|
48 |
+
# 'dir_output': 'D:/D_Desktop/LM2',
|
49 |
+
'run_name': 'test',
|
50 |
+
'image_location': 'local',
|
51 |
+
'GBIF_mode': 'all',
|
52 |
+
'batch_size': 40,
|
53 |
+
'num_workers': 2,
|
54 |
+
'dir_images_local': '',
|
55 |
+
# 'dir_images_local': 'D:\Dropbox\LM2_Env\Image_Datasets\Manuscript_Images',
|
56 |
+
'path_combined_csv_local': None,
|
57 |
+
'path_occurrence_csv_local': None,
|
58 |
+
'path_images_csv_local': None,
|
59 |
+
'use_existing_plant_component_detections': None,
|
60 |
+
'use_existing_archival_component_detections': None,
|
61 |
+
'process_subset_of_images': False,
|
62 |
+
'dir_images_subset': '',
|
63 |
+
'n_images_per_species': 10,
|
64 |
+
'species_list': ''
|
65 |
+
}
|
66 |
+
|
67 |
+
cropped_components_section = {
|
68 |
+
'do_save_cropped_annotations': False,
|
69 |
+
'save_cropped_annotations': ['label'],
|
70 |
+
'save_per_image': False,
|
71 |
+
'save_per_annotation_class': True,
|
72 |
+
'binarize_labels': False,
|
73 |
+
'binarize_labels_skeletonize': False
|
74 |
+
}
|
75 |
+
|
76 |
+
modules_section = {
|
77 |
+
'armature': False,
|
78 |
+
'specimen_crop': False
|
79 |
+
}
|
80 |
+
|
81 |
+
data_section = {
|
82 |
+
'save_json_rulers': False,
|
83 |
+
'save_json_measurements': False,
|
84 |
+
'save_individual_csv_files_rulers': False,
|
85 |
+
'save_individual_csv_files_measurements': False,
|
86 |
+
'save_individual_csv_files_landmarks': False,
|
87 |
+
'save_individual_efd_files': False,
|
88 |
+
'include_darwin_core_data_from_combined_file': False,
|
89 |
+
'do_apply_conversion_factor': True
|
90 |
+
}
|
91 |
+
|
92 |
+
overlay_section = {
|
93 |
+
'save_overlay_to_pdf': False,
|
94 |
+
'save_overlay_to_jpgs': True,
|
95 |
+
'overlay_dpi': 300, # Between 100 to 300
|
96 |
+
'overlay_background_color': 'black', # Either 'white' or 'black'
|
97 |
+
|
98 |
+
'show_archival_detections': True,
|
99 |
+
'show_plant_detections': True,
|
100 |
+
'show_segmentations': True,
|
101 |
+
'show_landmarks': True,
|
102 |
+
'ignore_archival_detections_classes': [],
|
103 |
+
'ignore_plant_detections_classes': ['leaf_whole', 'specimen'], # Could also include 'leaf_partial' and others if needed
|
104 |
+
'ignore_landmark_classes': [],
|
105 |
+
|
106 |
+
'line_width_archival': 12, # Previous value given was 2
|
107 |
+
'line_width_plant': 12, # Previous value given was 6
|
108 |
+
'line_width_seg': 12, # 12 is specified as "thick"
|
109 |
+
'line_width_efd': 12, # 3 is specified as "thick" but 12 is given here
|
110 |
+
'alpha_transparency_archival': 0.3,
|
111 |
+
'alpha_transparency_plant': 0,
|
112 |
+
'alpha_transparency_seg_whole_leaf': 0.4,
|
113 |
+
'alpha_transparency_seg_partial_leaf': 0.3
|
114 |
+
}
|
115 |
+
|
116 |
+
plant_component_detector_section = {
|
117 |
+
'detector_type': 'Plant_Detector',
|
118 |
+
'detector_version': 'PLANT_GroupAB_200',
|
119 |
+
'detector_iteration': 'PLANT_GroupAB_200',
|
120 |
+
'detector_weights': 'best.pt',
|
121 |
+
'minimum_confidence_threshold': 0.3, # Default is 0.5
|
122 |
+
'do_save_prediction_overlay_images': True,
|
123 |
+
'ignore_objects_for_overlay': [] # 'leaf_partial' can be included if needed
|
124 |
+
}
|
125 |
+
|
126 |
+
archival_component_detector_section = {
|
127 |
+
'detector_type': 'Archival_Detector',
|
128 |
+
'detector_version': 'PREP_final',
|
129 |
+
'detector_iteration': 'PREP_final',
|
130 |
+
'detector_weights': 'best.pt',
|
131 |
+
'minimum_confidence_threshold': 0.5, # Default is 0.5
|
132 |
+
'do_save_prediction_overlay_images': True,
|
133 |
+
'ignore_objects_for_overlay': []
|
134 |
+
}
|
135 |
+
|
136 |
+
armature_component_detector_section = {
|
137 |
+
'detector_type': 'Armature_Detector',
|
138 |
+
'detector_version': 'ARM_A_1000',
|
139 |
+
'detector_iteration': 'ARM_A_1000',
|
140 |
+
'detector_weights': 'best.pt',
|
141 |
+
'minimum_confidence_threshold': 0.5, # Optionally: 0.2
|
142 |
+
'do_save_prediction_overlay_images': True,
|
143 |
+
'ignore_objects_for_overlay': []
|
144 |
+
}
|
145 |
+
|
146 |
+
landmark_detector_section = {
|
147 |
+
'landmark_whole_leaves': True,
|
148 |
+
'landmark_partial_leaves': False,
|
149 |
+
'detector_type': 'Landmark_Detector_YOLO',
|
150 |
+
'detector_version': 'Landmarks',
|
151 |
+
'detector_iteration': 'Landmarks_V2',
|
152 |
+
'detector_weights': 'best.pt',
|
153 |
+
'minimum_confidence_threshold': 0.02,
|
154 |
+
'do_save_prediction_overlay_images': True,
|
155 |
+
'ignore_objects_for_overlay': [],
|
156 |
+
'use_existing_landmark_detections': None, # Example path provided
|
157 |
+
'do_show_QC_images': False,
|
158 |
+
'do_save_QC_images': True,
|
159 |
+
'do_show_final_images': False,
|
160 |
+
'do_save_final_images': True
|
161 |
+
}
|
162 |
+
|
163 |
+
landmark_detector_armature_section = {
|
164 |
+
'upscale_factor': 10,
|
165 |
+
'detector_type': 'Landmark_Detector_YOLO',
|
166 |
+
'detector_version': 'Landmarks_Arm_A_200',
|
167 |
+
'detector_iteration': 'Landmarks_Arm_A_200',
|
168 |
+
'detector_weights': 'last.pt',
|
169 |
+
'minimum_confidence_threshold': 0.06,
|
170 |
+
'do_save_prediction_overlay_images': True,
|
171 |
+
'ignore_objects_for_overlay': [],
|
172 |
+
'use_existing_landmark_detections': None, # Example path provided
|
173 |
+
'do_show_QC_images': True,
|
174 |
+
'do_save_QC_images': True,
|
175 |
+
'do_show_final_images': True,
|
176 |
+
'do_save_final_images': True
|
177 |
+
}
|
178 |
+
|
179 |
+
ruler_detection_section = {
|
180 |
+
'detect_ruler_type': True,
|
181 |
+
'ruler_detector': 'ruler_classifier_38classes_v-1.pt',
|
182 |
+
'ruler_binary_detector': 'model_scripted_resnet_720_withCompression.pt',
|
183 |
+
'minimum_confidence_threshold': 0.4,
|
184 |
+
'save_ruler_validation': False,
|
185 |
+
'save_ruler_validation_summary': True,
|
186 |
+
'save_ruler_processed': False
|
187 |
+
}
|
188 |
+
|
189 |
+
leaf_segmentation_section = {
|
190 |
+
'segment_whole_leaves': True,
|
191 |
+
'segment_partial_leaves': False,
|
192 |
+
|
193 |
+
'keep_only_best_one_leaf_one_petiole': True,
|
194 |
+
|
195 |
+
'save_segmentation_overlay_images_to_pdf': True,
|
196 |
+
'save_each_segmentation_overlay_image': True,
|
197 |
+
'save_individual_overlay_images': True, # Not recommended due to potential file count
|
198 |
+
'overlay_line_width': 1, # Default is 1
|
199 |
+
|
200 |
+
'use_efds_for_png_masks': False, # Requires calculate_elliptic_fourier_descriptors to be True
|
201 |
+
'save_masks_color': True,
|
202 |
+
'save_full_image_masks_color': True,
|
203 |
+
'save_rgb_cropped_images': True,
|
204 |
+
|
205 |
+
'find_minimum_bounding_box': True,
|
206 |
+
|
207 |
+
'calculate_elliptic_fourier_descriptors': True, # Default is True
|
208 |
+
'elliptic_fourier_descriptor_order': 40, # Default is 40
|
209 |
+
|
210 |
+
'segmentation_model': 'GroupB_Dataset_100000_Iter_1176PTS_512Batch_smooth_l1_LR00025_BGR',
|
211 |
+
'minimum_confidence_threshold': 0.7, # Alternatively: 0.9
|
212 |
+
'generate_overlay': True,
|
213 |
+
'overlay_dpi': 300, # Range: 100 to 300
|
214 |
+
'overlay_background_color': 'black' # Options: 'white' or 'black'
|
215 |
+
}
|
216 |
+
|
217 |
+
# Add the sections to the 'leafmachine' key
|
218 |
+
config_data['leafmachine']['do'] = do_section
|
219 |
+
config_data['leafmachine']['print'] = print_section
|
220 |
+
config_data['leafmachine']['logging'] = logging_section
|
221 |
+
config_data['leafmachine']['project'] = project_section
|
222 |
+
config_data['leafmachine']['cropped_components'] = cropped_components_section
|
223 |
+
config_data['leafmachine']['modules'] = modules_section
|
224 |
+
config_data['leafmachine']['data'] = data_section
|
225 |
+
config_data['leafmachine']['overlay'] = overlay_section
|
226 |
+
config_data['leafmachine']['plant_component_detector'] = plant_component_detector_section
|
227 |
+
config_data['leafmachine']['archival_component_detector'] = archival_component_detector_section
|
228 |
+
config_data['leafmachine']['armature_component_detector'] = armature_component_detector_section
|
229 |
+
config_data['leafmachine']['landmark_detector'] = landmark_detector_section
|
230 |
+
config_data['leafmachine']['landmark_detector_armature'] = landmark_detector_armature_section
|
231 |
+
config_data['leafmachine']['ruler_detection'] = ruler_detection_section
|
232 |
+
config_data['leafmachine']['leaf_segmentation'] = leaf_segmentation_section
|
233 |
+
|
234 |
+
return config_data, dir_home
|
235 |
+
|
236 |
+
def write_config_file(config_data, dir_home, filename="LeafMachine2.yaml"):
|
237 |
+
file_path = os.path.join(dir_home, filename)
|
238 |
+
|
239 |
+
# Write the data to a YAML file
|
240 |
+
with open(file_path, "w") as outfile:
|
241 |
+
yaml.dump(config_data, outfile, default_flow_style=False)
|
242 |
+
|
243 |
+
if __name__ == '__main__':
|
244 |
+
config_data, dir_home = build_LM2_config()
|
245 |
+
write_config_file(config_data, dir_home)
|
246 |
+
|
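Once written, the generated LeafMachine2.yaml can be read back with pyyaml and individual settings inspected; a small sketch, assuming the file sits in the current working directory:

import yaml

with open('LeafMachine2.yaml') as f:
    cfg = yaml.safe_load(f)

print(cfg['leafmachine']['project']['dir_output'])                            # default download folder chosen above
print(cfg['leafmachine']['ruler_detection']['minimum_confidence_threshold'])  # 0.4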
vouchervision/OCR_google_cloud_vision.py
ADDED
@@ -0,0 +1,107 @@
import os, io, sys, inspect
from google.cloud import vision, storage
from PIL import Image, ImageDraw

currentdir = os.path.dirname(os.path.abspath(
    inspect.getfile(inspect.currentframe())))
parentdir = os.path.dirname(currentdir)
sys.path.append(parentdir)

def draw_boxes(image, bounds, color):
    if bounds:
        draw = ImageDraw.Draw(image)
        width, height = image.size
        line_width = int((width + height) / 2 * 0.001)  # Line width is 0.1% of the average image dimension

        for bound in bounds:
            draw.polygon(
                [
                    bound["vertices"][0]["x"], bound["vertices"][0]["y"],
                    bound["vertices"][1]["x"], bound["vertices"][1]["y"],
                    bound["vertices"][2]["x"], bound["vertices"][2]["y"],
                    bound["vertices"][3]["x"], bound["vertices"][3]["y"],
                ],
                outline=color,
                width=line_width
            )
    return image

def detect_text(path):
    client = vision.ImageAnnotatorClient()
    with io.open(path, 'rb') as image_file:
        content = image_file.read()
    image = vision.Image(content=content)
    response = client.document_text_detection(image=image)
    texts = response.text_annotations

    if response.error.message:
        raise Exception(
            '{}\nFor more info on error messages, check: '
            'https://cloud.google.com/apis/design/errors'.format(
                response.error.message))

    # Extract bounding boxes
    bounds = []
    text_to_box_mapping = {}
    for text in texts[1:]:  # Skip the first entry, as it represents the entire detected text
        # Convert BoundingPoly to dictionary
        bound_dict = {
            "vertices": [
                {"x": vertex.x, "y": vertex.y} for vertex in text.bounding_poly.vertices
            ]
        }
        bounds.append(bound_dict)
        text_to_box_mapping[str(bound_dict)] = text.description

    if texts:
        # cleaned_text = texts[0].description.replace("\n", " ").replace("\t", " ").replace("|", " ")
        cleaned_text = texts[0].description
        return cleaned_text, bounds, text_to_box_mapping
    else:
        return '', None, None

def overlay_boxes_on_image(path, bounds):
    image = Image.open(path)
    draw_boxes(image, bounds, "green")
    return image


# ''' Google Vision'''
# def detect_text(path):
#     """Detects text in the file located in the local filesystem."""
#     client = vision.ImageAnnotatorClient()
#
#     with io.open(path, 'rb') as image_file:
#         content = image_file.read()
#
#     image = vision.Image(content=content)
#
#     response = client.document_text_detection(image=image)
#     texts = response.text_annotations
#
#     if response.error.message:
#         raise Exception(
#             '{}\nFor more info on error messages, check: '
#             'https://cloud.google.com/apis/design/errors'.format(
#                 response.error.message))
#
#     return texts[0].description if texts else ''
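A short usage sketch for the OCR helpers above. It assumes the GOOGLE_APPLICATION_CREDENTIALS environment variable points at a valid Google Cloud service-account JSON file, and 'label.jpg' is an illustrative local image path rather than a file shipped with the repo:

from vouchervision.OCR_google_cloud_vision import detect_text, overlay_boxes_on_image

text, bounds, text_to_box_mapping = detect_text('label.jpg')
print(text)                                    # full OCR transcript of the label
if bounds:
    annotated = overlay_boxes_on_image('label.jpg', bounds)
    annotated.save('label_with_boxes.jpg')     # PIL Image returned by overlay_boxes_on_image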
vouchervision/PaLM_example_script.py
ADDED
@@ -0,0 +1,70 @@
"""
At the command line, only need to run once to install the package via pip:
$ pip install google-generativeai
"""

import google.generativeai as palm

palm.configure(api_key="YOUR API KEY")

defaults = {
    'model': 'models/text-bison-001',
    'temperature': 0,
    'candidate_count': 1,
    'top_k': 40,
    'top_p': 0.95,
    'max_output_tokens': 1024,
    'stop_sequences': [],
    'safety_settings': [{"category":"HARM_CATEGORY_DEROGATORY","threshold":1},{"category":"HARM_CATEGORY_TOXICITY","threshold":1},{"category":"HARM_CATEGORY_VIOLENCE","threshold":2},{"category":"HARM_CATEGORY_SEXUAL","threshold":2},{"category":"HARM_CATEGORY_MEDICAL","threshold":2},{"category":"HARM_CATEGORY_DANGEROUS","threshold":2}],
}
prompt = """1. Your job is to return a new dict based on the structure of the reference dict ref_dict and these are your rules.
2. You must look at ref_dict and refactor the new text called OCR to match the same formatting.
3. OCR contains unstructured text inside of [], use your knowledge to put the OCR text into the correct ref_dict column.
4. If OCR is mostly empty and contains substantially less text than the ref_dict examples, then only return "None" and skip all other steps.
5. If there is a field that does not have a direct proxy in the OCR text, you can fill it in based on your knowledge, but you cannot generate new information.
6. Never put text from the ref_dict values into the new dict, but you must use the headers from ref_dict.
7. There cannot be duplicate dictionary fields.
8. Only return the new dict, do not explain your answer.

"Genus" - {"format": "[Genus]" or "[Family] indet" if no genus", "null_value": "", "description": taxonomic determination to genus, do capitalize genus}
"Species" - {"format": "[species]" or "indet" if no species, "null_value": "", "description": taxonomic determination to species, do not capitalize species}
"subspecies" - {"format": "[subspecies]", "null_value": "", "description": taxonomic determination to subspecies (subsp.)}
"variety" - {"format": "[variety]", "null_value": "", "description": taxonomic determination to variety (var.)}
"forma" - {"format": "[form]", "null_value": "", "description": taxonomic determination to form (f.)}

"Country" - {"format": "[Country]", "null_value": "no data", "description": Country that corresponds to the current geographic location of collection; capitalize first letter of each word; use the entire location name even if an abbreviation is given}
"State" - {"format": "[Adm. Division 1]", "null_value": "no data", "description": Administrative division 1 that corresponds to the current geographic location of collection; capitalize first letter of each word}
"County" - {"format": "[Adm. Division 2]", "null_value": "no data", "description": Administrative division 2 that corresponds to the current geographic location of collection; capitalize first letter of each word}
"Locality Name" - {"format": "verbatim", if no geographic info: "no data provided on label of catalog no: [######]", or if illegible: "locality present but illegible/not translated for catalog no: #######", or if no named locality: "no named locality for catalog no: #######", "description": "Description of geographic location or landscape"}

"Min Elevation" - {format: "elevation integer", "null_value": "", "description": Elevation or altitude in meters, convert from feet to meters if 'm' or 'meters' is not in the text and round to integer, default field for elevation if a range is not given}
"Max Elevation" - {format: "elevation integer", "null_value": "", "description": Elevation or altitude in meters, convert from feet to meters if 'm' or 'meters' is not in the text and round to integer, maximum elevation if there are two elevations listed but '' otherwise}
"Elevation Units" - {format: "m", "null_value": "", "description": "m" only if an elevation is present}

"Verbatim Coordinates" - {"format": "[Lat, Long | UTM | TRS]", "null_value": "", "description": Verbatim coordinates as they appear on the label, fix typos to match standardized GPS coordinate format}

"Datum" - {"format": "[WGS84, NAD23 etc.]", "null_value": "not present", "description": Datum of coordinates on label; "" if GPS coordinates are not in OCR}
"Cultivated" - {"format": "yes", "null_value": "", "description": Indicates if specimen was grown in cultivation}
"Habitat" - {"format": "verbatim", "null_value": "", "description": Description of habitat or location where specimen was collected, ignore descriptions of the plant itself}
"Collectors" - {"format": "[Collector]", "null_value": "not present", "description": Full name of person (i.e., agent) who collected the specimen; if more than one person then separate the names with commas}
"Collector Number" - {"format": "[Collector No.]", "null_value": "s.n.", "description": Sequential number assigned to collection, associated with the collector}
"Verbatim Date" - {"format": "verbatim", "null_value": "s.d.", "description": Date of collection exactly as it appears on the label}
"Date" - {"format": "[yyyy-mm-dd]", "null_value": "", "description": Date of collection formatted as year, month, and day; zeros may be used for unknown values i.e. 0000-00-00 if no date, YYYY-00-00 if only year, YYYY-MM-00 if no day}
"End Date" - {"format": "[yyyy-mm-dd]", "null_value": "", "description": If date range is listed, later date of collection range}
input: El Kala Algeria Aegilops El Tarf 1919-05-20 locality not transcribed for catalog no: 1702723 Charles d'Alleizette ovata May 20, 1919 s.n.

output: {"Genus": "Aegilops", "Species": "ovata", "subspecies": "", "variety": "", "forma": "", "Country": "Algeria", "State": "El Tarf", "County": "El Kala", "Locality Name": "locality not transcribed for catalog no: 1702723", "Min Elevation": "", "Max Elevation": "", "Elevation Units": "", "Verbatim Coordinates": "", "Datum": "", "Cultivated": "", "Habitat": "", "Collectors": "Charles d'Alleizette", "Collector Number": "s.n.", "Verbatim Date": "May 20, 1919", "Date": "1919-05-20", "End Date": ""}

input: El Kala Algeria Agrostis El Tarf 1918-06-08 locality not transcribed for catalog no: 1702919 Charles d'Alleizette pallida 8 Juin 1918 7748

output: {"Genus": "Agrostis", "Species": "pallida", "subspecies": "", "variety": "", "forma": "", "Country": "Algeria", "State": "El Tarf", "County": "El Kala", "Locality Name": "locality not transcribed for catalog no: 1702919", "Min Elevation": "", "Max Elevation": "", "Elevation Units": "", "Verbatim Coordinates": "", "Datum": "", "Cultivated": "", "Habitat": "", "Collectors": "Charles d'Alleizette", "Collector Number": "7748", "Verbatim Date": "8 Juin 1918", "Date": "1918-06-08", "End Date": ""}

input: Gympie river nr. sawmill Australia Hydrilla Queensland 1943-12-26 locality not transcribed for catalog no: 1702580 M. S. Clemens verticillata Dec. 26/43 43329

output:"""

response = palm.generate_text(
    **defaults,
    prompt=prompt
)
print(response.result)
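Rule 4 of the prompt means the reply is either the literal string "None" or a single dict, so callers normally need to turn response.result back into a Python object. A hedged sketch of that step; json.loads assumes the model emitted valid JSON, and real runs may need the more forgiving cleanup that VoucherVision's LLM wrappers perform:

import json

raw = response.result or ''
if raw.strip().lower() in ('', 'none'):
    record = None                      # model judged the OCR too sparse to transcribe
else:
    try:
        record = json.loads(raw)       # happy path: the reply is valid JSON
    except json.JSONDecodeError:
        record = None                  # fall back; production code would attempt repair here
print(record)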
vouchervision/VoucherVision_Config_Builder.py
ADDED
@@ -0,0 +1,576 @@
import os, yaml, platform, traceback
from vouchervision.LeafMachine2_Config_Builder import get_default_download_folder, write_config_file
from vouchervision.general_utils import validate_dir, print_main_fail
from vouchervision.vouchervision_main import voucher_vision
from general_utils import get_cfg_from_full_path

def build_VV_config():
    #############################################
    ############ Set common defaults ############
    #############################################
    # Changing the values below will set new
    # default values each time you open the
    # VoucherVision user interface
    #############################################
    #############################################
    #############################################

    dir_home = os.path.dirname(os.path.dirname(__file__))
    run_name = 'test'
    # dir_images_local = 'D:/Dropbox/LM2_Env/Image_Datasets/GBIF_BroadSample_3SppPerFamily1'
    dir_images_local = os.path.join(dir_home, 'demo', 'demo_images')

    # The default output location is the computer's "Downloads" folder
    # You can set dir_output directly by typing the folder path,
    # OR you can uncomment the line "dir_output = default_output_folder"
    # to have VoucherVision save to the Downloads folder by default
    default_output_folder = get_default_download_folder()
    dir_output = default_output_folder
    # dir_output = 'D:/D_Desktop/LM2'

    prefix_removal = ''  # 'MICH-V-'
    suffix_removal = ''
    catalog_numerical_only = False

    LLM_version_user = 'Azure GPT 4'
    prompt_version = 'Version 2'  # from ["Version 1", "Version 1 No Domain Knowledge", "Version 2"]
    use_LeafMachine2_collage_images = False  # Use LeafMachine2 collage images

    batch_size = 500

    path_domain_knowledge = os.path.join(dir_home, 'domain_knowledge', 'SLTP_UM_AllAsiaMinimalInRegion.xlsx')
    embeddings_database_name = os.path.splitext(os.path.basename(path_domain_knowledge))[0]

    #############################################
    #############################################
    ########## DO NOT EDIT BELOW HERE ###########
    #############################################
    #############################################
    return assemble_config(dir_home, run_name, dir_images_local, dir_output,
                           prefix_removal, suffix_removal, catalog_numerical_only, LLM_version_user, batch_size,
                           path_domain_knowledge, embeddings_database_name, use_LeafMachine2_collage_images,
                           prompt_version, use_domain_knowledge=False)

def assemble_config(dir_home, run_name, dir_images_local, dir_output,
                    prefix_removal, suffix_removal, catalog_numerical_only, LLM_version_user, batch_size,
                    path_domain_knowledge, embeddings_database_name, use_LeafMachine2_collage_images,
                    prompt_version, use_domain_knowledge=False):

    # Initialize the base structure
    config_data = {
        'leafmachine': {}
    }

    # Modular sections to be added to 'leafmachine'
    do_section = {
        'check_for_illegal_filenames': False,
        'check_for_corrupt_images_make_vertical': True,
    }

    print_section = {
        'verbose': True,
        'optional_warnings': True
    }

    logging_section = {
        'log_level': None
    }

    project_section = {
        'dir_output': dir_output,
        'run_name': run_name,
        'image_location': 'local',
        'batch_size': batch_size,
        'num_workers': 1,
        'dir_images_local': dir_images_local,
        'continue_run_from_partial_xlsx': '',
        'prefix_removal': prefix_removal,
        'suffix_removal': suffix_removal,
        'catalog_numerical_only': catalog_numerical_only,
        'use_domain_knowledge': use_domain_knowledge,
        'embeddings_database_name': embeddings_database_name,
        'build_new_embeddings_database': False,
        'path_to_domain_knowledge_xlsx': path_domain_knowledge,
        'prompt_version': prompt_version,
        'delete_all_temps': False,
        'delete_temps_keep_VVE': False,
    }

    modules_section = {
        'specimen_crop': True
    }

    LLM_version = LLM_version_user
    use_RGB_label_images = use_LeafMachine2_collage_images  # Use LeafMachine2 collage images

    cropped_components_section = {
        'do_save_cropped_annotations': True,
        'save_cropped_annotations': ['label', 'barcode'],
        'save_per_image': False,
        'save_per_annotation_class': True,
        'binarize_labels': False,
        'binarize_labels_skeletonize': False
    }

    data_section = {
        'save_json_rulers': False,
        'save_json_measurements': False,
        'save_individual_csv_files_rulers': False,
        'save_individual_csv_files_measurements': False,
        'save_individual_csv_files_landmarks': False,
        'save_individual_efd_files': False,
        'include_darwin_core_data_from_combined_file': False,
        'do_apply_conversion_factor': False
    }

    overlay_section = {
        'save_overlay_to_pdf': False,
        'save_overlay_to_jpgs': True,
        'overlay_dpi': 300,  # Between 100 to 300
        'overlay_background_color': 'black',  # Either 'white' or 'black'

        'show_archival_detections': True,
        'show_plant_detections': True,
        'show_segmentations': True,
        'show_landmarks': True,
        'ignore_archival_detections_classes': [],
        'ignore_plant_detections_classes': ['leaf_whole', 'specimen'],  # Could also include 'leaf_partial' and others if needed
        'ignore_landmark_classes': [],

        'line_width_archival': 12,  # Previous value given was 2
        'line_width_plant': 12,  # Previous value given was 6
        'line_width_seg': 12,  # 12 is specified as "thick"
        'line_width_efd': 12,  # 3 is specified as "thick" but 12 is given here
        'alpha_transparency_archival': 0.3,
        'alpha_transparency_plant': 0,
        'alpha_transparency_seg_whole_leaf': 0.4,
        'alpha_transparency_seg_partial_leaf': 0.3
    }

    archival_component_detector_section = {
        'detector_type': 'Archival_Detector',
        'detector_version': 'PREP_final',
        'detector_iteration': 'PREP_final',
        'detector_weights': 'best.pt',
        'minimum_confidence_threshold': 0.5,  # Default is 0.5
        'do_save_prediction_overlay_images': True,
        'ignore_objects_for_overlay': []
    }

    # Add the sections to the 'leafmachine' key
    config_data['leafmachine']['do'] = do_section
    config_data['leafmachine']['print'] = print_section
    config_data['leafmachine']['logging'] = logging_section
    config_data['leafmachine']['project'] = project_section
    config_data['leafmachine']['LLM_version'] = LLM_version
    config_data['leafmachine']['use_RGB_label_images'] = use_RGB_label_images
    config_data['leafmachine']['cropped_components'] = cropped_components_section
    config_data['leafmachine']['modules'] = modules_section
    config_data['leafmachine']['data'] = data_section
    config_data['leafmachine']['overlay'] = overlay_section
    config_data['leafmachine']['archival_component_detector'] = archival_component_detector_section

    return config_data, dir_home
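A minimal sketch of generating a VoucherVision run configuration from the defaults above; write_config_file is the helper imported from LeafMachine2_Config_Builder, and the 'VoucherVision.yaml' filename is illustrative rather than required:

from vouchervision.VoucherVision_Config_Builder import build_VV_config
from vouchervision.LeafMachine2_Config_Builder import write_config_file

config_data, dir_home = build_VV_config()
write_config_file(config_data, dir_home, filename='VoucherVision.yaml')
print(config_data['leafmachine']['project']['run_name'])   # 'test' by default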
def build_api_tests(api):
    dir_home = os.path.dirname(os.path.dirname(__file__))
    path_to_configs = os.path.join(dir_home, 'demo', 'demo_configs')

    dir_home = os.path.dirname(os.path.dirname(__file__))
    dir_images_local = os.path.join(dir_home, 'demo', 'demo_images')
    validate_dir(os.path.join(dir_home, 'demo', 'demo_configs'))
    path_domain_knowledge = os.path.join(dir_home, 'domain_knowledge', 'SLTP_UM_AllAsiaMinimalInRegion.xlsx')
    embeddings_database_name = os.path.splitext(os.path.basename(path_domain_knowledge))[0]
    prefix_removal = ''
    suffix_removal = ''
    catalog_numerical_only = False
    batch_size = 500

    # ### Option 1: "GPT 4" of ["GPT 4", "GPT 3.5", "Azure GPT 4", "Azure GPT 3.5", "PaLM 2"]
    # LLM_version_user = 'Azure GPT 4'

    # ### Option 2: False of [False, True]
    # use_LeafMachine2_collage_images = False

    # ### Option 3: False of [False, True]
    # use_domain_knowledge = True

    test_results = {}
    if api == 'openai':
        OPT1, OPT2, OPT3 = TestOptionsAPI_openai.get_options()
    elif api == 'palm':
        OPT1, OPT2, OPT3 = TestOptionsAPI_palm.get_options()
    elif api == 'azure_openai':
        OPT1, OPT2, OPT3 = TestOptionsAPI_azure_openai.get_options()
    else:
        raise

    ind = -1
    ind_opt1 = -1
    ind_opt2 = -1
    ind_opt3 = -1

    for opt1 in OPT1:
        ind_opt1 += 1
        for opt2 in OPT2:
            ind_opt2 += 1
            for opt3 in OPT3:
                ind += 1
                ind_opt3 += 1

                LLM_version_user = opt1
                use_LeafMachine2_collage_images = opt2
                prompt_version = opt3

                filename = f"{ind}__OPT1-{ind_opt1}__OPT2-{ind_opt2}__OPT3-{ind_opt3}.yaml"
                run_name = f"{ind}__OPT1-{ind_opt1}__OPT2-{ind_opt2}__OPT3-{ind_opt3}"

                dir_output = os.path.join(dir_home, 'demo', 'demo_output', 'run_name')
                validate_dir(dir_output)

                config_data, dir_home = assemble_config(dir_home, run_name, dir_images_local, dir_output,
                                                        prefix_removal, suffix_removal, catalog_numerical_only, LLM_version_user, batch_size,
                                                        path_domain_knowledge, embeddings_database_name, use_LeafMachine2_collage_images,
                                                        prompt_version)

                write_config_file(config_data, os.path.join(dir_home, 'demo', 'demo_configs'), filename=filename)

                test_results[run_name] = False
            ind_opt3 = -1
        ind_opt2 = -1
    ind_opt1 = -1

    return dir_home, path_to_configs, test_results

def build_demo_tests(llm_version):
    dir_home = os.path.dirname(os.path.dirname(__file__))
    path_to_configs = os.path.join(dir_home, 'demo', 'demo_configs')

    dir_home = os.path.dirname(os.path.dirname(__file__))
    dir_images_local = os.path.join(dir_home, 'demo', 'demo_images')
    validate_dir(os.path.join(dir_home, 'demo', 'demo_configs'))
    path_domain_knowledge = os.path.join(dir_home, 'domain_knowledge', 'SLTP_UM_AllAsiaMinimalInRegion.xlsx')
    embeddings_database_name = os.path.splitext(os.path.basename(path_domain_knowledge))[0]
    prefix_removal = ''
    suffix_removal = ''
    catalog_numerical_only = False
    batch_size = 500

    # ### Option 1: "GPT 4" of ["GPT 4", "GPT 3.5", "Azure GPT 4", "Azure GPT 3.5", "PaLM 2"]
    # LLM_version_user = 'Azure GPT 4'

    # ### Option 2: False of [False, True]
    # use_LeafMachine2_collage_images = False

    # ### Option 3: False of [False, True]
    # use_domain_knowledge = True

    test_results = {}
    if llm_version == 'gpt':
        OPT1, OPT2, OPT3 = TestOptionsGPT.get_options()
    elif llm_version == 'palm':
        OPT1, OPT2, OPT3 = TestOptionsPalm.get_options()
    else:
        raise

    ind = -1
    ind_opt1 = -1
    ind_opt2 = -1
    ind_opt3 = -1

    for opt1 in OPT1:
        ind_opt1 += 1
        for opt2 in OPT2:
            ind_opt2 += 1
            for opt3 in OPT3:
                ind += 1
                ind_opt3 += 1

                LLM_version_user = opt1
                use_LeafMachine2_collage_images = opt2
                prompt_version = opt3

                filename = f"{ind}__OPT1-{ind_opt1}__OPT2-{ind_opt2}__OPT3-{ind_opt3}.yaml"
                run_name = f"{ind}__OPT1-{ind_opt1}__OPT2-{ind_opt2}__OPT3-{ind_opt3}"

                dir_output = os.path.join(dir_home, 'demo', 'demo_output', 'run_name')
                validate_dir(dir_output)

                if llm_version == 'gpt':
                    if prompt_version in ['Version 1']:
                        config_data, dir_home = assemble_config(dir_home, run_name, dir_images_local, dir_output,
                                                                prefix_removal, suffix_removal, catalog_numerical_only, LLM_version_user, batch_size,
                                                                path_domain_knowledge, embeddings_database_name, use_LeafMachine2_collage_images,
                                                                prompt_version, use_domain_knowledge=True)
                    else:
                        config_data, dir_home = assemble_config(dir_home, run_name, dir_images_local, dir_output,
                                                                prefix_removal, suffix_removal, catalog_numerical_only, LLM_version_user, batch_size,
                                                                path_domain_knowledge, embeddings_database_name, use_LeafMachine2_collage_images,
                                                                prompt_version)
                elif llm_version == 'palm':
                    if prompt_version in ['Version 1 PaLM 2']:
                        config_data, dir_home = assemble_config(dir_home, run_name, dir_images_local, dir_output,
                                                                prefix_removal, suffix_removal, catalog_numerical_only, LLM_version_user, batch_size,
                                                                path_domain_knowledge, embeddings_database_name, use_LeafMachine2_collage_images,
                                                                prompt_version, use_domain_knowledge=True)
                    else:
                        config_data, dir_home = assemble_config(dir_home, run_name, dir_images_local, dir_output,
                                                                prefix_removal, suffix_removal, catalog_numerical_only, LLM_version_user, batch_size,
                                                                path_domain_knowledge, embeddings_database_name, use_LeafMachine2_collage_images,
                                                                prompt_version)

                write_config_file(config_data, os.path.join(dir_home, 'demo', 'demo_configs'), filename=filename)

                test_results[run_name] = False
            ind_opt3 = -1
        ind_opt2 = -1
    ind_opt1 = -1

    return dir_home, path_to_configs, test_results

class TestOptionsGPT:
    OPT1 = ["GPT 4", "GPT 3.5", "Azure GPT 4", "Azure GPT 3.5"]
    OPT2 = [False, True]
    OPT3 = ["Version 1", "Version 1 No Domain Knowledge", "Version 2"]

    @classmethod
    def get_options(cls):
        return cls.OPT1, cls.OPT2, cls.OPT3

    @classmethod
    def get_length(cls):
        return 24

class TestOptionsPalm:
    OPT1 = ["PaLM 2"]
    OPT2 = [False, True]
    OPT3 = ["Version 1 PaLM 2", "Version 1 PaLM 2 No Domain Knowledge", "Version 2 PaLM 2"]

    @classmethod
    def get_options(cls):
        return cls.OPT1, cls.OPT2, cls.OPT3

    @classmethod
    def get_length(cls):
        return 6

class TestOptionsAPI_openai:
    OPT1 = ["GPT 3.5"]
    OPT2 = [False]
    OPT3 = ["Version 2"]

    @classmethod
    def get_options(cls):
        return cls.OPT1, cls.OPT2, cls.OPT3

    @classmethod
    def get_length(cls):
        return 24

class TestOptionsAPI_azure_openai:
    OPT1 = ["Azure GPT 3.5"]
    OPT2 = [False]
    OPT3 = ["Version 2"]

    @classmethod
    def get_options(cls):
        return cls.OPT1, cls.OPT2, cls.OPT3

    @classmethod
    def get_length(cls):
        return 24

class TestOptionsAPI_palm:
    OPT1 = ["PaLM 2"]
    OPT2 = [False]
    OPT3 = ["Version 2 PaLM 2"]

    @classmethod
    def get_options(cls):
        return cls.OPT1, cls.OPT2, cls.OPT3

    @classmethod
    def get_length(cls):
        return 6
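The option classes above define the grid that build_api_tests and build_demo_tests expand into run names such as 5__OPT1-0__OPT2-1__OPT3-2. A small sketch, mirroring the cfg.split('__') parsing used in run_demo_tests_GPT below, of decoding such a name back into its options; the sample string is illustrative only:

def decode_run_name(run_name, options_cls):
    # Mirrors the index parsing done in run_demo_tests_GPT / run_demo_tests_Palm.
    OPT1, OPT2, OPT3 = options_cls.get_options()
    test_ind, ind_opt1, ind_opt2, ind_opt3 = run_name.split('__')
    return (OPT1[int(ind_opt1.split('-')[1])],
            OPT2[int(ind_opt2.split('-')[1])],
            OPT3[int(ind_opt3.split('-')[1])])

print(decode_run_name("5__OPT1-0__OPT2-1__OPT3-2", TestOptionsGPT))
# -> ('GPT 4', True, 'Version 2')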
def run_demo_tests_GPT(progress_report):
    dir_home, path_to_configs, test_results = build_demo_tests('gpt')
    progress_report.set_n_overall(len(test_results.items()))

    JSON_results = {}

    for ind, (cfg, result) in enumerate(test_results.items()):
        OPT1, OPT2, OPT3 = TestOptionsGPT.get_options()

        test_ind, ind_opt1, ind_opt2, ind_opt3 = cfg.split('__')
        opt1_readable = OPT1[int(ind_opt1.split('-')[1])]

        if opt1_readable in ["Azure GPT 4", "Azure GPT 3.5"]:
            api_version = 'gpt-azure'
        elif opt1_readable in ["GPT 4", "GPT 3.5"]:
            api_version = 'gpt'
        else:
            raise

        opt2_readable = "Use LeafMachine2 for Collage Images" if OPT2[int(ind_opt2.split('-')[1])] else "Don't use LeafMachine2 for Collage Images"
        opt3_readable = f"Prompt {OPT3[int(ind_opt3.split('-')[1])]}"
        # Construct the human-readable test name
        human_readable_name = f"{opt1_readable}, {opt2_readable}, {opt3_readable}"
        get_n_overall = progress_report.get_n_overall()
        progress_report.update_overall(f"Test {int(test_ind)+1} of {get_n_overall} --- Validating {human_readable_name}")
        print_main_fail(f"Starting validation test: {human_readable_name}")
        cfg_file_path = os.path.join(path_to_configs, '.'.join([cfg, 'yaml']))

        if check_API_key(dir_home, api_version) and check_API_key(dir_home, 'google-vision-ocr'):
            try:
                last_JSON_response, total_cost = voucher_vision(cfg_file_path, dir_home, cfg_test=None, progress_report=progress_report, test_ind=int(test_ind))
                test_results[cfg] = True
                JSON_results[ind] = last_JSON_response
            except Exception as e:
                JSON_results[ind] = None
                test_results[cfg] = False
                print(f"An exception occurred: {e}")
                traceback.print_exc()  # This will print the full traceback
        else:
            fail_response = ''
            if not check_API_key(dir_home, 'google-vision-ocr'):
                fail_response += "No API key found for Google Vision OCR"
            if not check_API_key(dir_home, api_version):
                fail_response += f" + No API key found for {api_version}"
            test_results[cfg] = False
            JSON_results[ind] = fail_response
            print(f"No API key found for {fail_response}")

    return test_results, JSON_results

def run_demo_tests_Palm(progress_report):
    api_version = 'palm'

    dir_home, path_to_configs, test_results = build_demo_tests('palm')
    progress_report.set_n_overall(len(test_results.items()))

    JSON_results = {}

    for ind, (cfg, result) in enumerate(test_results.items()):
        OPT1, OPT2, OPT3 = TestOptionsPalm.get_options()
        test_ind, ind_opt1, ind_opt2, ind_opt3 = cfg.split('__')
        opt1_readable = OPT1[int(ind_opt1.split('-')[1])]
        opt2_readable = "Use LeafMachine2 for Collage Images" if OPT2[int(ind_opt2.split('-')[1])] else "Don't use LeafMachine2 for Collage Images"
        opt3_readable = f"Prompt {OPT3[int(ind_opt3.split('-')[1])]}"
        # opt3_readable = "Use Domain Knowledge" if OPT3[int(ind_opt3.split('-')[1])] else "Don't use Domain Knowledge"
        # Construct the human-readable test name
        human_readable_name = f"{opt1_readable}, {opt2_readable}, {opt3_readable}"
        get_n_overall = progress_report.get_n_overall()
        progress_report.update_overall(f"Test {int(test_ind)+1} of {get_n_overall} --- Validating {human_readable_name}")
        print_main_fail(f"Starting validation test: {human_readable_name}")
        cfg_file_path = os.path.join(path_to_configs, '.'.join([cfg, 'yaml']))

        if check_API_key(dir_home, api_version) and check_API_key(dir_home, 'google-vision-ocr'):
            try:
                last_JSON_response, total_cost = voucher_vision(cfg_file_path, dir_home, cfg_test=None, progress_report=progress_report, test_ind=int(test_ind))
                test_results[cfg] = True
                JSON_results[ind] = last_JSON_response
            except Exception as e:
                test_results[cfg] = False
                JSON_results[ind] = None
                print(f"An exception occurred: {e}")
                traceback.print_exc()  # This will print the full traceback
        else:
            fail_response = ''
            if not check_API_key(dir_home, 'google-vision-ocr'):
                fail_response += "No API key found for Google Vision OCR"
            if not check_API_key(dir_home, api_version):
                fail_response += f" + No API key found for {api_version}"
            test_results[cfg] = False
            JSON_results[ind] = fail_response
            print(f"No API key found for {fail_response}")

    return test_results, JSON_results

def run_api_tests(api):
    try:
        dir_home, path_to_configs, test_results = build_api_tests(api)

        JSON_results = {}

        for ind, (cfg, result) in enumerate(test_results.items()):
            if api == 'openai':
                OPT1, OPT2, OPT3 = TestOptionsAPI_openai.get_options()
            elif api == 'azure_openai':
                OPT1, OPT2, OPT3 = TestOptionsAPI_azure_openai.get_options()
            elif api == 'palm':
                OPT1, OPT2, OPT3 = TestOptionsAPI_palm.get_options()
            test_ind, ind_opt1, ind_opt2, ind_opt3 = cfg.split('__')
            opt1_readable = OPT1[int(ind_opt1.split('-')[1])]
            opt2_readable = "Use LeafMachine2 for Collage Images" if OPT2[int(ind_opt2.split('-')[1])] else "Don't use LeafMachine2 for Collage Images"
            opt3_readable = f"Prompt {OPT3[int(ind_opt3.split('-')[1])]}"
            # opt3_readable = "Use Domain Knowledge" if OPT3[int(ind_opt3.split('-')[1])] else "Don't use Domain Knowledge"
            # Construct the human-readable test name
            human_readable_name = f"{opt1_readable}, {opt2_readable}, {opt3_readable}"
            print_main_fail(f"Starting validation test: {human_readable_name}")
            cfg_file_path = os.path.join(path_to_configs, '.'.join([cfg, 'yaml']))

            if check_API_key(dir_home, api) and check_API_key(dir_home, 'google-vision-ocr'):
                try:
                    last_JSON_response, total_cost = voucher_vision(cfg_file_path, dir_home, None, cfg_test=None, progress_report=None, test_ind=int(test_ind))
                    test_results[cfg] = True
                    JSON_results[ind] = last_JSON_response
                    return True

                except Exception as e:
                    print(e)
                    return False
            else:
                return False
    except Exception as e:
        print(e)
        return False

def has_API_key(val):
    if val != '':
        return True
    else:
        return False

def check_if_usable():
    dir_home = os.path.dirname(os.path.dirname(__file__))
    path_cfg_private = os.path.join(dir_home, 'PRIVATE_DATA.yaml')
    cfg_private = get_cfg_from_full_path(path_cfg_private)

    has_key_openai = has_API_key(cfg_private['openai']['OPENAI_API_KEY'])

    has_key_azure_openai = has_API_key(cfg_private['openai_azure']['api_version'])

    has_key_palm2 = has_API_key(cfg_private['google_palm']['google_palm_api'])

    has_key_google_OCR = has_API_key(cfg_private['google_cloud']['path_json_file'])

    if has_key_google_OCR and (has_key_azure_openai or has_key_openai or has_key_palm2):
        return True
    else:
        return False

def check_API_key(dir_home, api_version):
    dir_home = os.path.dirname(os.path.dirname(__file__))
    path_cfg_private = os.path.join(dir_home, 'PRIVATE_DATA.yaml')
    cfg_private = get_cfg_from_full_path(path_cfg_private)

    has_key_openai = has_API_key(cfg_private['openai']['OPENAI_API_KEY'])

    has_key_azure_openai = has_API_key(cfg_private['openai_azure']['api_version'])

    has_key_palm2 = has_API_key(cfg_private['google_palm']['google_palm_api'])

    has_key_google_OCR = has_API_key(cfg_private['google_cloud']['path_json_file'])

    if api_version == 'palm' and has_key_palm2:
        return True
    elif api_version in ['gpt', 'openai'] and has_key_openai:
        return True
    elif api_version in ['gpt-azure', 'azure_openai'] and has_key_azure_openai:
        return True
    elif api_version == 'google-vision-ocr' and has_key_google_OCR:
        return True
    else:
        return False
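check_if_usable() and check_API_key() read credentials from a PRIVATE_DATA.yaml file at the repository root. A hedged sketch of the minimal key layout those lookups imply; every value is a placeholder and the real file may carry additional fields:

import yaml

private_data_sketch = {
    'openai':       {'OPENAI_API_KEY': '<OpenAI key, or empty string to disable>'},
    'openai_azure': {'api_version': '<Azure OpenAI api_version, or empty string>'},
    'google_palm':  {'google_palm_api': '<PaLM 2 API key, or empty string>'},
    'google_cloud': {'path_json_file': '<path to Google Cloud service-account JSON>'},
}
print(yaml.dump(private_data_sketch, default_flow_style=False))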
vouchervision/component_detector/LICENSE
ADDED
@@ -0,0 +1,674 @@
1 |
+
GNU GENERAL PUBLIC LICENSE
|
2 |
+
Version 3, 29 June 2007
|
3 |
+
|
4 |
+
Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>
|
5 |
+
Everyone is permitted to copy and distribute verbatim copies
|
6 |
+
of this license document, but changing it is not allowed.
|
7 |
+
|
8 |
+
Preamble
|
9 |
+
|
10 |
+
The GNU General Public License is a free, copyleft license for
|
11 |
+
software and other kinds of works.
|
12 |
+
|
13 |
+
The licenses for most software and other practical works are designed
|
14 |
+
to take away your freedom to share and change the works. By contrast,
|
15 |
+
the GNU General Public License is intended to guarantee your freedom to
|
16 |
+
share and change all versions of a program--to make sure it remains free
|
17 |
+
software for all its users. We, the Free Software Foundation, use the
|
18 |
+
GNU General Public License for most of our software; it applies also to
|
19 |
+
any other work released this way by its authors. You can apply it to
|
20 |
+
your programs, too.
|
21 |
+
|
22 |
+
When we speak of free software, we are referring to freedom, not
|
23 |
+
price. Our General Public Licenses are designed to make sure that you
|
24 |
+
have the freedom to distribute copies of free software (and charge for
|
25 |
+
them if you wish), that you receive source code or can get it if you
|
26 |
+
want it, that you can change the software or use pieces of it in new
|
27 |
+
free programs, and that you know you can do these things.
|
28 |
+
|
29 |
+
To protect your rights, we need to prevent others from denying you
|
30 |
+
these rights or asking you to surrender the rights. Therefore, you have
|
31 |
+
certain responsibilities if you distribute copies of the software, or if
|
32 |
+
you modify it: responsibilities to respect the freedom of others.
|
33 |
+
|
34 |
+
For example, if you distribute copies of such a program, whether
|
35 |
+
gratis or for a fee, you must pass on to the recipients the same
|
36 |
+
freedoms that you received. You must make sure that they, too, receive
|
37 |
+
or can get the source code. And you must show them these terms so they
|
38 |
+
know their rights.
|
39 |
+
|
40 |
+
Developers that use the GNU GPL protect your rights with two steps:
|
41 |
+
(1) assert copyright on the software, and (2) offer you this License
|
42 |
+
giving you legal permission to copy, distribute and/or modify it.
|
43 |
+
|
44 |
+
For the developers' and authors' protection, the GPL clearly explains
|
45 |
+
that there is no warranty for this free software. For both users' and
|
46 |
+
authors' sake, the GPL requires that modified versions be marked as
|
47 |
+
changed, so that their problems will not be attributed erroneously to
|
48 |
+
authors of previous versions.
|
49 |
+
|
50 |
+
Some devices are designed to deny users access to install or run
|
51 |
+
modified versions of the software inside them, although the manufacturer
|
52 |
+
can do so. This is fundamentally incompatible with the aim of
|
53 |
+
protecting users' freedom to change the software. The systematic
|
54 |
+
pattern of such abuse occurs in the area of products for individuals to
|
55 |
+
use, which is precisely where it is most unacceptable. Therefore, we
|
56 |
+
have designed this version of the GPL to prohibit the practice for those
|
57 |
+
products. If such problems arise substantially in other domains, we
|
58 |
+
stand ready to extend this provision to those domains in future versions
|
59 |
+
of the GPL, as needed to protect the freedom of users.
|
60 |
+
|
61 |
+
Finally, every program is threatened constantly by software patents.
|
62 |
+
States should not allow patents to restrict development and use of
|
63 |
+
software on general-purpose computers, but in those that do, we wish to
|
64 |
+
avoid the special danger that patents applied to a free program could
|
65 |
+
make it effectively proprietary. To prevent this, the GPL assures that
|
66 |
+
patents cannot be used to render the program non-free.
|
67 |
+
|
68 |
+
The precise terms and conditions for copying, distribution and
|
69 |
+
modification follow.
|
70 |
+
|
71 |
+
TERMS AND CONDITIONS
|
72 |
+
|
73 |
+
0. Definitions.
|
74 |
+
|
75 |
+
"This License" refers to version 3 of the GNU General Public License.
|
76 |
+
|
77 |
+
"Copyright" also means copyright-like laws that apply to other kinds of
|
78 |
+
works, such as semiconductor masks.
|
79 |
+
|
80 |
+
"The Program" refers to any copyrightable work licensed under this
|
81 |
+
License. Each licensee is addressed as "you". "Licensees" and
|
82 |
+
"recipients" may be individuals or organizations.
|
83 |
+
|
84 |
+
To "modify" a work means to copy from or adapt all or part of the work
|
85 |
+
in a fashion requiring copyright permission, other than the making of an
|
86 |
+
exact copy. The resulting work is called a "modified version" of the
|
87 |
+
earlier work or a work "based on" the earlier work.
|
88 |
+
|
89 |
+
A "covered work" means either the unmodified Program or a work based
|
90 |
+
on the Program.
|
91 |
+
|
92 |
+
To "propagate" a work means to do anything with it that, without
|
93 |
+
permission, would make you directly or secondarily liable for
|
94 |
+
infringement under applicable copyright law, except executing it on a
|
95 |
+
computer or modifying a private copy. Propagation includes copying,
|
96 |
+
distribution (with or without modification), making available to the
|
97 |
+
public, and in some countries other activities as well.
|
98 |
+
|
99 |
+
To "convey" a work means any kind of propagation that enables other
|
100 |
+
parties to make or receive copies. Mere interaction with a user through
|
101 |
+
a computer network, with no transfer of a copy, is not conveying.
|
102 |
+
|
103 |
+
An interactive user interface displays "Appropriate Legal Notices"
|
104 |
+
to the extent that it includes a convenient and prominently visible
|
105 |
+
feature that (1) displays an appropriate copyright notice, and (2)
|
106 |
+
tells the user that there is no warranty for the work (except to the
|
107 |
+
extent that warranties are provided), that licensees may convey the
|
108 |
+
work under this License, and how to view a copy of this License. If
|
109 |
+
the interface presents a list of user commands or options, such as a
|
110 |
+
menu, a prominent item in the list meets this criterion.
|
111 |
+
|
112 |
+
1. Source Code.
|
113 |
+
|
114 |
+
The "source code" for a work means the preferred form of the work
|
115 |
+
for making modifications to it. "Object code" means any non-source
|
116 |
+
form of a work.
|
117 |
+
|
118 |
+
A "Standard Interface" means an interface that either is an official
|
119 |
+
standard defined by a recognized standards body, or, in the case of
|
120 |
+
interfaces specified for a particular programming language, one that
|
121 |
+
is widely used among developers working in that language.
|
122 |
+
|
123 |
+
The "System Libraries" of an executable work include anything, other
|
124 |
+
than the work as a whole, that (a) is included in the normal form of
|
125 |
+
packaging a Major Component, but which is not part of that Major
|
126 |
+
Component, and (b) serves only to enable use of the work with that
|
127 |
+
Major Component, or to implement a Standard Interface for which an
|
128 |
+
implementation is available to the public in source code form. A
|
129 |
+
"Major Component", in this context, means a major essential component
|
130 |
+
(kernel, window system, and so on) of the specific operating system
|
131 |
+
(if any) on which the executable work runs, or a compiler used to
|
132 |
+
produce the work, or an object code interpreter used to run it.
|
133 |
+
|
134 |
+
The "Corresponding Source" for a work in object code form means all
|
135 |
+
the source code needed to generate, install, and (for an executable
|
136 |
+
work) run the object code and to modify the work, including scripts to
|
137 |
+
control those activities. However, it does not include the work's
|
138 |
+
System Libraries, or general-purpose tools or generally available free
|
139 |
+
programs which are used unmodified in performing those activities but
|
140 |
+
which are not part of the work. For example, Corresponding Source
|
141 |
+
includes interface definition files associated with source files for
|
142 |
+
the work, and the source code for shared libraries and dynamically
|
143 |
+
linked subprograms that the work is specifically designed to require,
|
144 |
+
such as by intimate data communication or control flow between those
|
145 |
+
subprograms and other parts of the work.
|
146 |
+
|
147 |
+
The Corresponding Source need not include anything that users
|
148 |
+
can regenerate automatically from other parts of the Corresponding
|
149 |
+
Source.
|
150 |
+
|
151 |
+
The Corresponding Source for a work in source code form is that
|
152 |
+
same work.
|
153 |
+
|
154 |
+
2. Basic Permissions.
|
155 |
+
|
156 |
+
All rights granted under this License are granted for the term of
|
157 |
+
copyright on the Program, and are irrevocable provided the stated
|
158 |
+
conditions are met. This License explicitly affirms your unlimited
|
159 |
+
permission to run the unmodified Program. The output from running a
|
160 |
+
covered work is covered by this License only if the output, given its
|
161 |
+
content, constitutes a covered work. This License acknowledges your
|
162 |
+
rights of fair use or other equivalent, as provided by copyright law.
|
163 |
+
|
164 |
+
You may make, run and propagate covered works that you do not
|
165 |
+
convey, without conditions so long as your license otherwise remains
|
166 |
+
in force. You may convey covered works to others for the sole purpose
|
167 |
+
of having them make modifications exclusively for you, or provide you
|
168 |
+
with facilities for running those works, provided that you comply with
|
169 |
+
the terms of this License in conveying all material for which you do
|
170 |
+
not control copyright. Those thus making or running the covered works
|
171 |
+
for you must do so exclusively on your behalf, under your direction
|
172 |
+
and control, on terms that prohibit them from making any copies of
|
173 |
+
your copyrighted material outside their relationship with you.
|
174 |
+
|
175 |
+
Conveying under any other circumstances is permitted solely under
|
176 |
+
the conditions stated below. Sublicensing is not allowed; section 10
|
177 |
+
makes it unnecessary.
|
178 |
+
|
179 |
+
3. Protecting Users' Legal Rights From Anti-Circumvention Law.
|
180 |
+
|
181 |
+
No covered work shall be deemed part of an effective technological
|
182 |
+
measure under any applicable law fulfilling obligations under article
|
183 |
+
11 of the WIPO copyright treaty adopted on 20 December 1996, or
|
184 |
+
similar laws prohibiting or restricting circumvention of such
|
185 |
+
measures.
|
186 |
+
|
187 |
+
When you convey a covered work, you waive any legal power to forbid
|
188 |
+
circumvention of technological measures to the extent such circumvention
|
189 |
+
is effected by exercising rights under this License with respect to
|
190 |
+
the covered work, and you disclaim any intention to limit operation or
|
191 |
+
modification of the work as a means of enforcing, against the work's
|
192 |
+
vouchervision/component_detector/__init__.py
ADDED
File without changes
|
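Note on the geometry in armature_processing.py below: the file repeatedly fits 2nd-order polynomials to grouped keypoints, intersects them with scipy's fsolve, and reports the angle between their tangents as atan(|(m2 - m1) / (1 + m1*m2)|). The following is a minimal standalone sketch of that tangent-angle step using made-up coefficients; the names and values here are illustrative only and are not part of the committed file.

    import math
    import numpy as np
    from scipy.optimize import fsolve

    # Illustrative quadratic "outer left" and "outer right" fits (made-up coefficients).
    left_poly = np.array([0.002, -0.8, 120.0])
    right_poly = np.array([-0.001, 0.6, 40.0])

    left_func = lambda x: np.polyval(left_poly, x)
    right_func = lambda x: np.polyval(right_poly, x)

    # x where the two curves meet, found the same way as in calc_angle_tangent().
    x_int = fsolve(lambda x: left_func(x) - right_func(x), 0)[0]

    # Slopes of the tangents at the intersection (derivative of a*x^2 + b*x + c is 2*a*x + b).
    m1 = 2 * left_poly[0] * x_int + left_poly[1]
    m2 = 2 * right_poly[0] * x_int + right_poly[1]

    # Angle between the two tangent lines, in degrees.
    theta = math.degrees(math.atan(abs((m2 - m1) / (1 + m1 * m2))))
    print(round(theta, 2))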
vouchervision/component_detector/armature_processing.py
ADDED
@@ -0,0 +1,1047 @@
1 |
+
import os, math, cv2, random
|
2 |
+
import numpy as np
|
3 |
+
from itertools import combinations
|
4 |
+
from PIL import Image
|
5 |
+
from dataclasses import dataclass, field
|
6 |
+
from typing import List, Dict
|
7 |
+
from sklearn.linear_model import LinearRegression
|
8 |
+
from scipy.optimize import fsolve, minimize
|
9 |
+
|
10 |
+
|
11 |
+
@dataclass()
|
12 |
+
class ArmatureSkeleton:
|
13 |
+
cfg: str
|
14 |
+
Dirs: str
|
15 |
+
leaf_type: str
|
16 |
+
all_points: list
|
17 |
+
dir_temp: str
|
18 |
+
file_name: str
|
19 |
+
width: int
|
20 |
+
height: int
|
21 |
+
logger: object
|
22 |
+
|
23 |
+
is_complete: bool = False
|
24 |
+
keep_going: bool = False
|
25 |
+
|
26 |
+
do_show_QC_images: bool = False
|
27 |
+
do_save_QC_images: bool = False
|
28 |
+
|
29 |
+
classes: int = 0
|
30 |
+
points_list: int = 0
|
31 |
+
|
32 |
+
image: int = 0
|
33 |
+
|
34 |
+
ordered_middle: int = 0
|
35 |
+
midvein_fit: int = 0
|
36 |
+
midvein_fit_points: int = 0
|
37 |
+
ordered_midvein_length: float = 0.0
|
38 |
+
has_middle = False
|
39 |
+
|
40 |
+
has_outer = False
|
41 |
+
has_tip = False
|
42 |
+
|
43 |
+
is_split = False
|
44 |
+
|
45 |
+
ordered_petiole: int = 0
|
46 |
+
ordered_petiole_length: float = 0.0
|
47 |
+
has_ordered_petiole = False
|
48 |
+
|
49 |
+
has_apex: bool = False
|
50 |
+
apex_left: int = 0
|
51 |
+
apex_right: int = 0
|
52 |
+
apex_center: int = 0
|
53 |
+
apex_angle_type: str = 'NA'
|
54 |
+
apex_angle_degrees: float = 0.0
|
55 |
+
|
56 |
+
has_base: bool = False
|
57 |
+
base_left: int = 0
|
58 |
+
base_right: int = 0
|
59 |
+
base_center: int = 0
|
60 |
+
base_angle_type: str = 'NA'
|
61 |
+
base_angle_degrees: float = 0.0
|
62 |
+
|
63 |
+
has_lamina_base: bool = False
|
64 |
+
lamina_base: int = 0
|
65 |
+
|
66 |
+
has_lamina_length: bool = False
|
67 |
+
lamina_fit: int = 0
|
68 |
+
lamina_length: float = 0.0
|
69 |
+
|
70 |
+
has_width: bool = False
|
71 |
+
lamina_width: float = 0.0
|
72 |
+
width_left: float = 0.0
|
73 |
+
width_right: float = 0.0
|
74 |
+
|
75 |
+
|
76 |
+
|
77 |
+
def __init__(self, cfg, logger, Dirs, leaf_type, all_points, height, width, dir_temp, file_name) -> None:
|
78 |
+
# Store the necessary arguments as instance attributes
|
79 |
+
self.cfg = cfg
|
80 |
+
self.Dirs = Dirs
|
81 |
+
self.leaf_type = leaf_type
|
82 |
+
self.all_points = all_points
|
83 |
+
self.height = height
|
84 |
+
self.width = width
|
85 |
+
self.dir_temp = dir_temp
|
86 |
+
self.file_name = file_name
|
87 |
+
|
88 |
+
logger.name = f'[{leaf_type} - {file_name}]'
|
89 |
+
self.logger = logger
|
90 |
+
|
91 |
+
self.init_lists_dicts()
|
92 |
+
|
93 |
+
""" Setup """
|
94 |
+
self.set_cfg_values()
|
95 |
+
self.define_landmark_classes()
|
96 |
+
|
97 |
+
self.setup_QC_image()
|
98 |
+
self.setup_angle_image()
|
99 |
+
self.setup_final_image()
|
100 |
+
|
101 |
+
self.parse_all_points()
|
102 |
+
self.convert_YOLO_bbox_to_point()
|
103 |
+
|
104 |
+
if (len(self.points_list['outer']) > 6) and (len(self.points_list['middle']) > 3):
|
105 |
+
self.keep_going = True
|
106 |
+
|
107 |
+
""" Landmarks """
|
108 |
+
if self.keep_going:
|
109 |
+
# Start with ordering the midvein and petiole
|
110 |
+
self.order_middle()
|
111 |
+
# print(self.ordered_midvein)
|
112 |
+
if self.keep_going:
|
113 |
+
# Split the image using the midvein IF has_midvein == True
|
114 |
+
self.split_image_by_middle()
|
115 |
+
if self.keep_going:
|
116 |
+
self.group_outer_points()
|
117 |
+
if self.keep_going:
|
118 |
+
# Measure
|
119 |
+
self.measure_armature()
|
120 |
+
if self.keep_going:
|
121 |
+
# calc tangent angle of outer and inner polys
|
122 |
+
self.calc_angle_tangent()
|
123 |
+
if self.keep_going:
|
124 |
+
self.calc_angle_curl()
|
125 |
+
if self.keep_going:
|
126 |
+
# self.calc_angle_bend()
|
127 |
+
self.calc_curvature_radius()
|
128 |
+
if self.keep_going:
|
129 |
+
self.calc_direct_length()
|
130 |
+
|
131 |
+
# self.show_QC_image()
|
132 |
+
# self.show_angle_image()
|
133 |
+
|
134 |
+
self.is_complete = True # TODO add ways to set True
|
135 |
+
|
136 |
+
|
137 |
+
def measure_armature(self):
|
138 |
+
# wb = width_base = line between the last outer and inner points
|
139 |
+
# Define the line function
|
140 |
+
def line_func(x):
|
141 |
+
return self.wb_slope * x + self.wb_intercept
|
142 |
+
def middle_func(x):
|
143 |
+
return self.middle_poly[0]*x**2 + self.middle_poly[1]*x + self.middle_poly[2]
|
144 |
+
# Define the difference function
|
145 |
+
def line_middle_diff(x):
|
146 |
+
return line_func(x) - middle_func(x)
|
147 |
+
|
148 |
+
# Convert the points to numpy arrays
|
149 |
+
last_point_right = np.array(self.last_point_right)
|
150 |
+
last_point_left = np.array(self.last_point_left)
|
151 |
+
|
152 |
+
# Calculate the Euclidean distance between the points
|
153 |
+
self.width_base = np.linalg.norm(last_point_right - last_point_left)
|
154 |
+
print("The distance between the last points of the right and left segments is:", self.width_base)
|
155 |
+
|
156 |
+
# Intersection of the width and the middlepoly# Draw a line between the last points of the outer_left and outer_right segments
|
157 |
+
cv2.line(self.image, (int(self.last_point_left[0]), int(self.last_point_left[1])), (int(self.last_point_right[0]), int(self.last_point_right[1])), gc('white'), thickness=2)
|
158 |
+
cv2.line(self.image_angles, (int(self.last_point_left[0]), int(self.last_point_left[1])), (int(self.last_point_right[0]), int(self.last_point_right[1])), color=gc('white'), thickness=2)
|
159 |
+
|
160 |
+
# Calculate the slope and y-intercept of the line
|
161 |
+
self.wb_slope = (self.last_point_right[1] - self.last_point_left[1]) / (self.last_point_right[0] - self.last_point_left[0])
|
162 |
+
self.wb_intercept = self.last_point_left[1] - self.wb_slope * self.last_point_left[0]
|
163 |
+
|
164 |
+
# Find the intersection point
|
165 |
+
intersection_x = fsolve(line_middle_diff, 0)[0]
|
166 |
+
intersection_y = line_func(intersection_x)
|
167 |
+
|
168 |
+
self.width_base_inter = [(int(intersection_x), int(intersection_y))]
|
169 |
+
# Calculate the midpoint between the last points
|
170 |
+
self.width_base_mid = (last_point_right + last_point_left) / 2
|
171 |
+
|
172 |
+
cv2.circle(self.image, (int(intersection_x), int(intersection_y)), radius=2, color=gc('green'), thickness=-1)
|
173 |
+
cv2.circle(self.image, (int(intersection_x), int(intersection_y)), radius=4, color=gc('black'), thickness=2)
|
174 |
+
cv2.circle(self.image, (int(self.width_base_mid[0]), int(self.width_base_mid[1])), radius=2, color=gc('red'), thickness=-1)
|
175 |
+
cv2.circle(self.image, (int(self.width_base_mid[0]), int(self.width_base_mid[1])), radius=4, color=gc('black'), thickness=2)
|
176 |
+
|
177 |
+
print("The intersection point of the line and the middle polynomial is:", (intersection_x, intersection_y))
|
178 |
+
|
179 |
+
|
180 |
+
|
181 |
+
def calc_direct_length(self):
|
182 |
+
# Calculate the x-coordinate of the intersection point
|
183 |
+
x_intersection = (self.wb_intercept_perpendicular - self.wb_intercept) / (self.wb_slope - self.wb_slope_perpendicular)
|
184 |
+
|
185 |
+
# Calculate the y-coordinate of the intersection point
|
186 |
+
y_intersection = self.wb_slope * x_intersection + self.wb_intercept
|
187 |
+
|
188 |
+
# Store the intersection point as self.wb_origin
|
189 |
+
self.wb_origin = np.array([x_intersection, y_intersection])
|
190 |
+
|
191 |
+
# Calculate the distance between the intersection point and self.inter_point
|
192 |
+
self.length_direct = np.linalg.norm(self.wb_origin - self.inter_point)
|
193 |
+
# Plot a 2-pixel thick red line from self.wb_origin to self.inter_point
|
194 |
+
cv2.line(self.image_angles, tuple(map(int, self.wb_origin)), tuple(map(int, self.inter_point)), gc('red'), thickness=2)
|
195 |
+
|
196 |
+
|
197 |
+
|
198 |
+
def calc_curvature_radius(self):
|
199 |
+
def fit_circle_least_squares(points):
|
200 |
+
if len(points) <= 1:
|
201 |
+
return 0.0, (0, 0)
|
202 |
+
|
203 |
+
def calc_residuals(params, points):
|
204 |
+
x0, y0, r = params
|
205 |
+
residuals = np.sqrt((points[:, 0] - x0) ** 2 + (points[:, 1] - y0) ** 2) - r
|
206 |
+
return residuals
|
207 |
+
|
208 |
+
def objective(params, points):
|
209 |
+
return np.sum(calc_residuals(params, points) ** 2)
|
210 |
+
|
211 |
+
x_mean = np.mean(points[:, 0])
|
212 |
+
y_mean = np.mean(points[:, 1])
|
213 |
+
r_mean = np.mean(np.sqrt((points[:, 0] - x_mean) ** 2 + (points[:, 1] - y_mean) ** 2))
|
214 |
+
init_params = [x_mean, y_mean, r_mean]
|
215 |
+
|
216 |
+
result = minimize(objective, init_params, args=(points,), method='L-BFGS-B')
|
217 |
+
x0, y0, r = result.x
|
218 |
+
|
219 |
+
return r, (x0, y0)
|
220 |
+
|
221 |
+
self.radius_middle, center_middle = fit_circle_least_squares(self.ordered_middle_np)
|
222 |
+
self.radius_outer_left, center_outer_left = fit_circle_least_squares(self.ordered_outer_left_np)
|
223 |
+
self.radius_outer_right, center_outer_right = fit_circle_least_squares(self.ordered_outer_right_np)
|
224 |
+
|
225 |
+
|
226 |
+
# Plot the circles on self.image_angles
|
227 |
+
cv2.circle(self.image_angles, (int(center_middle[0]), int(center_middle[1])), int(self.radius_middle), gc('yellow'), thickness=1)
|
228 |
+
cv2.circle(self.image_angles, (int(center_outer_left[0]), int(center_outer_left[1])), int(self.radius_outer_left), gc('pink'), thickness=1)
|
229 |
+
cv2.circle(self.image_angles, (int(center_outer_right[0]), int(center_outer_right[1])), int(self.radius_outer_right), gc('cyan'), thickness=1)
|
230 |
+
|
231 |
+
print('hi')
|
232 |
+
|
233 |
+
|
234 |
+
def calc_angle_bend(self):
|
235 |
+
print('hi')
|
236 |
+
|
237 |
+
|
238 |
+
|
239 |
+
def calc_angle_curl(self):
|
240 |
+
# Define the perpendicular line function
|
241 |
+
def wb_line_perpendicular(x):
|
242 |
+
return self.wb_slope_perpendicular * x + self.wb_intercept_perpendicular
|
243 |
+
|
244 |
+
|
245 |
+
# Calculate the slope of the line perpendicular to the given line
|
246 |
+
self.wb_slope_perpendicular = -1 / self.wb_slope
|
247 |
+
# Calculate the y-intercept of the line perpendicular to the given line
|
248 |
+
self.wb_intercept_perpendicular = self.inter_point[1] - self.wb_slope_perpendicular * self.inter_point[0]
|
249 |
+
|
250 |
+
# Line fit to first 3 points in self.ordered_middle
|
251 |
+
self.middle_tip_poly = np.polyfit(self.ordered_middle_np[0:3, 0], self.ordered_middle_np[0:3, 1], 1)
|
252 |
+
middle_tip_slope = self.middle_tip_poly[0]
|
253 |
+
|
254 |
+
# angle between middle_tip fit the curl perpendicular
|
255 |
+
theta = math.atan(abs((middle_tip_slope - self.wb_slope_perpendicular) / (1 + self.wb_slope_perpendicular*middle_tip_slope)))
|
256 |
+
|
257 |
+
# Convert the angle to degrees
|
258 |
+
self.angle_curl = math.degrees(theta)
|
259 |
+
|
260 |
+
print("The angle between the lines is:", self.angle_curl, "degrees")
|
261 |
+
|
262 |
+
# Draw the tangents at the intersection point
|
263 |
+
intersection_point = np.array(self.inter_point_outer_inner, dtype=int)
|
264 |
+
length = 50 # Length of the tangent lines
|
265 |
+
|
266 |
+
# Calculate the points for the tangent lines
|
267 |
+
curl_tangent_point1 = (intersection_point[0] - length, intersection_point[1] - length * self.wb_slope_perpendicular)
|
268 |
+
curl_tangent_point2 = (intersection_point[0] + length, intersection_point[1] + length * self.wb_slope_perpendicular)
|
269 |
+
middle_tip_tangent_point1 = (intersection_point[0] - length, intersection_point[1] - length * middle_tip_slope)
|
270 |
+
middle_tip_tangent_point2 = (intersection_point[0] + length, intersection_point[1] + length * middle_tip_slope)
|
271 |
+
|
272 |
+
# Convert the points to integers
|
273 |
+
curl_tangent_point1 = tuple(map(int, curl_tangent_point1))
|
274 |
+
curl_tangent_point2 = tuple(map(int, curl_tangent_point2))
|
275 |
+
middle_tip_tangent_point1 = tuple(map(int, middle_tip_tangent_point1))
|
276 |
+
middle_tip_tangent_point2 = tuple(map(int, middle_tip_tangent_point2))
|
277 |
+
|
278 |
+
# Draw the tangent lines
|
279 |
+
cv2.line(self.image_angles, intersection_point, curl_tangent_point1, gc('teal'), 1)
|
280 |
+
cv2.line(self.image_angles, intersection_point, curl_tangent_point2, gc('teal'), 1)
|
281 |
+
cv2.line(self.image_angles, intersection_point, middle_tip_tangent_point1, gc('teal'), 1)
|
282 |
+
cv2.line(self.image_angles, intersection_point, middle_tip_tangent_point2, gc('teal'), 1)
|
283 |
+
|
284 |
+
# Draw the arc representing the angle
|
285 |
+
cv2.ellipse(self.image_angles, tuple(intersection_point), (length, length), 0, 0, self.angle_curl, gc('teal'), 2)
|
286 |
+
cv2.ellipse(self.image_angles, tuple(intersection_point), (length, length), 180, 0, self.angle_curl, gc('teal'), 2)
|
287 |
+
|
288 |
+
### plot the wb_line_perpendicular
|
289 |
+
# Calculate the y values for the start and end points of the line
|
290 |
+
y_start = max(0, int(wb_line_perpendicular(0)))
|
291 |
+
y_end = min(self.height, int(wb_line_perpendicular(self.width)))
|
292 |
+
|
293 |
+
# Define the range of y values for the line
|
294 |
+
y_range = np.linspace(y_start, y_end, num=100, dtype=int) # You can adjust 'num' to control the number of points
|
295 |
+
|
296 |
+
# Draw the dotted gray line
|
297 |
+
for i in range(len(y_range) - 1):
|
298 |
+
y1, x1 = y_range[i], int((y_range[i] - self.wb_intercept_perpendicular) / self.wb_slope_perpendicular)
|
299 |
+
x1 = max(0, min(x1, self.width)) # Keep x1 within the bounds of the image width
|
300 |
+
y2, x2 = y_range[i+1], int((y_range[i+1] - self.wb_intercept_perpendicular) / self.wb_slope_perpendicular)
|
301 |
+
x2 = max(0, min(x2, self.width)) # Keep x2 within the bounds of the image width
|
302 |
+
|
303 |
+
if i % 2 == 0: # Change the value of 2 to adjust the spacing between the dots
|
304 |
+
cv2.line(self.image_angles, (x1, y1), (x2, y2), gc('white'), 1)
|
305 |
+
|
306 |
+
|
307 |
+
|
308 |
+
|
309 |
+
def calc_angle_tangent(self):
|
310 |
+
# Define the polynomial functions
|
311 |
+
def left_func(x):
|
312 |
+
return self.left_poly[0]*x**2 + self.left_poly[1]*x + self.left_poly[2]
|
313 |
+
|
314 |
+
def right_func(x):
|
315 |
+
return self.right_poly[0]*x**2 + self.right_poly[1]*x + self.right_poly[2]
|
316 |
+
|
317 |
+
# Define the difference function
|
318 |
+
def left_right_diff(x):
|
319 |
+
return left_func(x) - right_func(x)
|
320 |
+
|
321 |
+
# Find the x-coordinate of the intersection point
|
322 |
+
intersection_x = fsolve(left_right_diff, 0)[0]
|
323 |
+
|
324 |
+
# Calculate the y-coordinate of the intersection point on the left and right curves
|
325 |
+
intersection_y_left = left_func(intersection_x)
|
326 |
+
intersection_y_right = right_func(intersection_x)
|
327 |
+
|
328 |
+
# Calculate the derivatives of the polynomials at the intersection point
|
329 |
+
left_derivative = 2*self.left_poly[0]*intersection_x + self.left_poly[1]
|
330 |
+
right_derivative = 2*self.right_poly[0]*intersection_x + self.right_poly[1]
|
331 |
+
|
332 |
+
# Calculate the angle between the tangents to the polynomials at the intersection point
|
333 |
+
theta = math.atan(abs((right_derivative - left_derivative) / (1 + left_derivative*right_derivative)))
|
334 |
+
|
335 |
+
# Convert the angle to degrees
|
336 |
+
self.angle_tangent = math.degrees(theta)
|
337 |
+
|
338 |
+
print("The angle between the left and right polynomials at their point of intersection is:", theta, "degrees")
|
339 |
+
|
340 |
+
# Draw the tangents at the intersection point
|
341 |
+
intersection_point = np.array([int(intersection_x), int(intersection_y_left + (intersection_y_right - intersection_y_left)/2)])
|
342 |
+
length = 30 # Length of the tangent lines
|
343 |
+
|
344 |
+
# Calculate the points for the tangent lines
|
345 |
+
left_tangent_point1 = (intersection_point[0] - length, intersection_point[1] - length * left_derivative)
|
346 |
+
left_tangent_point2 = (intersection_point[0] + length, intersection_point[1] + length * left_derivative)
|
347 |
+
right_tangent_point1 = (intersection_point[0] - length, intersection_point[1] - length * right_derivative)
|
348 |
+
right_tangent_point2 = (intersection_point[0] + length, intersection_point[1] + length * right_derivative)
|
349 |
+
|
350 |
+
# Convert the points to integers
|
351 |
+
left_tangent_point1 = tuple(map(int, left_tangent_point1))
|
352 |
+
left_tangent_point2 = tuple(map(int, left_tangent_point2))
|
353 |
+
right_tangent_point1 = tuple(map(int, right_tangent_point1))
|
354 |
+
right_tangent_point2 = tuple(map(int, right_tangent_point2))
|
355 |
+
|
356 |
+
# # Draw the tangent lines
|
357 |
+
# cv2.line(self.image_angles, intersection_point, left_tangent_point1, gc('yellow'), 1)
|
358 |
+
# cv2.line(self.image_angles, intersection_point, left_tangent_point2, gc('yellow'), 1)
|
359 |
+
# cv2.line(self.image_angles, intersection_point, right_tangent_point1, gc('yellow'), 1)
|
360 |
+
# cv2.line(self.image_angles, intersection_point, right_tangent_point2, gc('yellow'), 1)
|
361 |
+
|
362 |
+
# Draw the arc representing the angle
|
363 |
+
cv2.ellipse(self.image_angles, tuple(intersection_point), (length, length), 0, 0, self.angle_tangent, gc('yellow'), 2)
|
364 |
+
cv2.ellipse(self.image_angles, tuple(intersection_point), (length, length), 180, 0, self.angle_tangent, gc('yellow'), 2)
|
365 |
+
|
366 |
+
# self.show_angle_image()
|
367 |
+
# return theta
|
368 |
+
|
369 |
+
|
370 |
+
def group_outer_points(self):
|
371 |
+
# Split the points into two groups based on their position relative to the line
|
372 |
+
self.outer_left = []
|
373 |
+
self.outer_right = []
|
374 |
+
|
375 |
+
# if 'tip' in self.points_list:
|
376 |
+
|
377 |
+
for point in self.points_list['outer']:
|
378 |
+
x, y = point
|
379 |
+
predicted_y = self.predict_y(x)
|
380 |
+
|
381 |
+
if y > predicted_y:
|
382 |
+
self.outer_right.append(point)
|
383 |
+
else:
|
384 |
+
self.outer_left.append(point)
|
385 |
+
|
386 |
+
self.outer_right = np.array(self.outer_right)
|
387 |
+
self.outer_left = np.array(self.outer_left)
|
388 |
+
|
389 |
+
if (len(self.outer_right) < 3) or (len(self.outer_left) < 3):
|
390 |
+
self.keep_going = False
|
391 |
+
else:
|
392 |
+
# Plot `outer_left` points in pink
|
393 |
+
for point in self.outer_left:
|
394 |
+
x, y = point
|
395 |
+
cv2.circle(self.image, (x, y), radius=5, color=gc('pink'), thickness=-1)
|
396 |
+
|
397 |
+
# Plot `outer_right` points in cyan
|
398 |
+
for point in self.outer_right:
|
399 |
+
x, y = point
|
400 |
+
cv2.circle(self.image, (x, y), radius=5, color=gc('cyan'), thickness=-1)
|
401 |
+
|
402 |
+
### outer_left
|
403 |
+
self.outer_left = self.order_points(self.outer_left)
|
404 |
+
self.outer_left = self.remove_duplicate_points(self.outer_left)
|
405 |
+
# self.outer_left = self.check_momentum(self.outer_left, False)
|
406 |
+
self.order_points_plot(self.outer_left, 'outer_left', 'final')
|
407 |
+
self.order_points_plot(self.outer_left, 'outer_left', 'QC')
|
408 |
+
self.outer_left_length, self.outer_left = self.get_length_of_ordered_points(self.outer_left, 'outer_left')
|
409 |
+
self.has_outer_left = True
|
410 |
+
|
411 |
+
|
412 |
+
### outer_right
|
413 |
+
self.outer_right = self.order_points(self.outer_right)
|
414 |
+
self.outer_right = self.remove_duplicate_points(self.outer_right)
|
415 |
+
# self.outer_right = self.check_momentum(self.outer_right, False)
|
416 |
+
self.order_points_plot(self.outer_right, 'outer_right', 'final')
|
417 |
+
self.order_points_plot(self.outer_right, 'outer_right', 'QC')
|
418 |
+
self.outer_right_length, self.outer_right = self.get_length_of_ordered_points(self.outer_right, 'outer_right')
|
419 |
+
self.has_middle = True
|
420 |
+
|
421 |
+
print(f"Length outer_left - {self.outer_left_length}")
|
422 |
+
print(f"Length outer_right - {self.outer_right_length}")
|
423 |
+
|
424 |
+
self.outer_right_np = np.array(self.outer_right)
|
425 |
+
self.outer_left_np = np.array(self.outer_left)
|
426 |
+
self.ordered_middle_np = np.array(self.ordered_middle)
|
427 |
+
|
428 |
+
# Fit 2nd order polynomials to the line segments
|
429 |
+
self.left_poly = np.polyfit(self.outer_left_np[:, 0], self.outer_left_np[:, 1], 2)
|
430 |
+
self.right_poly = np.polyfit(self.outer_right_np[:, 0], self.outer_right_np[:, 1], 2)
|
431 |
+
self.middle_poly = np.polyfit(self.ordered_middle_np[:, 0], self.ordered_middle_np[:, 1], 2)
|
432 |
+
|
433 |
+
|
434 |
+
# Evaluate polynomial coefficients for a range of x values
|
435 |
+
x_range = np.linspace(0, self.width, num=100)
|
436 |
+
left_line = np.polyval(self.left_poly, x_range)
|
437 |
+
right_line = np.polyval(self.right_poly, x_range)
|
438 |
+
self.middle_line = np.polyval(self.middle_poly, x_range)
|
439 |
+
|
440 |
+
# Plot lines of fit as white lines
|
441 |
+
for i in range(len(x_range)-1):
|
442 |
+
cv2.line(self.image, (int(x_range[i]), int(left_line[i])), (int(x_range[i+1]), int(left_line[i+1])), color=gc('gray'), thickness=1)
|
443 |
+
cv2.line(self.image, (int(x_range[i]), int(right_line[i])), (int(x_range[i+1]), int(right_line[i+1])), color=gc('white'), thickness=1)
|
444 |
+
cv2.line(self.image, (int(x_range[i]), int(self.middle_line[i])), (int(x_range[i+1]), int(self.middle_line[i+1])), color=gc('white'), thickness=2)
|
445 |
+
|
446 |
+
# Define the polynomial functions
|
447 |
+
def left_func(x):
|
448 |
+
return self.left_poly[0]*x**2 + self.left_poly[1]*x + self.left_poly[2]
|
449 |
+
|
450 |
+
def right_func(x):
|
451 |
+
return self.right_poly[0]*x**2 + self.right_poly[1]*x + self.right_poly[2]
|
452 |
+
|
453 |
+
def middle_func(x):
|
454 |
+
return self.middle_poly[0]*x**2 + self.middle_poly[1]*x + self.middle_poly[2]
|
455 |
+
|
456 |
+
# Define the difference functions
|
457 |
+
def left_middle_diff(x):
|
458 |
+
return left_func(x) - middle_func(x)
|
459 |
+
|
460 |
+
def right_middle_diff(x):
|
461 |
+
return right_func(x) - middle_func(x)
|
462 |
+
|
463 |
+
def left_right_diff(x):
|
464 |
+
return left_func(x) - right_func(x)
|
465 |
+
|
466 |
+
# Find the intersection points
|
467 |
+
left_middle_intersection_x = fsolve(left_middle_diff, 0)
|
468 |
+
right_middle_intersection_x = fsolve(right_middle_diff, 0)
|
469 |
+
left_right_intersection_x = fsolve(left_right_diff, 0)
|
470 |
+
|
471 |
+
left_middle_intersection_y = left_func(left_middle_intersection_x)[0]
|
472 |
+
right_middle_intersection_y = right_func(right_middle_intersection_x)[0]
|
473 |
+
left_right_intersection_y = left_func(left_right_intersection_x)[0]
|
474 |
+
|
475 |
+
# Keep only points within the image boundaries
|
476 |
+
intersection_points = np.array([[left_middle_intersection_x, left_middle_intersection_y], [right_middle_intersection_x, right_middle_intersection_y], [left_right_intersection_x, left_right_intersection_y]])
|
477 |
+
intersection_points = intersection_points[(intersection_points[:, 0] >= 0) & (intersection_points[:, 0] <= self.width) & (intersection_points[:, 1] >= 0) & (intersection_points[:, 1] <= self.height)]
|
478 |
+
|
479 |
+
if intersection_points.size == 0:
|
480 |
+
self.keep_going = False
|
481 |
+
else:
|
482 |
+
# Compute the average of the intersection points
|
483 |
+
intersection_x = np.mean(intersection_points[:, 0])
|
484 |
+
intersection_y = np.mean(intersection_points[:, 1])
|
485 |
+
|
486 |
+
self.inter_point = [int(intersection_x), int(intersection_y)]
|
487 |
+
self.inter_point_outer_inner = [int(left_right_intersection_x), int(left_right_intersection_y)]
|
488 |
+
|
489 |
+
# Draw intersection point on the image
|
490 |
+
cv2.circle(self.image, (int(intersection_x), int(intersection_y)), radius=5, color=gc('green'), thickness=-1)
|
491 |
+
print(f"Length outer_left - {self.outer_left_length}")
|
492 |
+
print(f"Length outer_right - {self.outer_right_length}")
|
493 |
+
print(f"Intersection point - ({int(intersection_x)}, {int(intersection_y)})")
|
494 |
+
|
495 |
+
# Make the first points be at the tip, last points far away at base
|
496 |
+
def reorder_segment(segment, inter):
|
497 |
+
# Convert to numpy arrays for easier manipulation
|
498 |
+
segment = np.array(segment)
|
499 |
+
inter = np.array(inter)
|
500 |
+
|
501 |
+
# Calculate the Euclidean distance from the INTER point to the first and last points in the segment
|
502 |
+
dist_first = np.linalg.norm(segment[0] - inter)
|
503 |
+
dist_last = np.linalg.norm(segment[-1] - inter)
|
504 |
+
|
505 |
+
# If the last point is closer to the INTER point than the first point, reverse the order of the segment
|
506 |
+
if dist_last < dist_first:
|
507 |
+
segment = segment[::-1]
|
508 |
+
|
509 |
+
return segment.tolist()
|
510 |
+
|
511 |
+
self.ordered_middle = reorder_segment(self.ordered_middle, self.inter_point)
|
512 |
+
self.outer_left = reorder_segment(self.outer_left, self.inter_point)
|
513 |
+
self.outer_right = reorder_segment(self.outer_right, self.inter_point)
|
514 |
+
|
515 |
+
self.ordered_outer_right_np = np.array(self.outer_right)
|
516 |
+
self.ordered_outer_left_np = np.array(self.outer_left)
|
517 |
+
self.ordered_middle_np = np.array(self.ordered_middle)
|
518 |
+
|
519 |
+
# Draw a black ring around the last point of the outer_left segment
|
520 |
+
self.last_point_left = self.outer_left[-1]
|
521 |
+
cv2.circle(self.image, (int(self.last_point_left[0]), int(self.last_point_left[1])), radius=4, color=gc('black'), thickness=2)
|
522 |
+
cv2.circle(self.image, (int(self.last_point_left[0]), int(self.last_point_left[1])), radius=6, color=gc('white'), thickness=2)
|
523 |
+
|
524 |
+
# Draw a black ring around the last point of the outer_right segment
|
525 |
+
self.last_point_right = self.outer_right[-1]
|
526 |
+
cv2.circle(self.image, (int(self.last_point_right[0]), int(self.last_point_right[1])), radius=4, color=gc('black'), thickness=2)
|
527 |
+
cv2.circle(self.image, (int(self.last_point_right[0]), int(self.last_point_right[1])), radius=6, color=gc('white'), thickness=2)
|
528 |
+
|
529 |
+
# self.show_QC_image()
|
530 |
+
# print('hi')
|
531 |
+
|
532 |
+
|
533 |
+
|
534 |
+
|
    def split_image_by_middle(self):

        if not self.has_middle:
            self.keep_going = False
        else:
            n_fit = 2

            # Convert the points to a numpy array
            points_arr = np.array(self.ordered_middle)

            # Fit a line to the points
            self.midvein_fit = np.polyfit(points_arr[:, 0], points_arr[:, 1], n_fit)

            # Plot a sample of points from along the line
            max_dim = max(self.height, self.width)
            if max_dim < 400:
                num_points = 40
            elif max_dim < 1000:
                num_points = 80
            else:
                num_points = 120

            # Get the endpoints of the line segment that lies within the bounds of the image
            x1 = 0
            y1 = int(self.midvein_fit[0] * x1**2 + self.midvein_fit[1] * x1 + self.midvein_fit[2])
            x2 = self.width - 1
            y2 = int(self.midvein_fit[0] * x2**2 + self.midvein_fit[1] * x2 + self.midvein_fit[2])

            denom = self.midvein_fit[0]
            if denom == 0:
                denom = 0.0000000001
            if y1 < 0:
                y1 = 0
                x1 = int((y1 - self.midvein_fit[1]) / denom)
            if y2 >= self.height:
                y2 = self.height - 1
                x2 = int((y2 - self.midvein_fit[1]) / denom)

            # Sample num_points points along the line segment within the bounds of the image
            x_vals = np.linspace(x1, x2, num_points)
            y_vals = self.midvein_fit[0] * x_vals**2 + self.midvein_fit[1] * x_vals + self.midvein_fit[2]

            # Remove any points that are outside the bounds of the image
            indices = np.where((y_vals >= 0) & (y_vals < self.height))[0]
            x_vals = x_vals[indices]
            y_vals = y_vals[indices]

            # Recompute y-values using the line equation and updated x-values
            y_vals = self.midvein_fit[0] * x_vals + self.midvein_fit[1]

            self.midvein_fit_points = np.column_stack((x_vals, y_vals))
            self.is_split = True

            # Draw line of fit
            # for point in self.midvein_fit_points:
            #     cv2.circle(self.image, tuple(point.astype(int)), radius=1, color=(255, 255, 255), thickness=-1)

    def predict_y(self, x):
        return self.midvein_fit[0] * x**2 + self.midvein_fit[1] * x + self.midvein_fit[2]

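    # Illustrative note on the method below: order_middle() is the midline pipeline. It orders
    # the raw 'middle' detections into a path (order_points), removes duplicates, trims the path
    # where its direction reverses (check_momentum), estimates the V tip from the 'outer'
    # detections (find_v_tip), then measures the ordered path length. Fewer than 5 raw 'middle'
    # points aborts the pipeline by setting keep_going to False.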
    def order_middle(self):

        if 'middle' not in self.points_list:
            self.keep_going = False
        else:
            if len(self.points_list['middle']) >= 5:
                self.logger.debug(f"Ordered Middle - Raw list contains {len(self.points_list['middle'])} points - using momentum")
                self.ordered_middle = self.order_points(self.points_list['middle'])
                self.ordered_middle = self.remove_duplicate_points(self.ordered_middle)

                self.ordered_middle = self.check_momentum(self.ordered_middle, False)

                self.v_tip = self.find_v_tip(self.points_list['outer'])
                # self.ordered_middle.append(self.v_tip)

                self.order_points_plot(self.ordered_middle, 'middle', 'QC')
                self.ordered_middle_length, self.ordered_middle = self.get_length_of_ordered_points(self.ordered_middle, 'middle')

                self.has_middle = True
            else:
                self.keep_going = False
                self.logger.debug(f"Ordered Middle - Raw list contains {len(self.points_list['middle'])} points - SKIPPING MIDDLE")

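    # Illustrative note on the three methods below: find_v_tip() fits a three-vertex "V"
    # template to the 'outer' detections by minimizing, via Nelder-Mead (minimize is assumed
    # to be scipy.optimize.minimize, imported earlier in this file), the summed distance from
    # each point to its nearest template vertex; the optimized tip is returned as an (x, y)
    # tuple of ints. Example with hypothetical values: v_shape_template((50, 10), 5) returns
    # the vertices [45, 15], (50, 10), [55, 15]; both arms sit below the tip because image
    # y grows downward.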
    def v_shape_template(self, tip, scale):
        return np.array([
            [tip[0] - scale, tip[1] + scale],
            tip,
            [tip[0] + scale, tip[1] + scale]
        ])

    def error_function(self, params, points):
        tip = params[:2]
        scale = params[2]
        template_points = self.v_shape_template(tip, scale)

        error = 0
        for p in points:
            dist = np.min(np.linalg.norm(template_points - p, axis=1))
            error += dist

        return error

    def find_v_tip(self, points):
        points = np.array(points)
        initial_guess = np.mean(points, axis=0)
        initial_scale = np.linalg.norm(np.max(points, axis=0) - np.min(points, axis=0)) / 2

        result = minimize(
            self.error_function,
            np.hstack([initial_guess, initial_scale]),
            args=(points,),
            method='Nelder-Mead'
        )

        tip = result.x[:2]
        return tuple(map(int, tip))

    def show_QC_image(self):
        if self.do_show_QC_images:
            cv2.imshow('QC image', self.image)
            cv2.waitKey(0)

    def show_angle_image(self):
        if self.do_show_QC_images:
            cv2.imshow('Angles image', self.image_angles)
            cv2.waitKey(0)

    def show_final_image(self):
        if self.do_show_final_images:
            cv2.imshow('Final image', self.image_final)
            cv2.waitKey(0)

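    # Illustrative note on the method below: get_length_of_ordered_points() sums the Euclidean
    # lengths of consecutive segments, then makes a second pass that drops any single segment
    # longer than half of the first-pass total (a cheap guard against misordered points and
    # outliers). Example with hypothetical segment lengths [3, 4, 120]: the first-pass total is
    # 127, the cutoff is 63.5, so the 120 px jump is excluded and the returned length is 7.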
    def get_length_of_ordered_points(self, points, name):
        # if self.file_name == 'B_774373631_Ebenaceae_Diospyros_buxifolia__L__438-687-578-774':
        #     print('hi')
        total_length = 0
        total_length_first_pass = 0
        for i in range(len(points) - 1):
            x1, y1 = points[i]
            x2, y2 = points[i+1]
            segment_length = math.sqrt((x2-x1)**2 + (y2-y1)**2)
            total_length_first_pass += segment_length
        cutoff = total_length_first_pass / 2
        # print(f'Total length of {name}: {total_length_first_pass}')
        # print(f'points length {len(points)}')
        self.logger.debug(f"Total length of {name}: {total_length_first_pass}")
        self.logger.debug(f"Points length {len(points)}")

        # If there are more than 2 points, this will exclude extreme outliers, or
        # misordered points that don't belong
        if len(points) > 2:
            pop_ind = []
            for i in range(len(points) - 1):
                x1, y1 = points[i]
                x2, y2 = points[i+1]
                segment_length = math.sqrt((x2-x1)**2 + (y2-y1)**2)
                if segment_length < cutoff:
                    total_length += segment_length
                else:
                    pop_ind.append(i)

            # Pop from the highest index down so earlier removals do not shift the
            # positions of indices that still need to be removed
            for exclude in sorted(pop_ind, reverse=True):
                points.pop(exclude)
            # print(f'Total length of {name}: {total_length}')
            # print(f'Excluded {len(pop_ind)} points')
            # print(f'points length {len(points)}')
            self.logger.debug(f"Total length of {name}: {total_length}")
            self.logger.debug(f"Excluded {len(pop_ind)} points")
            self.logger.debug(f"Points length {len(points)}")

        else:
            total_length = total_length_first_pass

        return total_length, points

    def order_points_plot(self, points, version, QC_or_final):
        # thk_base = 0
        thk_base = 16

        if version == 'middle':
            # color = (0, 255, 0)
            color = gc('green')
            thick = 1  # 2 + thk_base
        elif version == 'tip':
            color = gc('green')
            thick = 1  # 2 + thk_base
        elif version == 'outer':
            color = gc('red')
            thick = 1  # 2 + thk_base
        elif version == 'outer_left':
            color = gc('pink')
            thick = 1  # 2 + thk_base
        elif version == 'outer_right':
            color = gc('cyan')
            thick = 1  # 2 + thk_base

        # elif version == 'lamina_width_alt':
        #     color = (100, 100, 255)
        #     thick = 2 + thk_base
        # elif version == 'not_reflex':
        #     color = (200, 0, 123)
        #     thick = 3 + thk_base
        # elif version == 'reflex':
        #     color = (0, 120, 200)
        #     thick = 3 + thk_base
        # elif version == 'petiole_tip_alt':
        #     color = (255, 55, 100)
        #     thick = 1 + thk_base
        # elif version == 'petiole_tip':
        #     color = (100, 255, 55)
        #     thick = 1 + thk_base
        # elif version == 'failed_angle':
        #     color = (0, 0, 0)
        #     thick = 3 + thk_base

        # Convert the points to a numpy array and round to integer values
        points_arr = np.round(np.array(points)).astype(int)

        # Draw a line connecting all of the points
        if QC_or_final == 'QC':
            for i in range(len(points_arr) - 1):
                cv2.line(self.image, tuple(points_arr[i]), tuple(points_arr[i+1]), color, thick)
        else:
            for i in range(len(points_arr) - 1):
                cv2.line(self.image_final, tuple(points_arr[i]), tuple(points_arr[i+1]), color, thick)

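    # Illustrative note on the two methods below: check_momentum() walks outward from the middle
    # of the ordered path in both directions and truncates the path where the step direction
    # reverses. check_momentum_change() flags a reversal when the angle between successive step
    # vectors exceeds 90 degrees. Example with hypothetical vectors: prev_vec = (1, 0) and
    # cur_vec = (-1, 0) give cos_theta = -1 and theta = pi, which is flagged; a right-angle turn
    # to (0, 1) gives theta = pi/2 exactly and is not flagged.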
    def check_momentum(self, coords, info):
        original_coords = coords
        # find middle index of coordinates
        mid_idx = len(coords) // 2

        # set up variables for running average
        running_avg = np.array(coords[mid_idx-1])
        avg_count = 1

        # iterate over coordinates to check momentum change
        prev_vec = np.array(coords[mid_idx-1]) - np.array(coords[mid_idx-2])
        cur_idx = mid_idx - 1
        while cur_idx >= 0:
            cur_vec = np.array(coords[cur_idx]) - np.array(coords[cur_idx-1])

            # add current point to running average
            running_avg = (running_avg * avg_count + np.array(coords[cur_idx])) / (avg_count + 1)
            avg_count += 1

            # check for momentum change
            if self.check_momentum_change(prev_vec, cur_vec):
                break

            prev_vec = cur_vec
            cur_idx -= 1

        # use running average to check for momentum change
        cur_vec = np.array(coords[cur_idx]) - running_avg
        if self.check_momentum_change(prev_vec, cur_vec):
            cur_idx += 1

        prev_vec = np.array(coords[mid_idx+1]) - np.array(coords[mid_idx])
        cur_idx2 = mid_idx + 1
        while cur_idx2 < len(coords):

            # check if current index is out of range
            if cur_idx2 >= len(coords):
                break

            cur_vec = np.array(coords[cur_idx2]) - np.array(coords[cur_idx2-1])

            # add current point to running average
            running_avg = (running_avg * avg_count + np.array(coords[cur_idx2])) / (avg_count + 1)
            avg_count += 1

            # check for momentum change
            if self.check_momentum_change(prev_vec, cur_vec):
                break

            prev_vec = cur_vec
            cur_idx2 += 1

        # use running average to check for momentum change
        if cur_idx2 < len(coords):
            cur_vec = np.array(coords[cur_idx2]) - running_avg
            if self.check_momentum_change(prev_vec, cur_vec):
                cur_idx2 -= 1

        # remove problematic points and subsequent points from list of coordinates
        new_coords = coords[:cur_idx2] + coords[mid_idx:cur_idx2:-1]
        if info:
            return new_coords, len(original_coords) != len(new_coords)
        else:
            return new_coords

    # define function to check for momentum change
    def check_momentum_change(self, prev_vec, cur_vec):
        dot_product = np.dot(prev_vec, cur_vec)
        prev_norm = np.linalg.norm(prev_vec)
        cur_norm = np.linalg.norm(cur_vec)
        denom = (prev_norm * cur_norm)
        if denom == 0:
            denom = 0.0000000001
        cos_theta = dot_product / denom
        theta = np.arccos(cos_theta)
        return abs(theta) > np.pi / 2

    def remove_duplicate_points(self, points):
        unique_set = set()
        new_list = []

        for item in points:
            if item not in unique_set:
                unique_set.add(item)
                new_list.append(item)
        return new_list

    def distance(self, point1, point2):
        x1, y1 = point1
        x2, y2 = point2
        return math.sqrt((x2 - x1) ** 2 + (y2 - y1) ** 2)

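    # Illustrative note on the method below: order_points() builds a greedy nearest-neighbor
    # tour. It tries every point as the starting point, repeatedly appends the closest unvisited
    # point, and keeps the tour with the smallest total length; this is roughly cubic in the
    # number of points, which is workable for small landmark sets. The commented-out variant
    # further below instead keeps the tour with the largest sum of turning angles ("smoothest");
    # only the shortest-tour version is active.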
    ### Shortest distance
    def order_points(self, points):
        points = [tuple(point) for point in points]  # Convert numpy.ndarray points to tuples

        best_tour = None
        shortest_tour_length = float('inf')

        for start_point in points:
            tour = [start_point]
            unvisited = set(points) - {start_point}

            while unvisited:
                nearest = min(unvisited, key=lambda point: self.distance(tour[-1], point))
                tour.append(nearest)
                unvisited.remove(nearest)

            # Calculate the length of the current tour
            tour_length = sum(self.distance(tour[i - 1], tour[i]) for i in range(1, len(tour)))

            # Update the best_tour if the current tour is shorter
            if tour_length < shortest_tour_length:
                shortest_tour_length = tour_length
                best_tour = tour

        return best_tour

    ### Smoothest
    '''
    def angle_between_points(self, p1, p2, p3):
        v1 = np.array([p1[0] - p2[0], p1[1] - p2[1]])
        v2 = np.array([p3[0] - p2[0], p3[1] - p2[1]])
        angle = np.arccos(np.dot(v1, v2) / (np.linalg.norm(v1) * np.linalg.norm(v2)))
        return angle

    def order_points(self, points):
        points = [tuple(point) for point in points]  # Convert numpy.ndarray points to tuples

        best_tour = None
        largest_sum_angles = 0

        for start_point in points:
            tour = [start_point]
            unvisited = set(points) - {start_point}

            while unvisited:
                nearest = min(unvisited, key=lambda point: self.distance(tour[-1], point))
                tour.append(nearest)
                unvisited.remove(nearest)

            # Calculate the sum of angles for the current tour
            sum_angles = sum(self.angle_between_points(tour[i - 1], tour[i], tour[i + 1]) for i in range(1, len(tour) - 1))

            # Update the best_tour if the current tour has a larger sum of angles
            if sum_angles > largest_sum_angles:
                largest_sum_angles = sum_angles
                best_tour = tour

        return best_tour
    '''
    ### ^^^ Smoothest

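    # Illustrative note on the method below: convert_YOLO_bbox_to_point() converts each
    # normalized YOLO box (x_center, y_center, w, h) into a pixel-space center point.
    # Example with hypothetical values in a 1000 x 800 (width x height) image: the box
    # (0.5, 0.25, 0.1, 0.1) gives corners x1=450, y1=160, x2=550, y2=240, so the stored
    # center point is (500, 200).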
    def convert_YOLO_bbox_to_point(self):
        for point_type, bbox in self.points_list.items():
            xy_points = []
            for point in bbox:
                x = point[0]
                y = point[1]
                w = point[2]
                h = point[3]
                x1 = int((x - w/2) * self.width)
                y1 = int((y - h/2) * self.height)
                x2 = int((x + w/2) * self.width)
                y2 = int((y + h/2) * self.height)
                xy_points.append((int((x1+x2)/2), int((y1+y2)/2)))
            self.points_list[point_type] = xy_points

    def parse_all_points(self):
        points_list = {}

        for sublist in self.all_points:
            key = sublist[0]
            value = sublist[1:]

            key = self.swap_number_for_string(key)

            if key not in points_list:
                points_list[key] = []
            points_list[key].append(value)

        # print(points_list)
        self.points_list = points_list

    def swap_number_for_string(self, key):
        for k, v in self.classes.items():
            if v == key:
                return k
        return key

    def setup_final_image(self):
        self.image_final = cv2.imread(os.path.join(self.dir_temp, '.'.join([self.file_name, 'jpg'])))

        if self.leaf_type == 'Landmarks_Armature':
            self.path_image_final = os.path.join(self.Dirs.landmarks_armature_overlay_final, '.'.join([self.file_name, 'jpg']))

    def setup_QC_image(self):
        self.image = cv2.imread(os.path.join(self.dir_temp, '.'.join([self.file_name, 'jpg'])))

        if self.leaf_type == 'Landmarks_Armature':
            self.path_QC_image = os.path.join(self.Dirs.landmarks_armature_overlay_QC, '.'.join([self.file_name, 'jpg']))

    def setup_angle_image(self):
        self.image_angles = cv2.imread(os.path.join(self.dir_temp, '.'.join([self.file_name, 'jpg'])))

        if self.leaf_type == 'Landmarks_Armature':
            self.path_angles_image = os.path.join(self.Dirs.landmarks_armature_overlay_angles, '.'.join([self.file_name, 'jpg']))

    def define_landmark_classes(self):
        self.classes = {
            'tip': 0,
            'middle': 1,
            'outer': 2,
        }

    def set_cfg_values(self):
        self.do_show_QC_images = self.cfg['leafmachine']['landmark_detector_armature']['do_show_QC_images']
        self.do_save_QC_images = self.cfg['leafmachine']['landmark_detector_armature']['do_save_QC_images']
        self.do_show_final_images = self.cfg['leafmachine']['landmark_detector_armature']['do_show_final_images']
        self.do_save_final_images = self.cfg['leafmachine']['landmark_detector_armature']['do_save_final_images']

    def init_lists_dicts(self):
        # Initialize all lists and dictionaries
        self.classes = {}
        self.points_list = []
        self.image = []

        self.ordered_middle = []

        self.midvein_fit = []
        self.midvein_fit_points = []

        self.outer_right = []
        self.outer_left = []

        # self.ordered_outer_left = []
        # self.ordered_outer_right = []

        self.tip = []

        self.apex_left = []
        self.apex_right = []
        self.apex_center = []

        self.base_left = []
        self.base_right = []
        self.base_center = []
        self.lamina_base = []
        self.width_left = []
        self.width_right = []

    def get_final(self):
        self.image_final = np.hstack((self.image, self.image_angles))
        return self.image_final

def euclidean_distance(p1, p2):
    return math.sqrt((p1[0] - p2[0])**2 + (p1[1] - p2[1])**2)

def gc(color):
    colors = {
        'red': (0, 0, 255),
        'green': (0, 255, 0),
        'blue': (255, 0, 0),
        'yellow': (0, 255, 255),
        'pink': (255, 0, 255),
        'cyan': (255, 255, 0),
        'black': (0, 0, 0),
        'white': (255, 255, 255),
        'gray': (128, 128, 128),
        'orange': (0, 165, 255),
        'purple': (128, 0, 128),
        'lightpink': (203, 192, 255),
        'brown': (42, 42, 165),
        'navy': (128, 0, 0),
        'teal': (128, 128, 0),
    }
    return colors.get(color.lower(), (0, 0, 0))
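# Illustrative note: gc() returns OpenCV-style BGR tuples, not RGB: gc('red') is (0, 0, 255)
# and gc('blue') is (255, 0, 0), with (0, 0, 0) as the fallback for unknown names. Example
# with a hypothetical image array img: cv2.circle(img, (10, 10), 5, gc('red'), -1) draws a
# filled red dot.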
vouchervision/component_detector/color_profiles/ColorProfile__LANDMARK.csv
ADDED
@@ -0,0 +1,9 @@
apex_angle,255,178,54
base_angle,228,255,54
lamina_base,52,240,233
lamina_tip,246,33,255
lamina_width,35,44,255
lobe_tip,229,237,46
midvein_trace,246,33,255
petiole_tip,255,33,42
petiole_trace,255,33,42
vouchervision/component_detector/color_profiles/ColorProfile__LANDMARK_ARM.csv
ADDED
@@ -0,0 +1,4 @@
tip,35,44,255
middle,228,255,54
outer,52,240,233
,,,
vouchervision/component_detector/color_profiles/ColorProfile__PLANT.csv
ADDED
@@ -0,0 +1,11 @@
Leaf_WHOLE,0,255,55,00ff37
Leaf_PARTIAL,0,255,250,69fffc
Leaflet,255,203,0,ffcb00
Seed_Fruit_ONE,252,255,0,fcff00
Seed_Fruit_MANY,0,0,0,0
Flower_ONE,255,52,255,ff34ff
Flower_MANY,154,0,255,9a00ff
Bud,255,0,9,ff0009
Specimen,0,0,0,ceffc4
Roots,255,134,0,ff8600
Wood,144,22,22,901616