Spaces:

AutomaticReimbursementTool
/

demo

Sleeping

App Files Files Community

ankur-bohra committed on Jul 31, 2023

Commit

8152a82

1 Parent(s): d2ac459

Add interface

Browse files

Files changed (5) hide show

app.py +359 -13
categories/__init__.py +5 -5
categories/random_/__init__.py +8 -2
main.py +10 -5
requirements.txt +79 -301

app.py CHANGED Viewed

@@ -1,20 +1,366 @@
 import gradio as gr
 from categories import Category
-def predict(text):
-    pass
-_input = gr.File(type="binary", file_count="single", file_types=["image", "pdf"], label="Upload a receipt as a document or as an image")
-category_output = gr.Dropdown(Category.__members__.values(), value=Category.RANDOM, label="Identified category")
-information_output = gr.Json(label="Extracted information")
-demo = gr.Interface(
-    title="Automatic Reimbursement Tool Demo",
-    description="Description",
-    fn=predict,
-    inputs=_input,
-    outputs=[category_output, information_output],
-)
-demo.launch(debug=True, show_error=True)

+import base64
+import os
+from io import BytesIO
+from pathlib import Path
+from langchain.schema.output_parser import OutputParserException
 import gradio as gr
+from PIL import Image
+import categories
 from categories import Category
+from main import process_image, process_pdf
+HF_TOKEN = os.getenv("HF_TOKEN")
+PDF_IFRAME = """
+<div style="border-radius: 10px; width: 100%; overflow: hidden;">
+    <iframe
+        src="data:application/pdf;base64,{0}"
+        width="100%"
+        height="400"
+        type="application/pdf">
+    </iframe>
+</div>"""
+hf_writer_normal = gr.HuggingFaceDatasetSaver(
+    HF_TOKEN, "automatic-reimbursement-tool-demo", separate_dirs=False
+)
+hf_writer_incorrect = gr.HuggingFaceDatasetSaver(
+    HF_TOKEN, "automatic-reimbursement-tool-demo-incorrect", separate_dirs=False
+)
+# with open("examples/example1.pdf", "rb") as pdf_file:
+#     base64_pdf = base64.b64encode(pdf_file.read())
+# example_paths = []
+# current_file_path = None
+# def ignore_examples(function):
+#     def new_function(*args, **kwargs):
+#         global example_paths, current_file_path
+#         if current_file_path not in example_paths:
+#             return function(*args, **kwargs)
+def display_file(input_file):
+    global current_file_path
+    current_file_path = input_file.name if input_file else None
+    if not input_file:
+        return gr.HTML.update(visible=False), gr.Image.update(visible=False)
+    if input_file.name.endswith(".pdf"):
+        with open(input_file.name, "rb") as input_file:
+            pdf_base64 = base64.b64encode(input_file.read()).decode()
+        return gr.HTML.update(
+            PDF_IFRAME.format(pdf_base64), visible=True
+        ), gr.Image.update(visible=False)
+    else:
+        # image = Image.open(input_file.name)
+        return gr.HTML.update(visible=False), gr.Image.update(
+            input_file.name, visible=True
+        )
+def show_intermediate_outputs(show_intermediate):
+    if show_intermediate:
+        return gr.Accordion.update(visible=True)
+    else:
+        return gr.Accordion.update(visible=False)
+def show_share_contact(share_result):
+    return gr.Textbox.update(visible=share_result)
+def clear_inputs():
+    return gr.File.update(value=None)
+def submit(input_file, old_text):
+    if not input_file:
+        gr.Error("Please upload a file to continue!")
+        return gr.Textbox.update()
+    # Send change to preprocessed image or to extracted text
+    if input_file.name.endswith(".pdf"):
+        text = process_pdf(Path(input_file.name), extract_only=True)
+    else:
+        text = process_image(Path(input_file.name), extract_only=True)
+    return text
+def categorize_extracted_text(extracted_text):
+    category = categories.categorize_text(extracted_text)
+    # gr.Info(f"Recognized category: {category}")
+    return category
+def extract_from_category(category, extracted_text):
+    # gr.Info("Received category: " + category)
+    if not category:
+        return (
+            gr.Chatbot.update(None),
+            gr.JSON.update(None),
+            gr.Button.update(interactive=False),
+            gr.Button.update(interactive=False),
+        )
+    category = Category[category]
+    chain = categories.category_modules[category].chain
+    formatted_prompt = chain.prompt.format_prompt(
+        text=extracted_text,
+        format_instructions=chain.output_parser.get_format_instructions(),
+    )
+    result = chain.generate(
+        input_list=[
+            {
+                "text": extracted_text,
+                "format_instructions": chain.output_parser.get_format_instructions(),
+            }
+        ]
+    )
+    question = f""
+    if len(formatted_prompt.messages) > 1:
+        question += f"**System:**\n{formatted_prompt.messages[1].content}"
+    question += f"\n\n**Human:**\n{formatted_prompt.messages[0].content}"
+    answer = result.generations[0][0].text
+    try:
+        information = chain.output_parser.parse_with_prompt(answer, formatted_prompt)
+        information = information.json() if information else {}
+    except OutputParserException as e:
+        information = {
+            "error": "Unable to parse chatbot output",
+            "details": str(e),
+            "output": e.llm_output,
+        }
+    return (
+        gr.Chatbot.update([[question, answer]]),
+        gr.JSON.update(information),
+        gr.Button.update(interactive=True),
+        gr.Button.update(interactive=True),
+    )
+def dynamic_auto_flag(flag_method):
+    def modified_flag_method(share_result, *args, **kwargs):
+        if share_result:
+            flag_method(*args, **kwargs)
+    return modified_flag_method
+# def save_example_and_submit(input_file):
+#     example_paths.append(input_file.name)
+#     submit(input_file, "")
+with gr.Blocks(title="Automatic Reimbursement Tool Demo") as page:
+    gr.Markdown("<center><h1>Automatic Reimbursement Tool Demo</h1></center>")
+    gr.Markdown("<h2>Description</h2>")
+    gr.Markdown(
+        "The reimbursement filing process can be time-consuming and cumbersome, causing "
+        "frustration for faculty members and finance departments. Our project aims to "
+        "automate the information extraction involved in the process by feeding "
+        "extracted text to language models such as ChatGPT. This demo showcases the "
+        "categorization and extraction parts of the pipeline. Categorization is done "
+        "to identify the relevant details associated with the text, after which "
+        "extraction is done for those details using a language model."
+    )
+    gr.Markdown("<h2>Try it out!</h2>")
+    with gr.Box() as demo:
+        with gr.Row():
+            with gr.Column(variant="panel"):
+                gr.HTML(
+                    '<div><center style="color:rgb(200, 200, 200);">Input</center></div>'
+                )
+                pdf_preview = gr.HTML(label="Preview", show_label=True, visible=False)
+                image_preview = gr.Image(
+                    label="Preview", show_label=True, visible=False, height=350
+                )
+                input_file = gr.File(
+                    label="Input receipt",
+                    show_label=True,
+                    type="file",
+                    file_count="single",
+                    file_types=["image", ".pdf"],
+                )
+                input_file.change(
+                    display_file, input_file, [pdf_preview, image_preview]
+                )
+                with gr.Row():
+                    clear = gr.Button("Clear", variant="secondary")
+                    submit_button = gr.Button("Submit", variant="primary")
+                show_intermediate = gr.Checkbox(
+                    False,
+                    label="Show intermediate outputs",
+                    info="There are several intermediate steps in the process such as preprocessing, OCR, chatbot interaction. You can choose to show their results here.",
+                )
+                share_result = gr.Checkbox(
+                    True,
+                    label="Share results",
+                    info="Sharing your result with us will help us immensely in improving this tool.",
+                    interactive=True,
+                )
+                contact = gr.Textbox(
+                    type="email",
+                    label="Contact",
+                    interactive=True,
+                    placeholder="Enter your email address",
+                    info="Optionally, enter your email address to allow us to contact you regarding your result.",
+                    visible=True,
+                )
+                share_result.change(show_share_contact, share_result, [contact])
+            with gr.Column(variant="panel"):
+                gr.HTML(
+                    '<div><center style="color:rgb(200, 200, 200);">Output</center></div>'
+                )
+                category = gr.Dropdown(
+                    value=None,
+                    choices=Category.__members__.keys(),
+                    label=f"Recognized category ({', '.join(Category.__members__.keys())})",
+                    show_label=True,
+                    interactive=False,
+                )
+                intermediate_outputs = gr.Accordion(
+                    "Intermediate outputs", open=True, visible=False
+                )
+                with intermediate_outputs:
+                    extracted_text = gr.Textbox(
+                        label="Extracted text",
+                        show_label=True,
+                        max_lines=5,
+                        show_copy_button=True,
+                        lines=5,
+                        interactive=False,
+                    )
+                    chatbot = gr.Chatbot(
+                        None,
+                        label="Chatbot interaction",
+                        show_label=True,
+                        interactive=False,
+                        height=240,
+                    )
+                information = gr.JSON(label="Extracted information")
+                with gr.Row():
+                    flag_incorrect_button = gr.Button(
+                        "Flag as incorrect", variant="stop", interactive=True
+                    )
+                    flag_irrelevant_button = gr.Button(
+                        "Flag as irrelevant", variant="stop", interactive=True
+                    )
+            show_intermediate.change(
+                show_intermediate_outputs, show_intermediate, [intermediate_outputs]
+            )
+            clear.click(clear_inputs, None, [input_file])
+            submit_button.click(
+                submit,
+                [input_file, extracted_text],
+                [extracted_text],
+            )
+            submit_button.click(
+                lambda input_file, category, chatbot, information: (
+                    gr.Dropdown.update(None),
+                    gr.Chatbot.update(None),
+                    gr.Textbox.update(None),
+                ) if input_file else (category, chatbot, information),
+                [input_file, category, chatbot, information],
+                [category, chatbot, information],
+            )
+            extracted_text.change(
+                categorize_extracted_text,
+                [extracted_text],
+                [category],
+            )
+            category.change(
+                extract_from_category,
+                [category, extracted_text],
+                [chatbot, information, flag_incorrect_button, flag_irrelevant_button],
+            )
+            hf_writer_normal.setup(
+                [input_file, extracted_text, category, chatbot, information, contact],
+                flagging_dir="flagged",
+            )
+            flag_method = gr.flagging.FlagMethod(
+                hf_writer_normal, "", "", visual_feedback=True
+            )
+            information.change(
+                dynamic_auto_flag(flag_method),
+                inputs=[
+                    share_result,
+                    input_file,
+                    extracted_text,
+                    category,
+                    chatbot,
+                    information,
+                    contact,
+                ],
+                outputs=None,
+                preprocess=False,
+                queue=False,
+            )
+            hf_writer_incorrect.setup(
+                [input_file, extracted_text, category, chatbot, information, contact],
+                flagging_dir="flagged_incorrect",
+            )
+            flag_incorrect_method = gr.flagging.FlagMethod(
+                hf_writer_incorrect,
+                "Flag as incorrect",
+                "Incorrect",
+                visual_feedback=True,
+            )
+            flag_incorrect_button.click(
+                lambda: gr.Button.update(value="Saving...", interactive=False),
+                None,
+                flag_incorrect_button,
+                queue=False,
+            )
+            flag_incorrect_button.click(
+                flag_incorrect_method,
+                inputs=[
+                    input_file,
+                    extracted_text,
+                    category,
+                    chatbot,
+                    information,
+                    contact,
+                ],
+                outputs=[flag_incorrect_button],
+                preprocess=False,
+                queue=False,
+            )
+            flag_irrelevant_method = gr.flagging.FlagMethod(
+                hf_writer_incorrect,
+                "Flag as irrelevant",
+                "Irrelevant",
+                visual_feedback=True,
+            )
+            flag_irrelevant_button.click(
+                lambda: gr.Button.update(value="Saving...", interactive=False),
+                None,
+                flag_irrelevant_button,
+                queue=False,
+            )
+            flag_irrelevant_button.click(
+                flag_irrelevant_method,
+                inputs=[
+                    input_file,
+                    extracted_text,
+                    category,
+                    chatbot,
+                    information,
+                    contact,
+                ],
+                outputs=[flag_irrelevant_button],
+                preprocess=False,
+                queue=False,
+            )
+page.launch(show_api=True, show_error=True, debug=True)

categories/__init__.py CHANGED Viewed

@@ -14,11 +14,11 @@ from . import accomodation, random_, travel_cab, travel_flight
 class Category(Enum):
-    ACCOMODATION = "accomodation"
-    TRAVEL_FLIGHT = "travel_flight"
-    TRAVEL_CAB = "travel_cab"
-    # VENDOR = "vendor"
-    RANDOM = "random"
 category_modules = {

 class Category(Enum):
+    ACCOMODATION = "ACCOMODATION"
+    TRAVEL_FLIGHT = "TRAVEL_FLIGHT"
+    TRAVEL_CAB = "TRAVEL_CAB"
+    # VENDOR = "VENDOR"
+    RANDOM = "RANDOM"
 category_modules = {

categories/random_/__init__.py CHANGED Viewed

@@ -124,5 +124,11 @@ Customers desirous of availing input GST credit are requested to create a Busine
 Please note that this invoice is not a demand for payment
 Page 1 of 1"""
-    result = chain.run(text=text, format_instructions=fixing_parser.get_format_instructions())
-    print(result.json(indent=4))

 Please note that this invoice is not a demand for payment
 Page 1 of 1"""
+    # result = chain.prompt.format_prompt(text=text, format_instructions=fixing_parser.get_format_instructions())
+    # print(result.json(indent=4))
+    result = chain.generate(input_list=[{"text": text, "format_instructions": fixing_parser.get_format_instructions()}])
+    print(result)
+    result = fixing_parser.parse_with_prompt(result.generations[0][0].text, chain.prompt.format_prompt(text=text, format_instructions=fixing_parser.get_format_instructions()))
+    print(result)
+    # result = chain.run(text=text, format_instructions=output_parser.get_format_instructions(), verbose=True)
+    # print(result)

main.py CHANGED Viewed

@@ -16,11 +16,12 @@ def categorize_and_parse_text(text: str) -> BaseModel:
     Returns: The category of the text.
     """
     category = categories.categorize_text(text)
-    print("Categorized as category", category)
     result = categories.run_category_chain(category, text)
     return result
-def process_pdf(filename: Path) -> BaseModel:
     """Processes the given PDF file and extracts information from it.
     Args:
@@ -35,13 +36,14 @@ def process_pdf(filename: Path) -> BaseModel:
     # If the encoded text is too short, a pdf scanner probably added a watermark
     if len(text) < 20:
         # Try to extract text from images
-        images = processing.convert_pdf_to_image_pdf2image(pdf_bytes)
         text = extract.extract_text_from_images_pyocr_tesseract(images)
     result = categorize_and_parse_text(text)
     return result
-def process_image(filename: Path) -> BaseModel:
     """Processes the given image file and extracts information from it.
     Args:
@@ -50,8 +52,11 @@ def process_image(filename: Path) -> BaseModel:
     Returns: The extracted information.
     """
     image = Image.open(filename)
     text = extract.extract_text_from_image_pyocr_tesseract(image)
     image.close()
     result = categorize_and_parse_text(text)
     return result

     Returns: The category of the text.
     """
     category = categories.categorize_text(text)
+    # if stop_on_category:
+    #     return category, text
     result = categories.run_category_chain(category, text)
     return result
+def process_pdf(filename: Path, extract_only=False) -> BaseModel:
     """Processes the given PDF file and extracts information from it.
     Args:
     # If the encoded text is too short, a pdf scanner probably added a watermark
     if len(text) < 20:
         # Try to extract text from images
+        images = processing.preprocess_pdf_pdf2image(pdf_bytes)
         text = extract.extract_text_from_images_pyocr_tesseract(images)
+    if extract_only:
+        return text
     result = categorize_and_parse_text(text)
     return result
+def process_image(filename: Path, extract_only=False) -> BaseModel:
     """Processes the given image file and extracts information from it.
     Args:
     Returns: The extracted information.
     """
     image = Image.open(filename)
+    image = processing.preprocess_image(image)
     text = extract.extract_text_from_image_pyocr_tesseract(image)
     image.close()
+    if extract_only:
+        return text
     result = categorize_and_parse_text(text)
     return result

requirements.txt CHANGED Viewed

@@ -1,345 +1,123 @@
-aiobotocore==2.5.0
-aiofiles==22.1.0
-aiohttp==3.8.3
-aioitertools==0.7.1
-aiosignal==1.2.0
-aiosqlite==0.18.0
-alabaster==0.7.12
-anyio==3.5.0
-appdirs==1.4.4
-argon2-cffi==21.3.0
-argon2-cffi-bindings==21.2.0
-arrow==1.2.3
-astroid==2.14.2
-astropy==5.1
 asttokens==2.2.1
 async-timeout==4.0.2
-atomicwrites==1.4.0
-attrs==22.1.0
-Automat==20.2.0
-autopep8==1.6.0
-Babel==2.11.0
 backcall==0.2.0
-bcrypt==3.2.0
-beautifulsoup4==4.12.2
-binaryornot==0.4.4
-bleach==4.1.0
-bokeh==3.1.1
-botocore==1.29.76
-Bottleneck==1.3.5
-brotlipy==0.7.0
 certifi==2023.7.22
-cffi==1.15.1
-chardet==4.0.0
-charset-normalizer==2.0.4
-click==8.0.4
-cloudpickle==2.2.1
 colorama==0.4.6
-colorcet==3.0.1
 comm==0.1.3
-constantly==15.1.0
-contourpy==1.0.5
-cookiecutter==1.7.3
-cryptography==39.0.1
-cssselect==1.1.0
 cycler==0.11.0
-cytoolz==0.12.0
-daal4py==2023.1.1
-dask==2023.6.0
 dataclasses-json==0.5.13
-datasets==2.12.0
-datashader==0.15.0
-datashape==0.5.4
 debugpy==1.6.7
 decorator==5.1.1
-defusedxml==0.7.1
-diff-match-patch==20200713
-dill==0.3.6
-distributed==2023.6.0
-docstring-to-markdown==0.11
-docutils==0.18.1
-entrypoints==0.4
-et-xmlfile==1.1.0
-exceptiongroup==1.0.4
 executing==1.2.0
-fastjsonschema==2.16.2
-filelock==3.9.0
-flake8==6.0.0
-Flask==2.2.2
-fonttools==4.25.0
-frozenlist==1.3.3
-fsspec==2023.4.0
-gensim==4.3.0
-greenlet==2.0.1
-h5py==3.7.0
-HeapDict==1.0.1
-holoviews==1.16.2
-huggingface-hub==0.15.1
-hvplot==0.8.4
-hyperlink==21.0.0
 idna==3.4
-imagecodecs==2021.8.26
-imageio==2.26.0
-imagesize==1.4.1
-imbalanced-learn==0.10.1
-importlib-metadata==6.0.0
-incremental==21.3.0
-inflection==0.5.1
-iniconfig==1.1.1
-intake==0.6.8
-intervaltree==3.1.0
-ipykernel==6.22.0
-ipython==8.12.0
-ipython-genutils==0.2.0
-ipywidgets==8.0.4
-iso4217==1.9.20220401
-isort==5.9.3
-itemadapter==0.3.0
-itemloaders==1.0.4
-itsdangerous==2.0.1
-jaraco.classes==3.2.1
 jedi==0.18.2
-jellyfish==0.9.0
 Jinja2==3.1.2
-jinja2-time==0.2.0
-jmespath==0.10.0
-joblib==1.2.0
-json5==0.9.6
-jsonschema==4.17.3
-jupyter==1.0.0
-jupyter_client==8.2.0
-jupyter-console==6.6.3
-jupyter_core==5.3.0
-jupyter-events==0.6.3
-jupyter-server==1.23.6
-jupyter_server_fileid==0.9.0
-jupyter_server_terminals==0.4.4
-jupyter_server_ydoc==0.8.0
-jupyter-ydoc==0.2.4
-jupyterlab==3.6.3
-jupyterlab-pygments==0.1.2
-jupyterlab_server==2.22.0
-jupyterlab-widgets==3.0.5
-keyring==23.13.1
 kiwisolver==1.4.4
-langchain==0.0.245
 langsmith==0.0.15
-lazy_loader==0.2
-lazy-object-proxy==1.6.0
-linkify-it-py==2.0.0
-llvmlite==0.40.0
-lmdb==1.4.1
-locket==1.0.0
-lxml==4.9.2
-lz4==4.3.2
-Markdown==3.4.1
 markdown-it-py==2.2.0
-MarkupSafe==2.1.1
 marshmallow==3.20.1
-matplotlib==3.7.1
 matplotlib-inline==0.1.6
-mccabe==0.7.0
-mdit-py-plugins==0.3.0
-mdurl==0.1.0
-menuinst==1.4.19
-mistune==3.0.0
-mkl-fft==1.3.6
-mkl-random==1.2.2
-mkl-service==2.4.0
-more-itertools==8.12.0
-mpmath==1.2.1
-msgpack==1.0.3
-multidict==6.0.2
-multipledispatch==0.6.0
-multiprocess==0.70.14
-munkres==1.1.4
-mypy-extensions==0.4.3
-nbclassic==0.5.5
-nbclient==0.5.13
-nbconvert==7.7.3
-nbformat==5.7.0
 nest-asyncio==1.5.6
-networkx==2.8.4
-nltk==3.7
-notebook==6.5.4
-notebook_shim==0.2.2
-numba==0.57.0
 numexpr==2.8.4
-numpy==1.24.3
-numpydoc==1.5.0
 openai==0.27.8
 openapi-schema-pydantic==1.2.4
 opencv-python-headless==4.8.0.74
-openpyxl==3.0.10
-packaging==23.0
-pandas==1.5.3
-pandocfilters==1.5.0
-panel==1.1.0
-param==1.13.0
-paramiko==2.8.1
-parsel==1.6.0
 parso==0.8.3
-partd==1.2.0
-pathspec==0.10.3
-patsy==0.5.3
 pdf2image==1.16.3
-pep8==1.7.1
-pexpect==4.8.0
 pickleshare==0.7.5
-Pillow==9.4.0
-pip==23.1.2
-platformdirs==3.5.0
-plotly==5.9.0
-pluggy==1.0.0
-ply==3.11
-pooch==1.4.0
-poyo==0.5.0
-prometheus-client==0.14.1
-prompt-toolkit==3.0.38
-Protego==0.1.16
 psutil==5.9.5
-ptyprocess==0.7.0
 pure-eval==0.2.2
-py-cpuinfo==8.0.0
-pyarrow==11.0.0
-pyasn1==0.4.8
-pyasn1-modules==0.2.8
-pycodestyle==2.10.0
-pycparser==2.21
-pyct==0.5.0
-pycurl==7.45.2
 pydantic==1.10.12
-PyDispatcher==2.0.5
-pydocstyle==6.3.0
-pyerfa==2.0.0
-pyflakes==3.0.1
 Pygments==2.15.1
-pylint==2.16.2
-pylint-venv==2.3.0
-pyls-spyder==0.4.0
-PyNaCl==1.5.0
 pyocr==0.8.3
-pyodbc==4.0.34
-pyOpenSSL==23.0.0
 pyparsing==3.0.9
 pypdf==3.13.0
-PyQt5==5.15.7
-PyQt5-sip==12.11.0
-PyQtWebEngine==5.15.4
-pyrsistent==0.18.0
-PySocks==1.7.1
-pytest==7.3.1
 python-dateutil==2.8.2
-python-json-logger==2.0.7
-python-lsp-black==1.2.1
-python-lsp-jsonrpc==1.0.0
-python-lsp-server==1.7.2
-python-slugify==5.0.2
-python-snappy==0.6.1
-pytoolconfig==1.2.5
-pytz==2022.7
-pyviz-comms==2.3.0
-PyWavelets==1.4.1
-pywin32==305.1
-pywin32-ctypes==0.2.0
-pywinpty==2.0.10
-PyYAML==6.0
-pyzmq==25.0.2
-QDarkStyle==3.0.2
-qstylizer==0.2.2
-QtAwesome==1.2.2
-qtconsole==5.4.2
-QtPy==2.2.0
-queuelib==1.5.0
-regex==2022.7.9
-requests==2.29.0
-requests-file==1.5.1
-responses==0.13.3
-rfc3339-validator==0.1.4
-rfc3986-validator==0.1.1
-rope==1.7.0
-Rtree==1.0.1
-s3fs==2023.4.0
-sacremoses==0.0.43
-scikit-image==0.20.0
-scikit-learn==1.2.2
-scikit-learn-intelex==20230426.121158
-scipy==1.10.1
-Scrapy==2.8.0
-seaborn==0.12.2
-Send2Trash==1.8.0
-service-identity==18.1.0
-setuptools==67.8.0
-sip==6.6.2
 six==1.16.0
-smart-open==5.2.1
-sniffio==1.2.0
-snowballstemmer==2.2.0
-sortedcontainers==2.4.0
-soupsieve==2.4
-Sphinx==5.0.2
-sphinxcontrib-applehelp==1.0.2
-sphinxcontrib-devhelp==1.0.2
-sphinxcontrib-htmlhelp==2.0.0
-sphinxcontrib-jsmath==1.0.1
-sphinxcontrib-qthelp==1.0.3
-sphinxcontrib-serializinghtml==1.1.5
-spyder==5.4.3
-spyder-kernels==2.4.3
-SQLAlchemy==1.4.39
 stack-data==0.6.2
-statsmodels==0.13.5
-sympy==1.11.1
-tables==3.8.0
-tabulate==0.8.10
-TBB==0.2
-tblib==1.7.0
 tenacity==8.2.2
-terminado==0.17.1
-text-unidecode==1.3
-textdistance==4.2.1
-threadpoolctl==2.2.0
-three-merge==0.1.1
-tifffile==2021.7.2
-tinycss2==1.2.1
-tldextract==3.2.0
-tokenizers==0.13.2
-toml==0.10.2
-tomli==2.0.1
-tomlkit==0.11.1
 toolz==0.12.0
-torch==2.0.1
-tornado==6.3.1
 tqdm==4.65.0
 traitlets==5.9.0
-transformers==4.29.2
-Twisted==22.10.0
-twisted-iocpsupport==1.0.2
-typing_extensions==4.6.3
 typing-inspect==0.9.0
-uc-micro-py==1.0.1
-ujson==5.4.0
-Unidecode==1.2.0
-urllib3==1.26.16
-w3lib==1.21.0
-watchdog==2.1.6
 wcwidth==0.2.6
-webencodings==0.5.1
-websocket-client==0.58.0
-Werkzeug==2.2.3
-whatthepatch==1.0.2
 wheel==0.38.4
-widgetsnbextension==4.0.5
-win-inet-pton==1.1.0
-wrapt==1.14.1
-xarray==2022.11.0
-xlwings==0.29.1
-xxhash==2.0.2
-xyzservices==2022.9.0
-y-py==0.5.9
-yapf==0.31.0
-yarl==1.8.1
-ypy-websocket==0.8.2
-zict==2.2.0
-zipp==3.11.0
-zope.interface==5.4.0

+aiofiles==23.1.0
+aiohttp==3.8.5
+aiosignal==1.3.1
+altair==5.0.1
+annotated-types==0.5.0
+anyio==3.7.1
 asttokens==2.2.1
 async-timeout==4.0.2
+attrs==23.1.0
 backcall==0.2.0
+backports.functools-lru-cache==1.6.5
 certifi==2023.7.22
+charset-normalizer==3.2.0
+click==8.1.6
 colorama==0.4.6
 comm==0.1.3
+contourpy==1.1.0
 cycler==0.11.0
 dataclasses-json==0.5.13
+datasets==2.14.1
 debugpy==1.6.7
 decorator==5.1.1
+dill==0.3.7
+exceptiongroup==1.1.2
 executing==1.2.0
+fastapi==0.100.1
+ffmpy==0.3.1
+filelock==3.12.2
+fonttools==4.41.1
+frozenlist==1.4.0
+fsspec==2023.6.0
+gradio==3.39.0
+gradio_client==0.3.0
+greenlet==2.0.2
+h11==0.14.0
+httpcore==0.17.3
+httpx==0.24.1
+huggingface-hub==0.16.4
 idna==3.4
+importlib-metadata==6.8.0
+importlib-resources==6.0.0
+ipykernel==6.25.0
+ipython==8.14.0
+iso4217==1.11.20220401
 jedi==0.18.2
 Jinja2==3.1.2
+jsonschema==4.18.4
+jsonschema-specifications==2023.7.1
+jupyter_client==8.3.0
+jupyter_core==5.3.1
 kiwisolver==1.4.4
+langchain==0.0.247
 langsmith==0.0.15
+linkify-it-py==2.0.2
 markdown-it-py==2.2.0
+MarkupSafe==2.1.3
 marshmallow==3.20.1
+matplotlib==3.7.2
 matplotlib-inline==0.1.6
+mdit-py-plugins==0.3.3
+mdurl==0.1.2
+multidict==6.0.4
+multiprocess==0.70.15
+mypy-extensions==1.0.0
 nest-asyncio==1.5.6
 numexpr==2.8.4
+numpy==1.25.1
 openai==0.27.8
 openapi-schema-pydantic==1.2.4
 opencv-python-headless==4.8.0.74
+orjson==3.9.2
+packaging==23.1
+pandas==2.0.3
 parso==0.8.3
 pdf2image==1.16.3
 pickleshare==0.7.5
+Pillow==10.0.0
+pip==23.2.1
+platformdirs==3.9.1
+prompt-toolkit==3.0.39
 psutil==5.9.5
 pure-eval==0.2.2
+pyarrow==12.0.1
 pydantic==1.10.12
+pydantic_core==2.4.0
+pydub==0.25.1
 Pygments==2.15.1
 pyocr==0.8.3
 pyparsing==3.0.9
 pypdf==3.13.0
 python-dateutil==2.8.2
+python-multipart==0.0.6
+pytz==2023.3
+pywin32==304
+PyYAML==6.0.1
+pyzmq==25.1.0
+referencing==0.30.0
+requests==2.31.0
+rpds-py==0.9.2
+semantic-version==2.10.0
+setuptools==68.0.0
 six==1.16.0
+sniffio==1.3.0
+SQLAlchemy==2.0.19
 stack-data==0.6.2
+starlette==0.27.0
 tenacity==8.2.2
 toolz==0.12.0
+tornado==6.3.2
 tqdm==4.65.0
 traitlets==5.9.0
+typing_extensions==4.7.1
 typing-inspect==0.9.0
+tzdata==2023.3
+uc-micro-py==1.0.2
+urllib3==2.0.4
+uvicorn==0.23.1
 wcwidth==0.2.6
+websockets==11.0.3
 wheel==0.38.4
+xxhash==3.3.0
+yarl==1.9.2
+zipp==3.16.2