ankur-bohra committed on
Commit
8152a82
·
1 Parent(s): d2ac459

Add interface

Browse files
Files changed (5) hide show
  1. app.py +359 -13
  2. categories/__init__.py +5 -5
  3. categories/random_/__init__.py +8 -2
  4. main.py +10 -5
  5. requirements.txt +79 -301
app.py CHANGED
@@ -1,20 +1,366 @@
 
 
 
 
 
 
1
  import gradio as gr
 
 
 
2
  from categories import Category
 
3
 
4
- def predict(text):
5
- pass
 
 
 
 
 
 
 
 
6
 
 
 
 
 
 
 
 
 
7
 
8
- _input = gr.File(type="binary", file_count="single", file_types=["image", "pdf"], label="Upload a receipt as a document or as an image")
9
- category_output = gr.Dropdown(Category.__members__.values(), value=Category.RANDOM, label="Identified category")
10
- information_output = gr.Json(label="Extracted information")
11
 
12
- demo = gr.Interface(
13
- title="Automatic Reimbursement Tool Demo",
14
- description="Description",
15
- fn=predict,
16
- inputs=_input,
17
- outputs=[category_output, information_output],
18
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
 
20
- demo.launch(debug=True, show_error=True)
 
1
+ import base64
2
+ import os
3
+ from io import BytesIO
4
+ from pathlib import Path
5
+
6
+ from langchain.schema.output_parser import OutputParserException
7
  import gradio as gr
8
+ from PIL import Image
9
+
10
+ import categories
11
  from categories import Category
12
+ from main import process_image, process_pdf
13
 
14
# Hugging Face token read from the HF_TOKEN environment variable
# (None when the variable is unset).
HF_TOKEN = os.environ.get("HF_TOKEN")

# HTML template for the PDF preview; ``{0}`` is replaced with the
# base64-encoded contents of the PDF.
PDF_IFRAME = """
<div style="border-radius: 10px; width: 100%; overflow: hidden;">
<iframe
src="data:application/pdf;base64,{0}"
width="100%"
height="400"
type="application/pdf">
</iframe>
</div>"""

# Dataset saver used for automatically shared results.
hf_writer_normal = gr.HuggingFaceDatasetSaver(
    HF_TOKEN,
    "automatic-reimbursement-tool-demo",
    separate_dirs=False,
)
# Dataset saver used when the user flags a result through one of the buttons.
hf_writer_incorrect = gr.HuggingFaceDatasetSaver(
    HF_TOKEN,
    "automatic-reimbursement-tool-demo-incorrect",
    separate_dirs=False,
)
31
+ # with open("examples/example1.pdf", "rb") as pdf_file:
32
+ # base64_pdf = base64.b64encode(pdf_file.read())
33
 
 
 
 
34
 
35
+ # example_paths = []
36
+ # current_file_path = None
37
+
38
+ # def ignore_examples(function):
39
+ # def new_function(*args, **kwargs):
40
+ # global example_paths, current_file_path
41
+ # if current_file_path not in example_paths:
42
+ # return function(*args, **kwargs)
43
+
44
+
45
+
46
def display_file(input_file):
    """Show a preview of the uploaded receipt.

    Args:
        input_file: The value of the file input (an object with a ``name``
            attribute holding the path on disk), or a falsy value when the
            input has been cleared.

    Returns:
        A pair of component updates ``(pdf_preview, image_preview)``. At most
        one of the two previews is made visible: the HTML preview for PDFs,
        the image preview for everything else, neither when there is no file.
    """
    # Kept for backward compatibility: the path of the current upload is
    # recorded in a module-level variable.
    global current_file_path
    current_file_path = input_file.name if input_file else None

    if not input_file:
        return gr.HTML.update(visible=False), gr.Image.update(visible=False)

    # Compare the extension case-insensitively so "RECEIPT.PDF" is not
    # mistaken for an image.
    if input_file.name.lower().endswith(".pdf"):
        # Use a separate name for the handle instead of rebinding the
        # ``input_file`` parameter.
        with open(input_file.name, "rb") as pdf_file:
            pdf_base64 = base64.b64encode(pdf_file.read()).decode()
        return (
            gr.HTML.update(PDF_IFRAME.format(pdf_base64), visible=True),
            gr.Image.update(visible=False),
        )

    return (
        gr.HTML.update(visible=False),
        gr.Image.update(input_file.name, visible=True),
    )
62
+
63
+
64
def show_intermediate_outputs(show_intermediate):
    """Return an accordion update that shows or hides the intermediate outputs.

    Args:
        show_intermediate: Truthy to make the accordion visible, falsy to
            hide it.
    """
    accordion_visible = bool(show_intermediate)
    return gr.Accordion.update(visible=accordion_visible)
69
+
70
+
71
def show_share_contact(share_result):
    """Return a textbox update whose visibility follows ``share_result``."""
    contact_update = gr.Textbox.update(visible=share_result)
    return contact_update
73
+
74
+
75
def clear_inputs():
    """Return a file-input update that removes the current upload."""
    cleared_file = gr.File.update(value=None)
    return cleared_file
77
+
78
+
79
def submit(input_file, old_text):
    """Extract the text of the uploaded receipt.

    Args:
        input_file: The value of the file input (an object with a ``name``
            attribute holding the path on disk), or a falsy value when
            nothing has been uploaded.
        old_text: Current content of the extracted-text box. Unused; kept so
            the event wiring that passes it does not have to change.

    Returns:
        The text extracted from the PDF or image.

    Raises:
        gr.Error: If no file has been uploaded.
    """
    if not input_file:
        # gr.Error is an exception: it only reaches the user when raised.
        # Merely constructing it (as before) silently did nothing.
        raise gr.Error("Please upload a file to continue!")

    receipt_path = Path(input_file.name)
    # Compare the extension case-insensitively so "RECEIPT.PDF" is still
    # routed to the PDF pipeline.
    if input_file.name.lower().endswith(".pdf"):
        return process_pdf(receipt_path, extract_only=True)
    return process_image(receipt_path, extract_only=True)
90
+
91
+
92
def categorize_extracted_text(extracted_text):
    """Return the category that ``categories.categorize_text`` assigns to the text."""
    return categories.categorize_text(extracted_text)
96
+
97
+
98
def extract_from_category(category, extracted_text):
    """Run the extraction chain of the given category on the extracted text.

    Args:
        category: Name of a ``Category`` member, or a falsy value when no
            category is selected.
        extracted_text: Text previously extracted from the receipt.

    Returns:
        A 4-tuple of component updates: the chatbot transcript, the extracted
        information, and the interactivity of the two flag buttons. With no
        category, the transcript and information are cleared and both
        buttons are disabled.
    """
    if not category:
        return (
            gr.Chatbot.update(None),
            gr.JSON.update(None),
            gr.Button.update(interactive=False),
            gr.Button.update(interactive=False),
        )

    chain = categories.category_modules[Category[category]].chain
    # Build the prompt inputs once and reuse them for both the displayed
    # prompt and the actual generation call.
    prompt_inputs = {
        "text": extracted_text,
        "format_instructions": chain.output_parser.get_format_instructions(),
    }
    formatted_prompt = chain.prompt.format_prompt(**prompt_inputs)
    result = chain.generate(input_list=[prompt_inputs])

    # Render the prompt as the "question" side of the chatbot transcript.
    # NOTE(review): message 1 is labelled "System" and message 0 "Human",
    # exactly as before - confirm this matches the prompt templates' order.
    messages = formatted_prompt.messages
    question_parts = []
    if len(messages) > 1:
        question_parts.append(f"**System:**\n{messages[1].content}")
    question_parts.append(f"**Human:**\n{messages[0].content}")
    # Joining avoids a transcript that starts with blank lines when the
    # prompt consists of a single message.
    question = "\n\n".join(question_parts)

    answer = result.generations[0][0].text
    try:
        information = chain.output_parser.parse_with_prompt(answer, formatted_prompt)
        information = information.json() if information else {}
    except OutputParserException as e:
        # Surface the parsing failure in the JSON view instead of crashing.
        information = {
            "error": "Unable to parse chatbot output",
            "details": str(e),
            "output": e.llm_output,
        }

    return (
        gr.Chatbot.update([[question, answer]]),
        gr.JSON.update(information),
        gr.Button.update(interactive=True),
        gr.Button.update(interactive=True),
    )
141
+
142
+
143
def dynamic_auto_flag(flag_method):
    """Wrap ``flag_method`` so it only runs when the caller opted in.

    The returned callable takes a leading ``share_result`` argument followed
    by the arguments for ``flag_method``. It always returns ``None``; the
    result of ``flag_method`` is discarded.
    """

    def modified_flag_method(share_result, *args, **kwargs):
        # Nothing is flagged unless sharing is enabled.
        if not share_result:
            return
        flag_method(*args, **kwargs)

    return modified_flag_method
149
+
150
+
151
+ # def save_example_and_submit(input_file):
152
+ # example_paths.append(input_file.name)
153
+ # submit(input_file, "")
154
+
155
+
156
# Demo page: the receipt input and options on the left, the recognized
# category and extracted information on the right. The events below chain the
# pipeline: submit -> extracted text -> category -> extracted information.
with gr.Blocks(title="Automatic Reimbursement Tool Demo") as page:
    gr.Markdown("<center><h1>Automatic Reimbursement Tool Demo</h1></center>")
    gr.Markdown("<h2>Description</h2>")
    gr.Markdown(
        "The reimbursement filing process can be time-consuming and cumbersome, causing "
        "frustration for faculty members and finance departments. Our project aims to "
        "automate the information extraction involved in the process by feeding "
        "extracted text to language models such as ChatGPT. This demo showcases the "
        "categorization and extraction parts of the pipeline. Categorization is done "
        "to identify the relevant details associated with the text, after which "
        "extraction is done for those details using a language model."
    )
    gr.Markdown("<h2>Try it out!</h2>")

    # Names of the categories, used for both the dropdown choices and its label.
    category_names = list(Category.__members__)

    with gr.Box() as demo:
        with gr.Row():
            # ---- Input column ----
            with gr.Column(variant="panel"):
                gr.HTML(
                    '<div><center style="color:rgb(200, 200, 200);">Input</center></div>'
                )
                pdf_preview = gr.HTML(label="Preview", show_label=True, visible=False)
                image_preview = gr.Image(
                    label="Preview", show_label=True, visible=False, height=350
                )
                input_file = gr.File(
                    label="Input receipt",
                    show_label=True,
                    type="file",
                    file_count="single",
                    file_types=["image", ".pdf"],
                )
                input_file.change(
                    display_file, input_file, [pdf_preview, image_preview]
                )

                with gr.Row():
                    clear = gr.Button("Clear", variant="secondary")
                    submit_button = gr.Button("Submit", variant="primary")

                show_intermediate = gr.Checkbox(
                    False,
                    label="Show intermediate outputs",
                    info="There are several intermediate steps in the process such as preprocessing, OCR, chatbot interaction. You can choose to show their results here.",
                )
                share_result = gr.Checkbox(
                    True,
                    label="Share results",
                    info="Sharing your result with us will help us immensely in improving this tool.",
                    interactive=True,
                )
                contact = gr.Textbox(
                    type="email",
                    label="Contact",
                    interactive=True,
                    placeholder="Enter your email address",
                    info="Optionally, enter your email address to allow us to contact you regarding your result.",
                    visible=True,
                )
                share_result.change(show_share_contact, share_result, [contact])

            # ---- Output column ----
            with gr.Column(variant="panel"):
                gr.HTML(
                    '<div><center style="color:rgb(200, 200, 200);">Output</center></div>'
                )
                category = gr.Dropdown(
                    value=None,
                    choices=category_names,
                    label=f"Recognized category ({', '.join(category_names)})",
                    show_label=True,
                    interactive=False,
                )
                intermediate_outputs = gr.Accordion(
                    "Intermediate outputs", open=True, visible=False
                )
                with intermediate_outputs:
                    extracted_text = gr.Textbox(
                        label="Extracted text",
                        show_label=True,
                        max_lines=5,
                        show_copy_button=True,
                        lines=5,
                        interactive=False,
                    )
                    chatbot = gr.Chatbot(
                        None,
                        label="Chatbot interaction",
                        show_label=True,
                        interactive=False,
                        height=240,
                    )
                information = gr.JSON(label="Extracted information")
                with gr.Row():
                    flag_incorrect_button = gr.Button(
                        "Flag as incorrect", variant="stop", interactive=True
                    )
                    flag_irrelevant_button = gr.Button(
                        "Flag as irrelevant", variant="stop", interactive=True
                    )

    show_intermediate.change(
        show_intermediate_outputs, show_intermediate, [intermediate_outputs]
    )

    clear.click(clear_inputs, None, [input_file])
    submit_button.click(
        submit,
        [input_file, extracted_text],
        [extracted_text],
    )

    def reset_outputs(uploaded, current_category, current_chat, current_information):
        """Clear the previous results on submit, but only if a file is present."""
        if not uploaded:
            # Nothing to process: leave the current outputs untouched.
            return current_category, current_chat, current_information
        return (
            gr.Dropdown.update(None),
            gr.Chatbot.update(None),
            # The third output is the JSON component, so use its own update
            # helper rather than the Textbox one.
            gr.JSON.update(None),
        )

    submit_button.click(
        reset_outputs,
        [input_file, category, chatbot, information],
        [category, chatbot, information],
    )
    extracted_text.change(
        categorize_extracted_text,
        [extracted_text],
        [category],
    )
    category.change(
        extract_from_category,
        [category, extracted_text],
        [chatbot, information, flag_incorrect_button, flag_irrelevant_button],
    )

    # Components whose values are stored with every flagged sample.
    flagged_components = [
        input_file,
        extracted_text,
        category,
        chatbot,
        information,
        contact,
    ]

    # Automatic flagging: every new result is saved when sharing is enabled.
    hf_writer_normal.setup(flagged_components, flagging_dir="flagged")
    flag_method = gr.flagging.FlagMethod(
        hf_writer_normal, "", "", visual_feedback=True
    )
    information.change(
        dynamic_auto_flag(flag_method),
        inputs=[share_result, *flagged_components],
        outputs=None,
        preprocess=False,
        queue=False,
    )

    # Manual flagging: both buttons write to the "incorrect" dataset, with
    # different flag values.
    hf_writer_incorrect.setup(flagged_components, flagging_dir="flagged_incorrect")
    flag_incorrect_method = gr.flagging.FlagMethod(
        hf_writer_incorrect,
        "Flag as incorrect",
        "Incorrect",
        visual_feedback=True,
    )
    flag_incorrect_button.click(
        lambda: gr.Button.update(value="Saving...", interactive=False),
        None,
        flag_incorrect_button,
        queue=False,
    )
    flag_incorrect_button.click(
        flag_incorrect_method,
        inputs=flagged_components,
        outputs=[flag_incorrect_button],
        preprocess=False,
        queue=False,
    )

    flag_irrelevant_method = gr.flagging.FlagMethod(
        hf_writer_incorrect,
        "Flag as irrelevant",
        "Irrelevant",
        visual_feedback=True,
    )
    flag_irrelevant_button.click(
        lambda: gr.Button.update(value="Saving...", interactive=False),
        None,
        flag_irrelevant_button,
        queue=False,
    )
    flag_irrelevant_button.click(
        flag_irrelevant_method,
        inputs=flagged_components,
        outputs=[flag_irrelevant_button],
        preprocess=False,
        queue=False,
    )


page.launch(show_api=True, show_error=True, debug=True)
categories/__init__.py CHANGED
@@ -14,11 +14,11 @@ from . import accomodation, random_, travel_cab, travel_flight
14
 
15
 
16
  class Category(Enum):
17
- ACCOMODATION = "accomodation"
18
- TRAVEL_FLIGHT = "travel_flight"
19
- TRAVEL_CAB = "travel_cab"
20
- # VENDOR = "vendor"
21
- RANDOM = "random"
22
 
23
 
24
  category_modules = {
 
14
 
15
 
16
  class Category(Enum):
17
+ ACCOMODATION = "ACCOMODATION"
18
+ TRAVEL_FLIGHT = "TRAVEL_FLIGHT"
19
+ TRAVEL_CAB = "TRAVEL_CAB"
20
+ # VENDOR = "VENDOR"
21
+ RANDOM = "RANDOM"
22
 
23
 
24
  category_modules = {
categories/random_/__init__.py CHANGED
@@ -124,5 +124,11 @@ Customers desirous of availing input GST credit are requested to create a Busine
124
  Please note that this invoice is not a demand for payment
125
 
126
  Page 1 of 1"""
127
- result = chain.run(text=text, format_instructions=fixing_parser.get_format_instructions())
128
- print(result.json(indent=4))
 
 
 
 
 
 
 
124
  Please note that this invoice is not a demand for payment
125
 
126
  Page 1 of 1"""
127
+ # result = chain.prompt.format_prompt(text=text, format_instructions=fixing_parser.get_format_instructions())
128
+ # print(result.json(indent=4))
129
+ result = chain.generate(input_list=[{"text": text, "format_instructions": fixing_parser.get_format_instructions()}])
130
+ print(result)
131
+ result = fixing_parser.parse_with_prompt(result.generations[0][0].text, chain.prompt.format_prompt(text=text, format_instructions=fixing_parser.get_format_instructions()))
132
+ print(result)
133
+ # result = chain.run(text=text, format_instructions=output_parser.get_format_instructions(), verbose=True)
134
+ # print(result)
main.py CHANGED
@@ -16,11 +16,12 @@ def categorize_and_parse_text(text: str) -> BaseModel:
16
  Returns: The category of the text.
17
  """
18
  category = categories.categorize_text(text)
19
- print("Categorized as category", category)
 
20
  result = categories.run_category_chain(category, text)
21
  return result
22
 
23
- def process_pdf(filename: Path) -> BaseModel:
24
  """Processes the given PDF file and extracts information from it.
25
 
26
  Args:
@@ -35,13 +36,14 @@ def process_pdf(filename: Path) -> BaseModel:
35
  # If the encoded text is too short, a pdf scanner probably added a watermark
36
  if len(text) < 20:
37
  # Try to extract text from images
38
- images = processing.convert_pdf_to_image_pdf2image(pdf_bytes)
39
  text = extract.extract_text_from_images_pyocr_tesseract(images)
40
-
 
41
  result = categorize_and_parse_text(text)
42
  return result
43
 
44
- def process_image(filename: Path) -> BaseModel:
45
  """Processes the given image file and extracts information from it.
46
 
47
  Args:
@@ -50,8 +52,11 @@ def process_image(filename: Path) -> BaseModel:
50
  Returns: The extracted information.
51
  """
52
  image = Image.open(filename)
 
53
  text = extract.extract_text_from_image_pyocr_tesseract(image)
54
  image.close()
 
 
55
  result = categorize_and_parse_text(text)
56
  return result
57
 
 
16
  Returns: The category of the text.
17
  """
18
  category = categories.categorize_text(text)
19
+ # if stop_on_category:
20
+ # return category, text
21
  result = categories.run_category_chain(category, text)
22
  return result
23
 
24
+ def process_pdf(filename: Path, extract_only=False) -> BaseModel:
25
  """Processes the given PDF file and extracts information from it.
26
 
27
  Args:
 
36
  # If the encoded text is too short, a pdf scanner probably added a watermark
37
  if len(text) < 20:
38
  # Try to extract text from images
39
+ images = processing.preprocess_pdf_pdf2image(pdf_bytes)
40
  text = extract.extract_text_from_images_pyocr_tesseract(images)
41
+ if extract_only:
42
+ return text
43
  result = categorize_and_parse_text(text)
44
  return result
45
 
46
+ def process_image(filename: Path, extract_only=False) -> BaseModel:
47
  """Processes the given image file and extracts information from it.
48
 
49
  Args:
 
52
  Returns: The extracted information.
53
  """
54
  image = Image.open(filename)
55
+ image = processing.preprocess_image(image)
56
  text = extract.extract_text_from_image_pyocr_tesseract(image)
57
  image.close()
58
+ if extract_only:
59
+ return text
60
  result = categorize_and_parse_text(text)
61
  return result
62
 
requirements.txt CHANGED
@@ -1,345 +1,123 @@
1
- aiobotocore==2.5.0
2
- aiofiles==22.1.0
3
- aiohttp==3.8.3
4
- aioitertools==0.7.1
5
- aiosignal==1.2.0
6
- aiosqlite==0.18.0
7
- alabaster==0.7.12
8
- anyio==3.5.0
9
- appdirs==1.4.4
10
- argon2-cffi==21.3.0
11
- argon2-cffi-bindings==21.2.0
12
- arrow==1.2.3
13
- astroid==2.14.2
14
- astropy==5.1
15
  asttokens==2.2.1
16
  async-timeout==4.0.2
17
- atomicwrites==1.4.0
18
- attrs==22.1.0
19
- Automat==20.2.0
20
- autopep8==1.6.0
21
- Babel==2.11.0
22
  backcall==0.2.0
23
- bcrypt==3.2.0
24
- beautifulsoup4==4.12.2
25
- binaryornot==0.4.4
26
- bleach==4.1.0
27
- bokeh==3.1.1
28
- botocore==1.29.76
29
- Bottleneck==1.3.5
30
- brotlipy==0.7.0
31
  certifi==2023.7.22
32
- cffi==1.15.1
33
- chardet==4.0.0
34
- charset-normalizer==2.0.4
35
- click==8.0.4
36
- cloudpickle==2.2.1
37
  colorama==0.4.6
38
- colorcet==3.0.1
39
  comm==0.1.3
40
- constantly==15.1.0
41
- contourpy==1.0.5
42
- cookiecutter==1.7.3
43
- cryptography==39.0.1
44
- cssselect==1.1.0
45
  cycler==0.11.0
46
- cytoolz==0.12.0
47
- daal4py==2023.1.1
48
- dask==2023.6.0
49
  dataclasses-json==0.5.13
50
- datasets==2.12.0
51
- datashader==0.15.0
52
- datashape==0.5.4
53
  debugpy==1.6.7
54
  decorator==5.1.1
55
- defusedxml==0.7.1
56
- diff-match-patch==20200713
57
- dill==0.3.6
58
- distributed==2023.6.0
59
- docstring-to-markdown==0.11
60
- docutils==0.18.1
61
- entrypoints==0.4
62
- et-xmlfile==1.1.0
63
- exceptiongroup==1.0.4
64
  executing==1.2.0
65
- fastjsonschema==2.16.2
66
- filelock==3.9.0
67
- flake8==6.0.0
68
- Flask==2.2.2
69
- fonttools==4.25.0
70
- frozenlist==1.3.3
71
- fsspec==2023.4.0
72
- gensim==4.3.0
73
- greenlet==2.0.1
74
- h5py==3.7.0
75
- HeapDict==1.0.1
76
- holoviews==1.16.2
77
- huggingface-hub==0.15.1
78
- hvplot==0.8.4
79
- hyperlink==21.0.0
80
  idna==3.4
81
- imagecodecs==2021.8.26
82
- imageio==2.26.0
83
- imagesize==1.4.1
84
- imbalanced-learn==0.10.1
85
- importlib-metadata==6.0.0
86
- incremental==21.3.0
87
- inflection==0.5.1
88
- iniconfig==1.1.1
89
- intake==0.6.8
90
- intervaltree==3.1.0
91
- ipykernel==6.22.0
92
- ipython==8.12.0
93
- ipython-genutils==0.2.0
94
- ipywidgets==8.0.4
95
- iso4217==1.9.20220401
96
- isort==5.9.3
97
- itemadapter==0.3.0
98
- itemloaders==1.0.4
99
- itsdangerous==2.0.1
100
- jaraco.classes==3.2.1
101
  jedi==0.18.2
102
- jellyfish==0.9.0
103
  Jinja2==3.1.2
104
- jinja2-time==0.2.0
105
- jmespath==0.10.0
106
- joblib==1.2.0
107
- json5==0.9.6
108
- jsonschema==4.17.3
109
- jupyter==1.0.0
110
- jupyter_client==8.2.0
111
- jupyter-console==6.6.3
112
- jupyter_core==5.3.0
113
- jupyter-events==0.6.3
114
- jupyter-server==1.23.6
115
- jupyter_server_fileid==0.9.0
116
- jupyter_server_terminals==0.4.4
117
- jupyter_server_ydoc==0.8.0
118
- jupyter-ydoc==0.2.4
119
- jupyterlab==3.6.3
120
- jupyterlab-pygments==0.1.2
121
- jupyterlab_server==2.22.0
122
- jupyterlab-widgets==3.0.5
123
- keyring==23.13.1
124
  kiwisolver==1.4.4
125
- langchain==0.0.245
126
  langsmith==0.0.15
127
- lazy_loader==0.2
128
- lazy-object-proxy==1.6.0
129
- linkify-it-py==2.0.0
130
- llvmlite==0.40.0
131
- lmdb==1.4.1
132
- locket==1.0.0
133
- lxml==4.9.2
134
- lz4==4.3.2
135
- Markdown==3.4.1
136
  markdown-it-py==2.2.0
137
- MarkupSafe==2.1.1
138
  marshmallow==3.20.1
139
- matplotlib==3.7.1
140
  matplotlib-inline==0.1.6
141
- mccabe==0.7.0
142
- mdit-py-plugins==0.3.0
143
- mdurl==0.1.0
144
- menuinst==1.4.19
145
- mistune==3.0.0
146
- mkl-fft==1.3.6
147
- mkl-random==1.2.2
148
- mkl-service==2.4.0
149
- more-itertools==8.12.0
150
- mpmath==1.2.1
151
- msgpack==1.0.3
152
- multidict==6.0.2
153
- multipledispatch==0.6.0
154
- multiprocess==0.70.14
155
- munkres==1.1.4
156
- mypy-extensions==0.4.3
157
- nbclassic==0.5.5
158
- nbclient==0.5.13
159
- nbconvert==7.7.3
160
- nbformat==5.7.0
161
  nest-asyncio==1.5.6
162
- networkx==2.8.4
163
- nltk==3.7
164
- notebook==6.5.4
165
- notebook_shim==0.2.2
166
- numba==0.57.0
167
  numexpr==2.8.4
168
- numpy==1.24.3
169
- numpydoc==1.5.0
170
  openai==0.27.8
171
  openapi-schema-pydantic==1.2.4
172
  opencv-python-headless==4.8.0.74
173
- openpyxl==3.0.10
174
- packaging==23.0
175
- pandas==1.5.3
176
- pandocfilters==1.5.0
177
- panel==1.1.0
178
- param==1.13.0
179
- paramiko==2.8.1
180
- parsel==1.6.0
181
  parso==0.8.3
182
- partd==1.2.0
183
- pathspec==0.10.3
184
- patsy==0.5.3
185
  pdf2image==1.16.3
186
- pep8==1.7.1
187
- pexpect==4.8.0
188
  pickleshare==0.7.5
189
- Pillow==9.4.0
190
- pip==23.1.2
191
- platformdirs==3.5.0
192
- plotly==5.9.0
193
- pluggy==1.0.0
194
- ply==3.11
195
- pooch==1.4.0
196
- poyo==0.5.0
197
- prometheus-client==0.14.1
198
- prompt-toolkit==3.0.38
199
- Protego==0.1.16
200
  psutil==5.9.5
201
- ptyprocess==0.7.0
202
  pure-eval==0.2.2
203
- py-cpuinfo==8.0.0
204
- pyarrow==11.0.0
205
- pyasn1==0.4.8
206
- pyasn1-modules==0.2.8
207
- pycodestyle==2.10.0
208
- pycparser==2.21
209
- pyct==0.5.0
210
- pycurl==7.45.2
211
  pydantic==1.10.12
212
- PyDispatcher==2.0.5
213
- pydocstyle==6.3.0
214
- pyerfa==2.0.0
215
- pyflakes==3.0.1
216
  Pygments==2.15.1
217
- pylint==2.16.2
218
- pylint-venv==2.3.0
219
- pyls-spyder==0.4.0
220
- PyNaCl==1.5.0
221
  pyocr==0.8.3
222
- pyodbc==4.0.34
223
- pyOpenSSL==23.0.0
224
  pyparsing==3.0.9
225
  pypdf==3.13.0
226
- PyQt5==5.15.7
227
- PyQt5-sip==12.11.0
228
- PyQtWebEngine==5.15.4
229
- pyrsistent==0.18.0
230
- PySocks==1.7.1
231
- pytest==7.3.1
232
  python-dateutil==2.8.2
233
- python-json-logger==2.0.7
234
- python-lsp-black==1.2.1
235
- python-lsp-jsonrpc==1.0.0
236
- python-lsp-server==1.7.2
237
- python-slugify==5.0.2
238
- python-snappy==0.6.1
239
- pytoolconfig==1.2.5
240
- pytz==2022.7
241
- pyviz-comms==2.3.0
242
- PyWavelets==1.4.1
243
- pywin32==305.1
244
- pywin32-ctypes==0.2.0
245
- pywinpty==2.0.10
246
- PyYAML==6.0
247
- pyzmq==25.0.2
248
- QDarkStyle==3.0.2
249
- qstylizer==0.2.2
250
- QtAwesome==1.2.2
251
- qtconsole==5.4.2
252
- QtPy==2.2.0
253
- queuelib==1.5.0
254
- regex==2022.7.9
255
- requests==2.29.0
256
- requests-file==1.5.1
257
- responses==0.13.3
258
- rfc3339-validator==0.1.4
259
- rfc3986-validator==0.1.1
260
- rope==1.7.0
261
- Rtree==1.0.1
262
- s3fs==2023.4.0
263
- sacremoses==0.0.43
264
- scikit-image==0.20.0
265
- scikit-learn==1.2.2
266
- scikit-learn-intelex==20230426.121158
267
- scipy==1.10.1
268
- Scrapy==2.8.0
269
- seaborn==0.12.2
270
- Send2Trash==1.8.0
271
- service-identity==18.1.0
272
- setuptools==67.8.0
273
- sip==6.6.2
274
  six==1.16.0
275
- smart-open==5.2.1
276
- sniffio==1.2.0
277
- snowballstemmer==2.2.0
278
- sortedcontainers==2.4.0
279
- soupsieve==2.4
280
- Sphinx==5.0.2
281
- sphinxcontrib-applehelp==1.0.2
282
- sphinxcontrib-devhelp==1.0.2
283
- sphinxcontrib-htmlhelp==2.0.0
284
- sphinxcontrib-jsmath==1.0.1
285
- sphinxcontrib-qthelp==1.0.3
286
- sphinxcontrib-serializinghtml==1.1.5
287
- spyder==5.4.3
288
- spyder-kernels==2.4.3
289
- SQLAlchemy==1.4.39
290
  stack-data==0.6.2
291
- statsmodels==0.13.5
292
- sympy==1.11.1
293
- tables==3.8.0
294
- tabulate==0.8.10
295
- TBB==0.2
296
- tblib==1.7.0
297
  tenacity==8.2.2
298
- terminado==0.17.1
299
- text-unidecode==1.3
300
- textdistance==4.2.1
301
- threadpoolctl==2.2.0
302
- three-merge==0.1.1
303
- tifffile==2021.7.2
304
- tinycss2==1.2.1
305
- tldextract==3.2.0
306
- tokenizers==0.13.2
307
- toml==0.10.2
308
- tomli==2.0.1
309
- tomlkit==0.11.1
310
  toolz==0.12.0
311
- torch==2.0.1
312
- tornado==6.3.1
313
  tqdm==4.65.0
314
  traitlets==5.9.0
315
- transformers==4.29.2
316
- Twisted==22.10.0
317
- twisted-iocpsupport==1.0.2
318
- typing_extensions==4.6.3
319
  typing-inspect==0.9.0
320
- uc-micro-py==1.0.1
321
- ujson==5.4.0
322
- Unidecode==1.2.0
323
- urllib3==1.26.16
324
- w3lib==1.21.0
325
- watchdog==2.1.6
326
  wcwidth==0.2.6
327
- webencodings==0.5.1
328
- websocket-client==0.58.0
329
- Werkzeug==2.2.3
330
- whatthepatch==1.0.2
331
  wheel==0.38.4
332
- widgetsnbextension==4.0.5
333
- win-inet-pton==1.1.0
334
- wrapt==1.14.1
335
- xarray==2022.11.0
336
- xlwings==0.29.1
337
- xxhash==2.0.2
338
- xyzservices==2022.9.0
339
- y-py==0.5.9
340
- yapf==0.31.0
341
- yarl==1.8.1
342
- ypy-websocket==0.8.2
343
- zict==2.2.0
344
- zipp==3.11.0
345
- zope.interface==5.4.0
 
1
+ aiofiles==23.1.0
2
+ aiohttp==3.8.5
3
+ aiosignal==1.3.1
4
+ altair==5.0.1
5
+ annotated-types==0.5.0
6
+ anyio==3.7.1
 
 
 
 
 
 
 
 
7
  asttokens==2.2.1
8
  async-timeout==4.0.2
9
+ attrs==23.1.0
 
 
 
 
10
  backcall==0.2.0
11
+ backports.functools-lru-cache==1.6.5
 
 
 
 
 
 
 
12
  certifi==2023.7.22
13
+ charset-normalizer==3.2.0
14
+ click==8.1.6
 
 
 
15
  colorama==0.4.6
 
16
  comm==0.1.3
17
+ contourpy==1.1.0
 
 
 
 
18
  cycler==0.11.0
 
 
 
19
  dataclasses-json==0.5.13
20
+ datasets==2.14.1
 
 
21
  debugpy==1.6.7
22
  decorator==5.1.1
23
+ dill==0.3.7
24
+ exceptiongroup==1.1.2
 
 
 
 
 
 
 
25
  executing==1.2.0
26
+ fastapi==0.100.1
27
+ ffmpy==0.3.1
28
+ filelock==3.12.2
29
+ fonttools==4.41.1
30
+ frozenlist==1.4.0
31
+ fsspec==2023.6.0
32
+ gradio==3.39.0
33
+ gradio_client==0.3.0
34
+ greenlet==2.0.2
35
+ h11==0.14.0
36
+ httpcore==0.17.3
37
+ httpx==0.24.1
38
+ huggingface-hub==0.16.4
 
 
39
  idna==3.4
40
+ importlib-metadata==6.8.0
41
+ importlib-resources==6.0.0
42
+ ipykernel==6.25.0
43
+ ipython==8.14.0
44
+ iso4217==1.11.20220401
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45
  jedi==0.18.2
 
46
  Jinja2==3.1.2
47
+ jsonschema==4.18.4
48
+ jsonschema-specifications==2023.7.1
49
+ jupyter_client==8.3.0
50
+ jupyter_core==5.3.1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
51
  kiwisolver==1.4.4
52
+ langchain==0.0.247
53
  langsmith==0.0.15
54
+ linkify-it-py==2.0.2
 
 
 
 
 
 
 
 
55
  markdown-it-py==2.2.0
56
+ MarkupSafe==2.1.3
57
  marshmallow==3.20.1
58
+ matplotlib==3.7.2
59
  matplotlib-inline==0.1.6
60
+ mdit-py-plugins==0.3.3
61
+ mdurl==0.1.2
62
+ multidict==6.0.4
63
+ multiprocess==0.70.15
64
+ mypy-extensions==1.0.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
65
  nest-asyncio==1.5.6
 
 
 
 
 
66
  numexpr==2.8.4
67
+ numpy==1.25.1
 
68
  openai==0.27.8
69
  openapi-schema-pydantic==1.2.4
70
  opencv-python-headless==4.8.0.74
71
+ orjson==3.9.2
72
+ packaging==23.1
73
+ pandas==2.0.3
 
 
 
 
 
74
  parso==0.8.3
 
 
 
75
  pdf2image==1.16.3
 
 
76
  pickleshare==0.7.5
77
+ Pillow==10.0.0
78
+ pip==23.2.1
79
+ platformdirs==3.9.1
80
+ prompt-toolkit==3.0.39
 
 
 
 
 
 
 
81
  psutil==5.9.5
 
82
  pure-eval==0.2.2
83
+ pyarrow==12.0.1
 
 
 
 
 
 
 
84
  pydantic==1.10.12
85
+ pydantic_core==2.4.0
86
+ pydub==0.25.1
 
 
87
  Pygments==2.15.1
 
 
 
 
88
  pyocr==0.8.3
 
 
89
  pyparsing==3.0.9
90
  pypdf==3.13.0
 
 
 
 
 
 
91
  python-dateutil==2.8.2
92
+ python-multipart==0.0.6
93
+ pytz==2023.3
94
+ pywin32==304
95
+ PyYAML==6.0.1
96
+ pyzmq==25.1.0
97
+ referencing==0.30.0
98
+ requests==2.31.0
99
+ rpds-py==0.9.2
100
+ semantic-version==2.10.0
101
+ setuptools==68.0.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
102
  six==1.16.0
103
+ sniffio==1.3.0
104
+ SQLAlchemy==2.0.19
 
 
 
 
 
 
 
 
 
 
 
 
 
105
  stack-data==0.6.2
106
+ starlette==0.27.0
 
 
 
 
 
107
  tenacity==8.2.2
 
 
 
 
 
 
 
 
 
 
 
 
108
  toolz==0.12.0
109
+ tornado==6.3.2
 
110
  tqdm==4.65.0
111
  traitlets==5.9.0
112
+ typing_extensions==4.7.1
 
 
 
113
  typing-inspect==0.9.0
114
+ tzdata==2023.3
115
+ uc-micro-py==1.0.2
116
+ urllib3==2.0.4
117
+ uvicorn==0.23.1
 
 
118
  wcwidth==0.2.6
119
+ websockets==11.0.3
 
 
 
120
  wheel==0.38.4
121
+ xxhash==3.3.0
122
+ yarl==1.9.2
123
+ zipp==3.16.2