JaMe76 committed on
Commit
94c5764
·
1 Parent(s): 177bac3

update space

Browse files
Files changed (3) hide show
  1. .gitignore +2 -0
  2. app.py +37 -15
  3. requirements.txt +1 -0
.gitignore ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ /.env
2
+ /dummy.py
app.py CHANGED
@@ -1,14 +1,26 @@
 
1
  import os
2
- os.system('pip install detectron2@git+https://github.com/facebookresearch/detectron2.git')
 
 
3
 
4
- # work around: https://discuss.huggingface.co/t/how-to-install-a-specific-version-of-gradio-in-spaces/13552
5
- os.system("pip uninstall -y gradio")
6
- os.system("pip install gradio==3.4.1")
7
- os.system("pip install packaging==21.3")
8
- os.system(os.environ["DD_ADDONS"])
9
 
10
- import time
11
- from os import getcwd, path
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
 
13
  import deepdoctection as dd
14
  from deepdoctection.dataflow.serialize import DataFromList
@@ -21,7 +33,10 @@ from dd_addons.extern.openai import OpenAiLmmTokenClassifier, is_api_key_valid
21
 
22
  import gradio as gr
23
 
24
- analyzer = get_loader(reset_config_file=True, config_overwrite=["OCR.USE_TESSERACT=False","OCR.USE_TEXTRACT=True"])
 
 
 
25
 
26
  demo = gr.Blocks(css="scrollbar.css")
27
 
@@ -30,6 +45,9 @@ def process_analyzer(openai_api_key, categories_str, instruction_str, img, pdf,
30
  if not is_api_key_valid(openai_api_key):
31
  return [], {}, "You have entered no or an invalid api key. Please enter a valid api key"
32
  categories_list = categories_str.split(",")
 
 
 
33
  register_string_categories_from_list(categories_list, "custom_token_classes")
34
  custom_token_class = dd.object_types_registry.get("custom_token_classes")
35
  print([token_class for token_class in custom_token_class])
@@ -61,13 +79,15 @@ def process_analyzer(openai_api_key, categories_str, instruction_str, img, pdf,
61
 
62
  json_out = {}
63
  dpts = []
 
64
 
65
  for idx, dp in enumerate(df):
66
  dpts.append(dp)
67
  json_out[f"page_{idx}"] = dp.get_token()
 
68
 
69
  return [dp.viz(show_cells=False, show_layouts=False, show_tables=False, show_words=True, show_token_class=True, ignore_default_token_class=True)
70
- for dp in dpts], json_out, "No error"
71
 
72
 
73
  with demo:
@@ -125,18 +145,20 @@ with demo:
125
  with gr.Box():
126
  gr.Markdown("<center><strong>JSON</strong></center>")
127
  json = gr.JSON()
 
 
 
 
 
 
128
  with gr.Column():
129
  with gr.Box():
130
  gr.Markdown("<center><strong>Layout detection</strong></center>")
131
  gallery = gr.Gallery(
132
  label="Output images", show_label=False, elem_id="gallery"
133
  ).style(grid=2)
134
- with gr.Row():
135
- with gr.Box():
136
- gr.Markdown("<center><strong>Table</strong></center>")
137
- html = gr.HTML()
138
 
139
  btn.click(fn=process_analyzer, inputs=[user_token, categories, instruction, inputs, inputs_pdf, max_imgs],
140
- outputs=[gallery, json, msg])
141
 
142
  demo.launch()
 
1
+ import time
2
  import os
3
+ from os import getcwd, path
4
+ import importlib.metadata
5
+ from dotenv import load_dotenv
6
 
 
 
 
 
 
7
 
8
+ def check_additional_requirements():
9
+ if importlib.util.find_spec("detectron2") is None:
10
+ os.system('pip install detectron2@git+https://github.com/facebookresearch/detectron2.git')
11
+ if importlib.util.find_spec("gradio") is not None:
12
+ if importlib.metadata.version("gradio")!="3.4.1":
13
+ os.system("pip uninstall -y gradio")
14
+ os.system("pip install gradio==3.4.1")
15
+ else:
16
+ os.system("pip install gradio==3.4.1")
17
+ os.system(os.environ["DD_ADDONS"])
18
+ return
19
+
20
+
21
+ load_dotenv()
22
+ check_additional_requirements()
23
+
24
 
25
  import deepdoctection as dd
26
  from deepdoctection.dataflow.serialize import DataFromList
 
33
 
34
  import gradio as gr
35
 
36
+ dd.Page.add_attribute_name("raw_json_output")
37
+ analyzer = get_loader(reset_config_file=True, config_overwrite=["OCR.USE_TESSERACT=False",
38
+ "OCR.USE_TEXTRACT=True",
39
+ "WORD_MATCHING.MAX_PARENT_ONLY=True"])
40
 
41
  demo = gr.Blocks(css="scrollbar.css")
42
 
 
45
  if not is_api_key_valid(openai_api_key):
46
  return [], {}, "You have entered no or an invalid api key. Please enter a valid api key"
47
  categories_list = categories_str.split(",")
48
+ if not categories_str:
49
+ return [], {}, "You did not enter any entities. Please enter a at least one category."
50
+
51
  register_string_categories_from_list(categories_list, "custom_token_classes")
52
  custom_token_class = dd.object_types_registry.get("custom_token_classes")
53
  print([token_class for token_class in custom_token_class])
 
79
 
80
  json_out = {}
81
  dpts = []
82
+ json_out_raw = {}
83
 
84
  for idx, dp in enumerate(df):
85
  dpts.append(dp)
86
  json_out[f"page_{idx}"] = dp.get_token()
87
+ json_out_raw[f"page_{idx}"] = dp.raw_json_output
88
 
89
  return [dp.viz(show_cells=False, show_layouts=False, show_tables=False, show_words=True, show_token_class=True, ignore_default_token_class=True)
90
+ for dp in dpts], json_out, json_out_raw, "No error"
91
 
92
 
93
  with demo:
 
145
  with gr.Box():
146
  gr.Markdown("<center><strong>JSON</strong></center>")
147
  json = gr.JSON()
148
+ with gr.Box():
149
+ gr.Markdown("<center><strong>ChatGPT output. </strong> <br />"
150
+ "It is possible that ChatGPT answers in an unexpected way, "
151
+ "such that the answer cannot be properly processed. In this case you might get"
152
+ "an empty JSON but you can still see the raw output.</center>")
153
+ json_raw = gr.JSON()
154
  with gr.Column():
155
  with gr.Box():
156
  gr.Markdown("<center><strong>Layout detection</strong></center>")
157
  gallery = gr.Gallery(
158
  label="Output images", show_label=False, elem_id="gallery"
159
  ).style(grid=2)
 
 
 
 
160
 
161
  btn.click(fn=process_analyzer, inputs=[user_token, categories, instruction, inputs, inputs_pdf, max_imgs],
162
+ outputs=[gallery, json, json_raw, msg])
163
 
164
  demo.launch()
requirements.txt CHANGED
@@ -1,3 +1,4 @@
 
1
  Pillow==9.5.0
2
  torch==1.12.0
3
  torchvision==0.13.0
 
1
+ python-dotenv
2
  Pillow==9.5.0
3
  torch==1.12.0
4
  torchvision==0.13.0