rishiraj committed on
Commit
33f6a35
1 Parent(s): 4b3bfb7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +63 -65
app.py CHANGED
@@ -23,49 +23,7 @@ def use_marker(pdf_filepath):
23
  config = MarkdownExtractorConfig(batch_multiplier=2)
24
 
25
  result = markdown_extractor.extract(content, config)
26
- return str(result)
27
-
28
- @spaces.GPU
29
- def use_pdf_extractor(pdf_filepath):
30
- if pdf_filepath is None:
31
- raise gr.Error("Please provide some input PDF: upload an PDF file")
32
-
33
- with open(pdf_filepath, "rb") as f:
34
- pdf_data = f.read()
35
-
36
- content = Content(content_type="application/pdf", data=pdf_data)
37
- config = PDFExtractorConfig(output_types=["text", "table"])
38
-
39
- result = pdf_extractor.extract(content, config)
40
- return str(result)
41
-
42
- @spaces.GPU
43
- def use_gemini(pdf_filepath, key):
44
- if pdf_filepath is None:
45
- raise gr.Error("Please provide some input PDF: upload an PDF file")
46
-
47
- with open(pdf_filepath, "rb") as f:
48
- pdf_data = f.read()
49
-
50
- content = Content(content_type="application/pdf", data=pdf_data)
51
- config = GeminiExtractorConfig(prompt="Extract all text from the document.", model_name="gemini-1.5-flash", key=key)
52
-
53
- result = gemini_extractor.extract(content, config)
54
- return str(result)
55
-
56
- @spaces.GPU
57
- def use_openai(pdf_filepath, key):
58
- if pdf_filepath is None:
59
- raise gr.Error("Please provide some input PDF: upload an PDF file")
60
-
61
- with open(pdf_filepath, "rb") as f:
62
- pdf_data = f.read()
63
-
64
- content = Content(content_type="application/pdf", data=pdf_data)
65
- config = OAIExtractorConfig(prompt="Extract all text from the document.", model_name="gpt-4o", key=key)
66
-
67
- result = oai_extractor.extract(content, config)
68
- return str(result)
69
 
70
  with gr.Blocks(title="PDF data extraction with Marker & Indexify") as marker_demo:
71
  gr.HTML("<h1 style='text-align: center'>PDF data extraction with Marker & <a href='https://getindexify.ai/'>Indexify</a></h1>")
@@ -83,7 +41,7 @@ with gr.Blocks(title="PDF data extraction with Marker & Indexify") as marker_dem
83
  "<a href='https://getindexify.ai/'>Indexify</a>.</p>"
84
  )
85
 
86
- pdf_file_marker = gr.File(type="filepath")
87
 
88
  with gr.Column():
89
  gr.HTML("<p><b>Step 2:</b> Run the extractor.</p>")
@@ -93,9 +51,9 @@ with gr.Blocks(title="PDF data extraction with Marker & Indexify") as marker_dem
93
  variant="primary",
94
  )
95
 
96
- model_output_text_box_marker = gr.Textbox(
97
  label="Extractor Output",
98
- elem_id="model_output_text_box_marker",
99
  )
100
 
101
  with gr.Row():
@@ -109,10 +67,24 @@ with gr.Blocks(title="PDF data extraction with Marker & Indexify") as marker_dem
109
 
110
  go_button.click(
111
  fn=use_marker,
112
- inputs = [pdf_file_marker],
113
- outputs = [model_output_text_box_marker]
114
  )
115
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
116
  with gr.Blocks(title="PDF data extraction with PDF Extractor & Indexify") as pdf_demo:
117
  gr.HTML("<h1 style='text-align: center'>PDF data extraction with PDF Extractor & <a href='https://getindexify.ai/'>Indexify</a></h1>")
118
  gr.HTML("<p style='text-align: center'>Indexify is a scalable realtime and continuous indexing and structured extraction engine for unstructured data to build generative AI applications</p>")
@@ -129,7 +101,7 @@ with gr.Blocks(title="PDF data extraction with PDF Extractor & Indexify") as pdf
129
  "<a href='https://getindexify.ai/'>Indexify</a>.</p>"
130
  )
131
 
132
- pdf_file_pdf = gr.File(type="filepath")
133
 
134
  with gr.Column():
135
  gr.HTML("<p><b>Step 2:</b> Run the extractor.</p>")
@@ -139,9 +111,9 @@ with gr.Blocks(title="PDF data extraction with PDF Extractor & Indexify") as pdf
139
  variant="primary",
140
  )
141
 
142
- model_output_text_box_pdf = gr.Textbox(
143
  label="Extractor Output",
144
- elem_id="model_output_text_box_pdf",
145
  )
146
 
147
  with gr.Row():
@@ -155,10 +127,23 @@ with gr.Blocks(title="PDF data extraction with PDF Extractor & Indexify") as pdf
155
 
156
  go_button.click(
157
  fn=use_pdf_extractor,
158
- inputs = [pdf_file_pdf],
159
- outputs = [model_output_text_box_pdf]
160
  )
161
 
 
 
 
 
 
 
 
 
 
 
 
 
 
162
  with gr.Blocks(title="PDF data extraction with Gemini & Indexify") as gemini_demo:
163
  gr.HTML("<h1 style='text-align: center'>PDF data extraction with Gemini & <a href='https://getindexify.ai/'>Indexify</a></h1>")
164
  gr.HTML("<p style='text-align: center'>Indexify is a scalable realtime and continuous indexing and structured extraction engine for unstructured data to build generative AI applications</p>")
@@ -175,11 +160,11 @@ with gr.Blocks(title="PDF data extraction with Gemini & Indexify") as gemini_dem
175
  "<a href='https://getindexify.ai/'>Indexify</a>.</p>"
176
  )
177
 
178
- pdf_file_gemini = gr.File(type="filepath")
179
 
180
  gr.HTML("<p><b>Step 2:</b> Enter your API key.</p>")
181
 
182
- key_gemini = gr.Textbox(
183
  info="Please enter your GEMINI_API_KEY",
184
  label="Key:"
185
  )
@@ -192,9 +177,9 @@ with gr.Blocks(title="PDF data extraction with Gemini & Indexify") as gemini_dem
192
  variant="primary",
193
  )
194
 
195
- model_output_text_box_gemini = gr.Textbox(
196
  label="Extractor Output",
197
- elem_id="model_output_text_box_gemini",
198
  )
199
 
200
  with gr.Row():
@@ -208,10 +193,23 @@ with gr.Blocks(title="PDF data extraction with Gemini & Indexify") as gemini_dem
208
 
209
  go_button.click(
210
  fn=use_gemini,
211
- inputs = [pdf_file_gemini, key_gemini],
212
- outputs = [model_output_text_box_gemini]
213
  )
214
 
 
 
 
 
 
 
 
 
 
 
 
 
 
215
  with gr.Blocks(title="PDF data extraction with OpenAI & Indexify") as openai_demo:
216
  gr.HTML("<h1 style='text-align: center'>PDF data extraction with OpenAI & <a href='https://getindexify.ai/'>Indexify</a></h1>")
217
  gr.HTML("<p style='text-align: center'>Indexify is a scalable realtime and continuous indexing and structured extraction engine for unstructured data to build generative AI applications</p>")
@@ -228,11 +226,11 @@ with gr.Blocks(title="PDF data extraction with OpenAI & Indexify") as openai_dem
228
  "<a href='https://getindexify.ai/'>Indexify</a>.</p>"
229
  )
230
 
231
- pdf_file_oai = gr.File(type="filepath")
232
 
233
  gr.HTML("<p><b>Step 2:</b> Enter your API key.</p>")
234
 
235
- key_oai = gr.Textbox(
236
  info="Please enter your OPENAI_API_KEY",
237
  label="Key:"
238
  )
@@ -245,9 +243,9 @@ with gr.Blocks(title="PDF data extraction with OpenAI & Indexify") as openai_dem
245
  variant="primary",
246
  )
247
 
248
- model_output_text_box_oai = gr.Textbox(
249
  label="Extractor Output",
250
- elem_id="model_output_text_box_oai",
251
  )
252
 
253
  with gr.Row():
@@ -261,8 +259,8 @@ with gr.Blocks(title="PDF data extraction with OpenAI & Indexify") as openai_dem
261
 
262
  go_button.click(
263
  fn=use_openai,
264
- inputs = [pdf_file_oai, key_oai],
265
- outputs = [model_output_text_box_oai]
266
  )
267
 
268
  demo = gr.TabbedInterface([marker_demo, pdf_demo, gemini_demo, openai_demo], ["Marker Extractor", "PDF Extractor", "Gemini Extractor", "OpenAI Extractor"], theme=gr.themes.Soft())
 
23
  config = MarkdownExtractorConfig(batch_multiplier=2)
24
 
25
  result = markdown_extractor.extract(content, config)
26
+ return result
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
 
28
  with gr.Blocks(title="PDF data extraction with Marker & Indexify") as marker_demo:
29
  gr.HTML("<h1 style='text-align: center'>PDF data extraction with Marker & <a href='https://getindexify.ai/'>Indexify</a></h1>")
 
41
  "<a href='https://getindexify.ai/'>Indexify</a>.</p>"
42
  )
43
 
44
+ pdf_file = gr.File(type="filepath")
45
 
46
  with gr.Column():
47
  gr.HTML("<p><b>Step 2:</b> Run the extractor.</p>")
 
51
  variant="primary",
52
  )
53
 
54
+ model_output_text_box = gr.Textbox(
55
  label="Extractor Output",
56
+ elem_id="model_output_text_box",
57
  )
58
 
59
  with gr.Row():
 
67
 
68
  go_button.click(
69
  fn=use_marker,
70
+ inputs = [pdf_file],
71
+ outputs = [model_output_text_box]
72
  )
73
 
74
+ @spaces.GPU
75
+ def use_pdf_extractor(pdf_filepath):
76
+ if pdf_filepath is None:
77
+ raise gr.Error("Please provide some input PDF: upload an PDF file")
78
+
79
+ with open(pdf_filepath, "rb") as f:
80
+ pdf_data = f.read()
81
+
82
+ content = Content(content_type="application/pdf", data=pdf_data)
83
+ config = PDFExtractorConfig(output_types=["text", "table"])
84
+
85
+ result = pdf_extractor.extract(content, config)
86
+ return result
87
+
88
  with gr.Blocks(title="PDF data extraction with PDF Extractor & Indexify") as pdf_demo:
89
  gr.HTML("<h1 style='text-align: center'>PDF data extraction with PDF Extractor & <a href='https://getindexify.ai/'>Indexify</a></h1>")
90
  gr.HTML("<p style='text-align: center'>Indexify is a scalable realtime and continuous indexing and structured extraction engine for unstructured data to build generative AI applications</p>")
 
101
  "<a href='https://getindexify.ai/'>Indexify</a>.</p>"
102
  )
103
 
104
+ pdf_file = gr.File(type="filepath")
105
 
106
  with gr.Column():
107
  gr.HTML("<p><b>Step 2:</b> Run the extractor.</p>")
 
111
  variant="primary",
112
  )
113
 
114
+ model_output_text_box = gr.Textbox(
115
  label="Extractor Output",
116
+ elem_id="model_output_text_box",
117
  )
118
 
119
  with gr.Row():
 
127
 
128
  go_button.click(
129
  fn=use_pdf_extractor,
130
+ inputs = [pdf_file],
131
+ outputs = [model_output_text_box]
132
  )
133
 
134
+ def use_gemini(pdf_filepath, key):
135
+ if pdf_filepath is None:
136
+ raise gr.Error("Please provide some input PDF: upload an PDF file")
137
+
138
+ with open(pdf_filepath, "rb") as f:
139
+ pdf_data = f.read()
140
+
141
+ content = Content(content_type="application/pdf", data=pdf_data)
142
+ config = GeminiExtractorConfig(prompt="Extract all text from the document.", model_name="gemini-1.5-flash", key=key)
143
+
144
+ result = gemini_extractor.extract(content, config)
145
+ return result
146
+
147
  with gr.Blocks(title="PDF data extraction with Gemini & Indexify") as gemini_demo:
148
  gr.HTML("<h1 style='text-align: center'>PDF data extraction with Gemini & <a href='https://getindexify.ai/'>Indexify</a></h1>")
149
  gr.HTML("<p style='text-align: center'>Indexify is a scalable realtime and continuous indexing and structured extraction engine for unstructured data to build generative AI applications</p>")
 
160
  "<a href='https://getindexify.ai/'>Indexify</a>.</p>"
161
  )
162
 
163
+ pdf_file = gr.File(type="filepath")
164
 
165
  gr.HTML("<p><b>Step 2:</b> Enter your API key.</p>")
166
 
167
+ key = gr.Textbox(
168
  info="Please enter your GEMINI_API_KEY",
169
  label="Key:"
170
  )
 
177
  variant="primary",
178
  )
179
 
180
+ model_output_text_box = gr.Textbox(
181
  label="Extractor Output",
182
+ elem_id="model_output_text_box",
183
  )
184
 
185
  with gr.Row():
 
193
 
194
  go_button.click(
195
  fn=use_gemini,
196
+ inputs = [pdf_file, key],
197
+ outputs = [model_output_text_box]
198
  )
199
 
200
+ def use_openai(pdf_filepath, key):
201
+ if pdf_filepath is None:
202
+ raise gr.Error("Please provide some input PDF: upload an PDF file")
203
+
204
+ with open(pdf_filepath, "rb") as f:
205
+ pdf_data = f.read()
206
+
207
+ content = Content(content_type="application/pdf", data=pdf_data)
208
+ config = OAIExtractorConfig(prompt="Extract all text from the document.", model_name="gpt-4o", key=key)
209
+
210
+ result = oai_extractor.extract(content, config)
211
+ return result
212
+
213
  with gr.Blocks(title="PDF data extraction with OpenAI & Indexify") as openai_demo:
214
  gr.HTML("<h1 style='text-align: center'>PDF data extraction with OpenAI & <a href='https://getindexify.ai/'>Indexify</a></h1>")
215
  gr.HTML("<p style='text-align: center'>Indexify is a scalable realtime and continuous indexing and structured extraction engine for unstructured data to build generative AI applications</p>")
 
226
  "<a href='https://getindexify.ai/'>Indexify</a>.</p>"
227
  )
228
 
229
+ pdf_file = gr.File(type="filepath")
230
 
231
  gr.HTML("<p><b>Step 2:</b> Enter your API key.</p>")
232
 
233
+ key = gr.Textbox(
234
  info="Please enter your OPENAI_API_KEY",
235
  label="Key:"
236
  )
 
243
  variant="primary",
244
  )
245
 
246
+ model_output_text_box = gr.Textbox(
247
  label="Extractor Output",
248
+ elem_id="model_output_text_box",
249
  )
250
 
251
  with gr.Row():
 
259
 
260
  go_button.click(
261
  fn=use_openai,
262
+ inputs = [pdf_file, key],
263
+ outputs = [model_output_text_box]
264
  )
265
 
266
  demo = gr.TabbedInterface([marker_demo, pdf_demo, gemini_demo, openai_demo], ["Marker Extractor", "PDF Extractor", "Gemini Extractor", "OpenAI Extractor"], theme=gr.themes.Soft())