rishiraj committed on
Commit
6fde86d
1 Parent(s): 7796366

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +49 -50
app.py CHANGED
@@ -18,31 +18,6 @@ def use_marker(pdf_filepath):
18
  result = markdown_extractor.extract(content, config)
19
  return result
20
 
21
- with gr.Blocks(title="PDF data extraction with Marker & Indexify") as marker_demo:
22
- gr.HTML("<h1 style='text-align: center'>PDF data extraction with Marker & <a href='https://getindexify.ai/'>Indexify</a></h1>")
23
- gr.HTML("<p style='text-align: center'>Indexify is a scalable realtime and continuous indexing and structured extraction engine for unstructured data to build generative AI applications</p>")
24
- gr.HTML("<h3 style='text-align: center'>If you like this demo, please ⭐ Star us on <a href='https://github.com/tensorlakeai/indexify' target='_blank'>GitHub</a>!</h3>")
25
- gr.HTML("<h4 style='text-align: center'>Here's an example notebook that demonstrates how to build a continuous <a href='https://github.com/tensorlakeai/indexify/blob/main/docs/docs/examples/efficient_rag.ipynb' target='_blank'>extraction pipeline</a> with Indexify</h4>")
26
-
27
- with gr.Row():
28
- with gr.Column():
29
- gr.HTML(
30
- "<p><b>Step 1:</b> Upload a PDF file from local storage.</p>"
31
- "<p style='color: #A0A0A0;'>Use this demo for single PDF file only. "
32
- "You can extract from PDF files continuously and try various other extractors locally with "
33
- "<a href='https://getindexify.ai/'>Indexify</a>.</p>"
34
- )
35
- pdf_file_1 = gr.File(type="filepath")
36
- with gr.Column():
37
- gr.HTML("<p><b>Step 2:</b> Run the extractor.</p>")
38
- go_button_1 = gr.Button(value="Run Marker extractor", variant="primary")
39
- model_output_text_box_1 = gr.Textbox(label="Extractor Output", elem_id="model_output_text_box_1")
40
-
41
- with gr.Row():
42
- gr.HTML("<p style='text-align: center'>Developed with 🫶 by <a href='https://getindexify.ai/' target='_blank'>Indexify</a> | a <a href='https://www.tensorlake.ai/' target='_blank'>Tensorlake</a> product</p>")
43
-
44
- go_button_1.click(fn=use_marker, inputs=[pdf_file_1], outputs=[model_output_text_box_1])
45
-
46
  @spaces.GPU
47
  def use_pdf_extractor(pdf_filepath):
48
  if pdf_filepath is None:
@@ -54,32 +29,56 @@ def use_pdf_extractor(pdf_filepath):
54
  result = pdf_extractor.extract(content, config)
55
  return result
56
 
57
- with gr.Blocks(title="PDF data extraction with PDF Extractor & Indexify") as pdf_demo:
58
- gr.HTML("<h1 style='text-align: center'>PDF data extraction with PDF Extractor & <a href='https://getindexify.ai/'>Indexify</a></h1>")
59
- gr.HTML("<p style='text-align: center'>Indexify is a scalable realtime and continuous indexing and structured extraction engine for unstructured data to build generative AI applications</p>")
60
- gr.HTML("<h3 style='text-align: center'>If you like this demo, please Star us on <a href='https://github.com/tensorlakeai/indexify' target='_blank'>GitHub</a>!</h3>")
61
- gr.HTML("<h4 style='text-align: center'>Here's an example notebook that demonstrates how to build a continuous <a href='https://github.com/tensorlakeai/indexify/blob/main/docs/docs/examples/SEC_10_K_docs.ipynb' target='_blank'>extraction pipeline</a> with Indexify</h4>")
62
-
63
- with gr.Row():
64
- with gr.Column():
65
- gr.HTML(
66
- "<p><b>Step 1:</b> Upload a PDF file from local storage.</p>"
67
- "<p style='color: #A0A0A0;'>Use this demo for single PDF file only. "
68
- "You can extract from PDF files continuously and try various other extractors locally with "
69
- "<a href='https://getindexify.ai/'>Indexify</a>.</p>"
70
- )
71
- pdf_file_2 = gr.File(type="filepath")
72
- with gr.Column():
73
- gr.HTML("<p><b>Step 2:</b> Run the extractor.</p>")
74
- go_button_2 = gr.Button(value="Run PDF extractor", variant="primary")
75
- model_output_text_box_2 = gr.Textbox(label="Extractor Output", elem_id="model_output_text_box_2")
76
-
77
- with gr.Row():
78
- gr.HTML("<p style='text-align: center'>Developed with 🫶 by <a href='https://getindexify.ai/' target='_blank'>Indexify</a> | a <a href='https://www.tensorlake.ai/' target='_blank'>Tensorlake</a> product</p>")
79
-
80
- go_button_2.click(fn=use_pdf_extractor, inputs=[pdf_file_2], outputs=[model_output_text_box_2])
 
81
 
82
- demo = gr.TabbedInterface([marker_demo, pdf_demo], ["Marker Extractor", "PDF Extractor"], theme=gr.themes.Soft())
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
83
 
84
  demo.queue()
85
  demo.launch()
 
18
  result = markdown_extractor.extract(content, config)
19
  return result
20
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
  @spaces.GPU
22
  def use_pdf_extractor(pdf_filepath):
23
  if pdf_filepath is None:
 
29
  result = pdf_extractor.extract(content, config)
30
  return result
31
 
32
+ with gr.Blocks(theme=gr.themes.Soft()) as demo:
33
+ with gr.Tab("PDF data extraction with Marker & Indexify"):
34
+ gr.HTML("<h1 style='text-align: center'>PDF data extraction with Marker & <a href='https://getindexify.ai/'>Indexify</a></h1>")
35
+ gr.HTML("<p style='text-align: center'>Indexify is a scalable realtime and continuous indexing and structured extraction engine for unstructured data to build generative AI applications</p>")
36
+ gr.HTML("<h3 style='text-align: center'>If you like this demo, please Star us on <a href='https://github.com/tensorlakeai/indexify' target='_blank'>GitHub</a>!</h3>")
37
+ gr.HTML("<h4 style='text-align: center'>Here's an example notebook that demonstrates how to build a continuous <a href='https://github.com/tensorlakeai/indexify/blob/main/docs/docs/examples/efficient_rag.ipynb' target='_blank'>extraction pipeline</a> with Indexify</h4>")
38
+
39
+ with gr.Row():
40
+ with gr.Column():
41
+ gr.HTML(
42
+ "<p><b>Step 1:</b> Upload a PDF file from local storage.</p>"
43
+ "<p style='color: #A0A0A0;'>Use this demo for single PDF file only. "
44
+ "You can extract from PDF files continuously and try various other extractors locally with "
45
+ "<a href='https://getindexify.ai/'>Indexify</a>.</p>"
46
+ )
47
+ pdf_file_1 = gr.File(type="filepath")
48
+ with gr.Column():
49
+ gr.HTML("<p><b>Step 2:</b> Run the extractor.</p>")
50
+ go_button_1 = gr.Button(value="Run Marker extractor", variant="primary")
51
+ model_output_text_box_1 = gr.Textbox(label="Extractor Output", elem_id="model_output_text_box_1")
52
+
53
+ with gr.Row():
54
+ gr.HTML("<p style='text-align: center'>Developed with 🫶 by <a href='https://getindexify.ai/' target='_blank'>Indexify</a> | a <a href='https://www.tensorlake.ai/' target='_blank'>Tensorlake</a> product</p>")
55
+
56
+ go_button_1.click(fn=use_marker, inputs=[pdf_file_1], outputs=[model_output_text_box_1])
57
 
58
+ with gr.Tab("PDF data extraction with PDF Extractor & Indexify"):
59
+ gr.HTML("<h1 style='text-align: center'>PDF data extraction with PDF Extractor & <a href='https://getindexify.ai/'>Indexify</a></h1>")
60
+ gr.HTML("<p style='text-align: center'>Indexify is a scalable realtime and continuous indexing and structured extraction engine for unstructured data to build generative AI applications</p>")
61
+ gr.HTML("<h3 style='text-align: center'>If you like this demo, please ⭐ Star us on <a href='https://github.com/tensorlakeai/indexify' target='_blank'>GitHub</a>!</h3>")
62
+ gr.HTML("<h4 style='text-align: center'>Here's an example notebook that demonstrates how to build a continuous <a href='https://github.com/tensorlakeai/indexify/blob/main/docs/docs/examples/SEC_10_K_docs.ipynb' target='_blank'>extraction pipeline</a> with Indexify</h4>")
63
+
64
+ with gr.Row():
65
+ with gr.Column():
66
+ gr.HTML(
67
+ "<p><b>Step 1:</b> Upload a PDF file from local storage.</p>"
68
+ "<p style='color: #A0A0A0;'>Use this demo for single PDF file only. "
69
+ "You can extract from PDF files continuously and try various other extractors locally with "
70
+ "<a href='https://getindexify.ai/'>Indexify</a>.</p>"
71
+ )
72
+ pdf_file_2 = gr.File(type="filepath")
73
+ with gr.Column():
74
+ gr.HTML("<p><b>Step 2:</b> Run the extractor.</p>")
75
+ go_button_2 = gr.Button(value="Run PDF extractor", variant="primary")
76
+ model_output_text_box_2 = gr.Textbox(label="Extractor Output", elem_id="model_output_text_box_2")
77
+
78
+ with gr.Row():
79
+ gr.HTML("<p style='text-align: center'>Developed with 🫶 by <a href='https://getindexify.ai/' target='_blank'>Indexify</a> | a <a href='https://www.tensorlake.ai/' target='_blank'>Tensorlake</a> product</p>")
80
+
81
+ go_button_2.click(fn=use_pdf_extractor, inputs=[pdf_file_2], outputs=[model_output_text_box_2])
82
 
83
  demo.queue()
84
  demo.launch()