Spaces:
Running
Running
xiaoyao9184
committed on
Synced repo using 'sync_with_huggingface' Github Action
Browse files
- gradio_app.py +10 -6
- requirements.txt +1 -1
gradio_app.py
CHANGED
@@ -98,12 +98,14 @@ with gr.Blocks(title="Marker") as demo:
|
|
98 |
in_num = gr.Slider(label="PDF file page number", minimum=1, maximum=1, value=1, step=1, visible=False)
|
99 |
in_img = gr.Image(label="PDF file (preview)", type="pil", sources=None, visible=False)
|
100 |
|
101 |
-
page_range_txt = gr.Textbox(label="Page range to parse, comma separated like 0,5-10,20", value=f"
|
102 |
output_format_dd = gr.Dropdown(label="Output format", choices=["markdown", "json", "html"], value="markdown")
|
103 |
|
104 |
force_ocr_ckb = gr.Checkbox(label="Force OCR", value=True, info="Force OCR on all pages")
|
105 |
debug_ckb = gr.Checkbox(label="Debug", value=False, info="Show debug information")
|
106 |
-
|
|
|
|
|
107 |
with gr.Column():
|
108 |
result_md = gr.Markdown(label="Result markdown", visible=False)
|
109 |
result_json = gr.JSON(label="Result json", visible=False)
|
@@ -154,17 +156,19 @@ with gr.Blocks(title="Marker") as demo:
|
|
154 |
page_range_txt.change(
|
155 |
fn=check_page_range,
|
156 |
inputs=[page_range_txt, in_file],
|
157 |
-
outputs=[page_range_txt,
|
158 |
)
|
159 |
|
160 |
# Run Marker
|
161 |
-
def run_marker_img(filename, page_range, force_ocr, output_format, debug):
|
162 |
cli_options = {
|
163 |
"output_format": output_format,
|
164 |
"page_range": page_range,
|
165 |
"force_ocr": force_ocr,
|
166 |
"debug": debug,
|
167 |
"output_dir": settings.DEBUG_DATA_FOLDER if debug else None,
|
|
|
|
|
168 |
}
|
169 |
config_parser = ConfigParser(cli_options)
|
170 |
rendered = convert_pdf(
|
@@ -213,9 +217,9 @@ with gr.Blocks(title="Marker") as demo:
|
|
213 |
gr_debug_lay
|
214 |
]
|
215 |
|
216 |
-
|
217 |
fn=run_marker_img,
|
218 |
-
inputs=[in_file, page_range_txt, force_ocr_ckb, output_format_dd, debug_ckb],
|
219 |
outputs=[result_md, result_json, result_html, debug_img_pdf, debug_img_layout]
|
220 |
)
|
221 |
|
|
|
98 |
in_num = gr.Slider(label="PDF file page number", minimum=1, maximum=1, value=1, step=1, visible=False)
|
99 |
in_img = gr.Image(label="PDF file (preview)", type="pil", sources=None, visible=False)
|
100 |
|
101 |
+
page_range_txt = gr.Textbox(label="Page range to parse, comma separated like 0,5-10,20", value=f"")
|
102 |
output_format_dd = gr.Dropdown(label="Output format", choices=["markdown", "json", "html"], value="markdown")
|
103 |
|
104 |
force_ocr_ckb = gr.Checkbox(label="Force OCR", value=True, info="Force OCR on all pages")
|
105 |
debug_ckb = gr.Checkbox(label="Debug", value=False, info="Show debug information")
|
106 |
+
use_llm_ckb = gr.Checkbox(label="Use LLM", value=False, info="Use LLM for higher quality processing")
|
107 |
+
strip_existing_ocr_ckb = gr.Checkbox(label="Strip existing OCR", value=False, info="Strip existing OCR text from the PDF and re-OCR.")
|
108 |
+
run_marker_btn = gr.Button("Run Marker", interactive=False)
|
109 |
with gr.Column():
|
110 |
result_md = gr.Markdown(label="Result markdown", visible=False)
|
111 |
result_json = gr.JSON(label="Result json", visible=False)
|
|
|
156 |
page_range_txt.change(
|
157 |
fn=check_page_range,
|
158 |
inputs=[page_range_txt, in_file],
|
159 |
+
outputs=[page_range_txt, run_marker_btn]
|
160 |
)
|
161 |
|
162 |
# Run Marker
|
163 |
+
def run_marker_img(filename, page_range, force_ocr, output_format, debug, use_llm, strip_existing_ocr):
|
164 |
cli_options = {
|
165 |
"output_format": output_format,
|
166 |
"page_range": page_range,
|
167 |
"force_ocr": force_ocr,
|
168 |
"debug": debug,
|
169 |
"output_dir": settings.DEBUG_DATA_FOLDER if debug else None,
|
170 |
+
"use_llm": use_llm,
|
171 |
+
"strip_existing_ocr": strip_existing_ocr
|
172 |
}
|
173 |
config_parser = ConfigParser(cli_options)
|
174 |
rendered = convert_pdf(
|
|
|
217 |
gr_debug_lay
|
218 |
]
|
219 |
|
220 |
+
run_marker_btn.click(
|
221 |
fn=run_marker_img,
|
222 |
+
inputs=[in_file, page_range_txt, force_ocr_ckb, output_format_dd, debug_ckb, use_llm_ckb, strip_existing_ocr_ckb],
|
223 |
outputs=[result_md, result_json, result_html, debug_img_pdf, debug_img_layout]
|
224 |
)
|
225 |
|
requirements.txt
CHANGED
@@ -1,4 +1,4 @@
|
|
1 |
torch==2.5.1
|
2 |
-
marker-pdf==1.
|
3 |
gradio==5.8.0
|
4 |
huggingface-hub==0.26.3
|
|
|
1 |
torch==2.5.1
|
2 |
+
marker-pdf==1.2.0
|
3 |
gradio==5.8.0
|
4 |
huggingface-hub==0.26.3
|