xiaoyao9184 committed on
Commit
c97db84
·
verified ·
1 Parent(s): 4b996b4

Synced repo using 'sync_with_huggingface' Github Action

Browse files
Files changed (2) hide show
  1. gradio_app.py +10 -6
  2. requirements.txt +1 -1
gradio_app.py CHANGED
@@ -98,12 +98,14 @@ with gr.Blocks(title="Marker") as demo:
98
  in_num = gr.Slider(label="PDF file page number", minimum=1, maximum=1, value=1, step=1, visible=False)
99
  in_img = gr.Image(label="PDF file (preview)", type="pil", sources=None, visible=False)
100
 
101
- page_range_txt = gr.Textbox(label="Page range to parse, comma separated like 0,5-10,20", value=f"0-0")
102
  output_format_dd = gr.Dropdown(label="Output format", choices=["markdown", "json", "html"], value="markdown")
103
 
104
  force_ocr_ckb = gr.Checkbox(label="Force OCR", value=True, info="Force OCR on all pages")
105
  debug_ckb = gr.Checkbox(label="Debug", value=False, info="Show debug information")
106
- trun_marker_btn = gr.Button("Run Marker", interactive=False)
 
 
107
  with gr.Column():
108
  result_md = gr.Markdown(label="Result markdown", visible=False)
109
  result_json = gr.JSON(label="Result json", visible=False)
@@ -154,17 +156,19 @@ with gr.Blocks(title="Marker") as demo:
154
  page_range_txt.change(
155
  fn=check_page_range,
156
  inputs=[page_range_txt, in_file],
157
- outputs=[page_range_txt, trun_marker_btn]
158
  )
159
 
160
  # Run Marker
161
- def run_marker_img(filename, page_range, force_ocr, output_format, debug):
162
  cli_options = {
163
  "output_format": output_format,
164
  "page_range": page_range,
165
  "force_ocr": force_ocr,
166
  "debug": debug,
167
  "output_dir": settings.DEBUG_DATA_FOLDER if debug else None,
 
 
168
  }
169
  config_parser = ConfigParser(cli_options)
170
  rendered = convert_pdf(
@@ -213,9 +217,9 @@ with gr.Blocks(title="Marker") as demo:
213
  gr_debug_lay
214
  ]
215
 
216
- trun_marker_btn.click(
217
  fn=run_marker_img,
218
- inputs=[in_file, page_range_txt, force_ocr_ckb, output_format_dd, debug_ckb],
219
  outputs=[result_md, result_json, result_html, debug_img_pdf, debug_img_layout]
220
  )
221
 
 
98
  in_num = gr.Slider(label="PDF file page number", minimum=1, maximum=1, value=1, step=1, visible=False)
99
  in_img = gr.Image(label="PDF file (preview)", type="pil", sources=None, visible=False)
100
 
101
+ page_range_txt = gr.Textbox(label="Page range to parse, comma separated like 0,5-10,20", value=f"")
102
  output_format_dd = gr.Dropdown(label="Output format", choices=["markdown", "json", "html"], value="markdown")
103
 
104
  force_ocr_ckb = gr.Checkbox(label="Force OCR", value=True, info="Force OCR on all pages")
105
  debug_ckb = gr.Checkbox(label="Debug", value=False, info="Show debug information")
106
+ use_llm_ckb = gr.Checkbox(label="Use LLM", value=False, info="Use LLM for higher quality processing")
107
+ strip_existing_ocr_ckb = gr.Checkbox(label="Strip existing OCR", value=False, info="Strip existing OCR text from the PDF and re-OCR.")
108
+ run_marker_btn = gr.Button("Run Marker", interactive=False)
109
  with gr.Column():
110
  result_md = gr.Markdown(label="Result markdown", visible=False)
111
  result_json = gr.JSON(label="Result json", visible=False)
 
156
  page_range_txt.change(
157
  fn=check_page_range,
158
  inputs=[page_range_txt, in_file],
159
+ outputs=[page_range_txt, run_marker_btn]
160
  )
161
 
162
  # Run Marker
163
+ def run_marker_img(filename, page_range, force_ocr, output_format, debug, use_llm, strip_existing_ocr):
164
  cli_options = {
165
  "output_format": output_format,
166
  "page_range": page_range,
167
  "force_ocr": force_ocr,
168
  "debug": debug,
169
  "output_dir": settings.DEBUG_DATA_FOLDER if debug else None,
170
+ "use_llm": use_llm,
171
+ "strip_existing_ocr": strip_existing_ocr
172
  }
173
  config_parser = ConfigParser(cli_options)
174
  rendered = convert_pdf(
 
217
  gr_debug_lay
218
  ]
219
 
220
+ run_marker_btn.click(
221
  fn=run_marker_img,
222
+ inputs=[in_file, page_range_txt, force_ocr_ckb, output_format_dd, debug_ckb, use_llm_ckb, strip_existing_ocr_ckb],
223
  outputs=[result_md, result_json, result_html, debug_img_pdf, debug_img_layout]
224
  )
225
 
requirements.txt CHANGED
@@ -1,4 +1,4 @@
1
  torch==2.5.1
2
- marker-pdf==1.1.0
3
  gradio==5.8.0
4
  huggingface-hub==0.26.3
 
1
  torch==2.5.1
2
+ marker-pdf==1.2.0
3
  gradio==5.8.0
4
  huggingface-hub==0.26.3