Spaces:

Tonic
/

GOT-OCR

Running on Zero

App Files Files Community

Tonic committed on Sep 14, 2024

Commit

405302e

unverified ·

1 Parent(s): 7dcbad8

improve file handling, display HTML

Browse files

Files changed (3) hide show

.gitignore +1 -1
app.py +4 -7
notes.py +92 -0

.gitignore CHANGED Viewed

@@ -1,3 +1,3 @@
 .DS_Store
 .venv/*
-notes.py

 .DS_Store
 .venv/*
+.notes.py

app.py CHANGED Viewed

@@ -73,17 +73,14 @@ def update_inputs(task):
             gr.update(visible=True, choices=["red", "green", "blue"]),
         ]
 def ocr_demo(image, task, ocr_type, ocr_box, ocr_color):
-    res, html_content = process_image(image, task, ocr_type, ocr_box, ocr_color)
     res = f"$$ {res} $$"
-    # res = res.replace("$$ \\begin{tabular}", "\\begin{tabular}")
-    # res = res.replace("\\end{tabular} $$", "\\end{tabular}")
-    # res = res.replace("\\(", "")
-    # res = res.replace("\\)", "")
     if html_content:
-        html_string = f'<iframe srcdoc="{html_content}" width="100%" height="600px"></iframe>'
-        return res, html_string
     return res, None
 def cleanup_old_files():

             gr.update(visible=True, choices=["red", "green", "blue"]),
         ]
 def ocr_demo(image, task, ocr_type, ocr_box, ocr_color):
+    res, html_content, unique_id = process_image(image, task, ocr_type, ocr_box, ocr_color)
     res = f"$$ {res} $$"
     if html_content:
+        iframe = f'<iframe srcdoc="{html_content}" width="100%" height="600px"></iframe>'
+        link = f'<a href="file={results_folder / f"{unique_id}.html"}" target="_blank">View Full Result</a>'
+        return res, f"{link}<br>{iframe}"
     return res, None
 def cleanup_old_files():

notes.py ADDED Viewed

	@@ -0,0 +1,92 @@

+def ocr_demo(image, task, ocr_type, ocr_box, ocr_color):
+    res, html_content = process_image(image, task, ocr_type, ocr_box, ocr_color)
+    res = f"$$ {res} $$"
+    # res = res.replace("$$ \\begin{tabular}", "\\begin{tabular}")
+    # res = res.replace("\\end{tabular} $$", "\\end{tabular}")
+    # res = res.replace("\\(", "")
+    # res = res.replace("\\)", "")
+    if html_content:
+        html_string = f'<iframe srcdoc="{html_content}" width="100%" height="600px"></iframe>'
+        return res, html_string
+    return res, None
+@spaces.GPU
+def process_image(image, task, ocr_type=None, ocr_box=None, ocr_color=None):
+    demo_html = os.path.join(results_folder, "demo.html")
+    html_file = os.path.join(results_folder, f"{task.replace(' ', '_').lower()}.html")
+    tikz_file = os.path.join(results_folder, "tikz.html")
+    unique_id = str(uuid.uuid4())
+    with tempfile.NamedTemporaryFile(mode='w+', suffix='.html', delete=False, dir=results_folder) as temp_file:
+        temp_html_path = temp_file.name
+    if task == "Plain Text OCR":
+        res = model.chat(tokenizer, image, ocr_type='ocr')
+        return res, None, unique_id
+    else:
+        if task == "Format Text OCR":
+            res = model.chat(tokenizer, image, ocr_type='format', render=True, save_render_file=temp_html_path)
+        elif task == "Fine-grained OCR (Box)":
+            res = model.chat(tokenizer, image, ocr_type=ocr_type, ocr_box=ocr_box, render=True, save_render_file=temp_html_path)
+        elif task == "Fine-grained OCR (Color)":
+            res = model.chat(tokenizer, image, ocr_type=ocr_type, ocr_color=ocr_color, render=True, save_render_file=temp_html_path)
+        elif task == "Multi-crop OCR":
+            res = model.chat_crop(tokenizer, image, ocr_type='format', render=True, save_render_file=temp_html_path)
+        elif task == "Render Formatted OCR":
+            res = model.chat(tokenizer, image, ocr_type='format', render=True, save_render_file=temp_html_path)
+        # html_content = None
+        if os.path.exists(temp_html_path):
+            with open(temp_html_path, 'r') as f:
+                html_content = f.read()
+        if os.path.exists(demo_html):
+            with open(demo_html, 'r') as f:
+                html_content = f.read()
+        elif os.path.exists(html_file):
+            with open(html_file, 'r') as f:
+                html_content = f.read()
+        elif os.path.exists(tikz_file):
+            with open(tikz_file, 'r') as f:
+                html_content = f.read()
+        else:
+            html_content = None
+        return res, html_content, unique_id
+@spaces.GPU
+def process_image(image, task, ocr_type=None, ocr_box=None, ocr_color=None):
+    demo_html = os.path.join(results_folder, "demo.html")
+    html_file = os.path.join(results_folder, f"{task.replace(' ', '_').lower()}.html")
+    tikz_file = os.path.join(results_folder, "tikz.html")
+    if task == "Plain Text OCR":
+        res = model.chat(tokenizer, image, ocr_type='ocr')
+        return res, None
+    else:
+        if task == "Format Text OCR":
+            res = model.chat(tokenizer, image, ocr_type='format', render=True, save_render_file=demo_html)
+        elif task == "Fine-grained OCR (Box)":
+            res = model.chat(tokenizer, image, ocr_type=ocr_type, ocr_box=ocr_box, render=True, save_render_file=demo_html)
+        elif task == "Fine-grained OCR (Color)":
+            res = model.chat(tokenizer, image, ocr_type=ocr_type, ocr_color=ocr_color, render=True, save_render_file=demo_html)
+        elif task == "Multi-crop OCR":
+            res = model.chat_crop(tokenizer, image, ocr_type='format', render=True, save_render_file=demo_html)
+        elif task == "Render Formatted OCR":
+            res = model.chat(tokenizer, image, ocr_type='format', render=True, save_render_file=demo_html)
+        if os.path.exists(demo_html):
+            with open(demo_html, 'r') as f:
+                html_content = f.read()
+        elif os.path.exists(html_file):
+            with open(html_file, 'r') as f:
+                html_content = f.read()
+        elif os.path.exists(tikz_file):
+            with open(tikz_file, 'r') as f:
+                html_content = f.read()
+        else:
+            html_content = None
+        return res, html_content