baudm committed
Commit 596254c · 1 Parent(s): 4900d6a

Show raw output with confidence values + fix layout

Files changed (1): app.py (+20 -15)
app.py CHANGED
@@ -23,7 +23,7 @@ import gradio as gr
 
 class App:
 
-    title = 'Scene Text Recognition with Permuted Autoregressive Sequence Models'
+    title = 'Scene Text Recognition with<br/>Permuted Autoregressive Sequence Models'
     models = ['parseq', 'parseq_tiny', 'abinet', 'crnn', 'trba', 'vitstr']
 
     def __init__(self):
@@ -49,19 +49,22 @@ class App:
         image = self._preprocess(image.convert('RGB')).unsqueeze(0)
         # Greedy decoding
         pred = model(image).softmax(-1)
-        label, confidence = model.tokenizer.decode(pred)
-        return label[0]
+        label, _ = model.tokenizer.decode(pred)
+        raw_label, raw_confidence = model.tokenizer.decode(pred, raw=True)
+        # Format confidence values
+        max_len = 25 if model_name == 'crnn' else len(label[0]) + 1
+        conf = list(map('{:0.1f}'.format, raw_confidence[0][:max_len].tolist()))
+        return label[0], [raw_label[0][:max_len], conf]
 
 
 def main():
-
     app = App()
 
-    with gr.Blocks(analytics_enabled=False, title=app.title) as demo:
-        gr.Markdown("""
+    with gr.Blocks(analytics_enabled=False, title=app.title.replace('<br/>', ' ')) as demo:
+        gr.Markdown(f"""
         <div align="center">
 
-        # Scene Text Recognition with<br/>Permuted Autoregressive Sequence Models
+        # {app.title}
         [![GitHub](https://img.shields.io/badge/baudm-parseq-blue?logo=github)](https://github.com/baudm/parseq)
 
         </div>
@@ -71,20 +74,22 @@ def main():
         2. Upload your own image, choose from the examples below, or draw on the canvas.
         3. Click **Read Image** or **Read Drawing**.
         """)
-        model_name = gr.Radio(app.models, value=app.models[0], label='Select STR model to use')
-        with gr.Row():
-            image_upload = gr.Image(type='pil', source='upload', label='Image')
-            image_canvas = gr.Image(type='pil', source='canvas', label='Drawing')
+        model_name = gr.Radio(app.models, value=app.models[0], label='The STR model to use')
         with gr.Row():
-            read_upload = gr.Button('Read Image')
-            read_canvas = gr.Button('Read Drawing')
+            with gr.Column():
+                image_upload = gr.Image(type='pil', source='upload', label='Image')
+                read_upload = gr.Button('Read Image')
+            with gr.Column():
+                image_canvas = gr.Image(type='pil', source='canvas', label='Drawing')
+                read_canvas = gr.Button('Read Drawing')
 
         output = gr.Textbox(max_lines=1, label='Model output')
+        raw_output = gr.Dataframe(row_count=2, col_count=0, label='Raw output with confidence values (interval: [0, 1], [B]: BOS or BLANK token, [E]: EOS token)')
 
         gr.Examples(glob.glob('demo_images/*.*'), inputs=image_upload)
 
-        read_upload.click(app, inputs=[model_name, image_upload], outputs=output)
-        read_canvas.click(app, inputs=[model_name, image_canvas], outputs=output)
+        read_upload.click(app, inputs=[model_name, image_upload], outputs=[output, raw_output])
+        read_canvas.click(app, inputs=[model_name, image_canvas], outputs=[output, raw_output])
 
         demo.launch()
 
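The new `__call__` body decodes the prediction twice: once normally for the display label and once with `raw=True` for per-position tokens and confidences, which are then truncated to `max_len` and formatted for the Dataframe. The sketch below mirrors only that formatting step with made-up token strings and probabilities (no model or tokenizer involved); `format_raw_output` and the dummy values are illustrative and not part of the repository.

```python
# Minimal sketch of the confidence formatting added in this commit.
# raw_label/raw_confidence stand in for the output of
# model.tokenizer.decode(pred, raw=True); the values are invented.

def format_raw_output(model_name, label, raw_label, raw_confidence):
    # CRNN emits a fixed-length frame sequence (25 in the demo), so show all
    # frames; the other models stop at [E], so show len(label) + 1 positions.
    max_len = 25 if model_name == 'crnn' else len(label) + 1
    conf = ['{:0.1f}'.format(p) for p in raw_confidence[:max_len]]
    return label, [raw_label[:max_len], conf]

# Dummy decoded output for an image reading "Sample" (hypothetical values).
label = 'Sample'
raw_label = ['S', 'a', 'm', 'p', 'l', 'e', '[E]']
raw_confidence = [0.99, 0.98, 0.97, 0.99, 0.95, 0.96, 0.99]

text, rows = format_raw_output('parseq', label, raw_label, raw_confidence)
print(text)   # 'Sample' -> shown in the Textbox
print(rows)   # [tokens, confidences] -> the two rows of the Dataframe
```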
 
 
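For the layout fix, the upload and canvas images now sit in two columns of a single row, each paired with its own button, and both buttons write to the Textbox plus the new Dataframe. Below is a self-contained sketch of that wiring with a dummy `recognize` function standing in for `App.__call__`, so the layout can be tried without the STR models; the component arguments follow the diff and assume the Gradio 3.x API used by the demo (e.g. `source='upload'`/`'canvas'`).

```python
import gradio as gr

def recognize(model_name, image):
    # Dummy stand-in for App.__call__: plain label plus the two-row raw output.
    label = 'Sample'
    raw = [['S', 'a', 'm', 'p', 'l', 'e', '[E]'],
           ['1.0', '1.0', '0.9', '1.0', '0.9', '1.0', '1.0']]
    return label, raw

with gr.Blocks(analytics_enabled=False) as demo:
    model_name = gr.Radio(['parseq', 'crnn'], value='parseq', label='The STR model to use')
    with gr.Row():
        with gr.Column():
            image_upload = gr.Image(type='pil', source='upload', label='Image')
            read_upload = gr.Button('Read Image')
        with gr.Column():
            image_canvas = gr.Image(type='pil', source='canvas', label='Drawing')
            read_canvas = gr.Button('Read Drawing')
    output = gr.Textbox(max_lines=1, label='Model output')
    raw_output = gr.Dataframe(row_count=2, col_count=0, label='Raw output with confidence values')
    # Both buttons update both output components, as in the commit.
    read_upload.click(recognize, inputs=[model_name, image_upload], outputs=[output, raw_output])
    read_canvas.click(recognize, inputs=[model_name, image_canvas], outputs=[output, raw_output])

if __name__ == '__main__':
    demo.launch()
```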