# Pix2Text-Demo / app.py
# Last commit by breezedeus: "new gradio app for p2t v1.0" (5bedb5a)
# File size: 8.8 kB
# coding: utf-8
# Copyright (C) 2023, [Breezedeus](https://github.com/breezedeus).
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# Ref: https://huggingface.co/spaces/hysts/Manga-OCR/blob/main/app.py
import os
import json
import functools
import random
import string
import time
import yaml
import gradio as gr
import numpy as np
# from cnstd.utils import pil_to_numpy, imsave
from pix2text import Pix2Text
from pix2text.utils import set_logger, merge_line_texts
# Module-wide logger, configured by pix2text's own helper.
logger = set_logger()

# Language table loaded once at import time from `languages.yaml`. Its keys are
# the language names offered in the UI and its values are what gets passed on
# to Pix2Text. The context manager guarantees the file handle is closed
# instead of being left for the garbage collector.
with open('languages.yaml', 'r', encoding='utf-8') as _languages_file:
    LANGUAGES = yaml.safe_load(_languages_file)['languages']
def get_p2t_model(lan_list: list):
    """Construct a Pix2Text model for the requested text languages.

    Args:
        lan_list: Language values forwarded unchanged as the ``languages``
            argument of ``Pix2Text``.

    Returns:
        A freshly constructed ``Pix2Text`` instance (a new one on every call).
    """
    return Pix2Text(languages=lan_list)
def latex_render(latex_str):
    """Wrap *latex_str* in ``$$`` display-math delimiters, one per line.

    Args:
        latex_str: The LaTeX source to wrap.

    Returns:
        A three-line string: ``$$``, the given content, then ``$$``.
    """
    return "$$\n{}\n$$".format(latex_str)
def recognize(lang_list, rec_type, resized_shape, image_file):
    """Run Pix2Text on an uploaded image and return the values shown in the UI.

    Args:
        lang_list: Language names selected in the UI; each one must be a key
            of the module-level ``LANGUAGES`` mapping.
        rec_type: Recognition mode, one of ``'Formula & Text'``,
            ``'Only Formula'`` or ``'Only Text'``.
        resized_shape: Forwarded as ``resized_shape`` to the model call. Only
            used in ``'Formula & Text'`` mode.
        image_file: The image to recognize (the UI supplies a PIL image), or
            ``None`` when nothing has been uploaded.

    Returns:
        A ``(text, detection_image_path)`` tuple. The second element is the
        file path passed to the model as ``save_analysis_res`` in
        ``'Formula & Text'`` mode and ``None`` in the other two modes.

    Raises:
        gr.Error: If ``image_file`` is ``None``; Gradio shows the message to
            the user instead of an opaque model failure.
        KeyError: If a name in ``lang_list`` is not a key of ``LANGUAGES``.
        ValueError: If ``rec_type`` is not one of the supported modes.
    """
    # Fail fast, before the (potentially expensive) model construction, when
    # the user pressed Submit without uploading anything.
    if image_file is None:
        raise gr.Error('Please upload an image before submitting.')

    languages = [LANGUAGES[name] for name in lang_list]
    p2t = get_p2t_model(languages)

    if rec_type == 'Formula & Text':
        # Timestamp plus six distinct random letters keeps output file names
        # from colliding between requests.
        suffix = ''.join(random.sample(string.ascii_letters, 6))
        out_det_fp = f'out-det-{time.time()}-{suffix}.jpg'
        outs = p2t(
            image_file, resized_shape=resized_shape, save_analysis_res=out_det_fp
        )
        # Collapse the per-line results into a single text string.
        only_text = merge_line_texts(outs, auto_line_break=True)
        return only_text, out_det_fp

    if rec_type == 'Only Formula':
        return latex_render(p2t.recognize_formula(image_file)), None

    if rec_type == 'Only Text':
        return p2t.recognize_text(image_file), None

    # Previously an unknown mode fell through and returned a bare None, which
    # does not match the two outputs the UI is wired to receive.
    raise ValueError(f'Unsupported rec_type: {rec_type!r}')
def _build_demo():
    """Assemble the Gradio Blocks UI and wire the Submit button to ``recognize``.

    Returns:
        The constructed ``gr.Blocks`` application (not yet queued or launched).
    """
    # Dropdown choices: language names sorted case-insensitively.
    language_names = list(LANGUAGES.keys())
    language_names.sort(key=lambda name: name.lower())

    title = 'Demo'
    header_html = f'<h1 style="text-align: center; margin-bottom: 1rem;"><a href="https://github.com/breezedeus/pix2text" target="_blank">Pix2Text V1.0</a> {title}</h1>'

    # Side panel content (badges and project links), rendered as Markdown.
    table_desc = """
<div align="center">
<img src="https://www.notion.so/image/https%3A%2F%2Fprod-files-secure.s3.us-west-2.amazonaws.com%2F9341931a-53f0-48e1-b026-0f1ad17b457c%2Fc41e0b1d-4869-4e39-93db-631569e6a38d%2FUntitled.png?table=block&id=3d0819ca-2e1a-46a7-b6f3-b4cf89cd045c" width="120px"/>
[![Visitors](https://api.visitorbadge.io/api/visitors?path=https%3A%2F%2Fhuggingface.co%2Fspaces%2Fbreezedeus%2FCnOCR-Demo&labelColor=%23697689&countColor=%23f5c791&style=flat&labelStyle=upper)](https://visitorbadge.io/status?path=https%3A%2F%2Fhuggingface.co%2Fspaces%2Fbreezedeus%2FCnOCR-Demo)
[![Discord](https://img.shields.io/discord/1200765964434821260?logo=discord&label=Discord)](https://discord.gg/H9FmDSMA)
| | |
| ------------------------------- | --------------------------------------- |
| 🏄 **Free Web Service** | [p2t.breezedeus.com](https://p2t.breezedeus.com) |
| 📀 **Code** | [Github](https://github.com/breezedeus/pix2text) |
| 💬 **Discord** | [P2T @ Discord](https://discord.gg/H9FmDSMA) |
| 👨🏻‍💻 **Author** | [Breezedeus](https://www.breezedeus.com) |
If useful, please help to **star 🌟 [Pix2Text](https://github.com/breezedeus/pix2text)** 🙏
</div>
"""

    with gr.Blocks() as demo:
        gr.HTML(header_html)

        # Top row: settings | image upload | project links.
        with gr.Row(equal_height=False):
            with gr.Column(min_width=200, variant='panel', scale=3):
                gr.Markdown('### Settings')
                lang_dropdown = gr.Dropdown(
                    label='Text Languages',
                    choices=language_names,
                    value=['English', 'Chinese Simplified'],
                    multiselect=True,
                    info='Which languages to be recognized as Texts.',
                )
                rec_type_radio = gr.Radio(
                    choices=['Formula & Text', 'Only Formula', 'Only Text'],
                    label='Image Type',
                    value='Formula & Text',
                    info='Which type of image to be recognized.',
                )
                shape_slider = gr.Slider(
                    label='resized_shape',
                    minimum=512,
                    maximum=2048,
                    value=608,
                    step=32,
                )

            with gr.Column(scale=6, variant='compact'):
                gr.Markdown('### Upload Image to be Recognized')
                image_input = gr.Image(
                    label='Image', type="pil", image_mode='RGB', show_label=False
                )
                submit_button = gr.Button("Submit", variant="primary")

            with gr.Column(scale=2, variant='compact'):
                gr.Markdown(table_desc)

        # Bottom row: detection image next to the recognized text.
        with gr.Row(equal_height=False):
            with gr.Column(scale=1, variant='compact'):
                gr.Markdown('**Detection Result**')
                detection_output = gr.Image(
                    label='Detection Result', scale=1, show_label=False
                )
            with gr.Column(scale=1, variant='compact'):
                gr.Markdown('**Recognition Result**')
                text_output = gr.Textbox(
                    label='Recognition Result',
                    lines=5,
                    value='',
                    scale=1,
                    show_label=False,
                    show_copy_button=True,
                )

        # Input order must match the parameter order of `recognize`; output
        # order must match its (text, detection image path) return value.
        submit_button.click(
            recognize,
            inputs=[lang_dropdown, rec_type_radio, shape_slider, image_input],
            outputs=[text_output, detection_output],
        )

    return demo


def main():
    """Build the demo UI, enable a request queue of at most 10, and launch it."""
    demo = _build_demo()
    demo.queue(max_size=10)
    demo.launch()
# Start the demo only when this file is run as a script, not when imported.
if __name__ == "__main__":
    main()