Spaces:

breezedeus
/

Pix2Text-Demo

Running

App Files Community

Pix2Text-Demo / app.py

breezedeus

new gradio app for p2t v1.0

5bedb5a 6 months ago

raw

history blame

8.8 kB

	# coding: utf-8
	# Copyright (C) 2023, [Breezedeus](https://github.com/breezedeus).
	# Licensed to the Apache Software Foundation (ASF) under one
	# or more contributor license agreements. See the NOTICE file
	# distributed with this work for additional information
	# regarding copyright ownership. The ASF licenses this file
	# to you under the Apache License, Version 2.0 (the
	# "License"); you may not use this file except in compliance
	# with the License. You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing,
	# software distributed under the License is distributed on an
	# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
	# KIND, either express or implied. See the License for the
	# specific language governing permissions and limitations
	# under the License.
	# Ref: https://huggingface.co/spaces/hysts/Manga-OCR/blob/main/app.py

	import os
	import json
	import functools
	import random
	import string
	import time

	import yaml

	import gradio as gr
	import numpy as np

	# from cnstd.utils import pil_to_numpy, imsave

	from pix2text import Pix2Text
	from pix2text.utils import set_logger, merge_line_texts

	logger = set_logger()

	# LANGUAGES maps the names offered in the UI language dropdown to the values
	# passed to Pix2Text(languages=...). It is read once at import time; the
	# context manager closes the file handle as soon as parsing is done instead
	# of leaving it to the garbage collector.
	with open('languages.yaml', 'r', encoding='utf-8') as _languages_file:
	    LANGUAGES = yaml.safe_load(_languages_file)['languages']


	def get_p2t_model(lan_list: list):
	    """Construct a Pix2Text recognizer for the requested text languages.

	    Args:
	        lan_list: Passed through unchanged as the ``languages`` argument
	            of ``Pix2Text``.

	    Returns:
	        A newly constructed ``Pix2Text`` instance.
	    """
	    # NOTE(review): a new instance is created on every call; presumably that
	    # reloads the underlying models each time -- confirm, and consider caching.
	    return Pix2Text(languages=lan_list)


	def latex_render(latex_str):
	    """Return ``latex_str`` on its own line between ``$$`` delimiter lines."""
	    return '$$\n{}\n$$'.format(latex_str)


	def recognize(lang_list, rec_type, resized_shape, image_file):
	    """Run Pix2Text on an uploaded image in the selected recognition mode.

	    Args:
	        lang_list: Language display names; each must be a key of ``LANGUAGES``.
	        rec_type: ``'Formula & Text'``, ``'Only Formula'`` or ``'Only Text'``.
	        resized_shape: Forwarded as ``resized_shape`` to the Pix2Text call in
	            ``'Formula & Text'`` mode; not used by the other two modes.
	        image_file: The image to recognize, or ``None`` when nothing has
	            been uploaded.

	    Returns:
	        A ``(text, detection_image_path)`` tuple. ``detection_image_path`` is
	        the file written via ``save_analysis_res`` in ``'Formula & Text'``
	        mode and ``None`` otherwise. In ``'Only Formula'`` mode the text is
	        wrapped by ``latex_render``.

	    Raises:
	        gr.Error: If ``image_file`` is ``None``.
	        ValueError: If ``rec_type`` is not one of the supported modes.
	        KeyError: If an entry of ``lang_list`` is not a key of ``LANGUAGES``.
	    """
	    # Fail early with a message the UI can show, instead of passing None
	    # into the model.
	    if image_file is None:
	        raise gr.Error('Please upload an image first.')
	    # An unknown mode used to fall through and return a bare None, which does
	    # not match the two outputs bound to this callback.
	    if rec_type not in ('Formula & Text', 'Only Formula', 'Only Text'):
	        raise ValueError(f'Unsupported rec_type: {rec_type!r}')

	    languages = [LANGUAGES[name] for name in lang_list]
	    p2t = get_p2t_model(languages)

	    if rec_type == 'Only Formula':
	        return latex_render(p2t.recognize_formula(image_file)), None
	    if rec_type == 'Only Text':
	        return p2t.recognize_text(image_file), None

	    # 'Formula & Text': the timestamp plus six distinct random letters keeps
	    # output file names from colliding between requests.
	    # NOTE(review): these files are written to the working directory and are
	    # never removed here -- confirm whether cleanup happens elsewhere.
	    suffix = ''.join(random.sample(string.ascii_letters, 6))
	    out_det_fp = f'out-det-{time.time()}-{suffix}.jpg'
	    outs = p2t(
	        image_file, resized_shape=resized_shape, save_analysis_res=out_det_fp
	    )
	    # Collapse the per-line results into a single text string.
	    only_text = merge_line_texts(outs, auto_line_break=True)
	    return only_text, out_det_fp


	def main():
	    """Build the Pix2Text Gradio demo and launch it.

	    The page has a title, a top row (settings, image upload, project links)
	    and a bottom row (detection image, recognized text). The Submit button
	    calls ``recognize`` with the selected languages, image type, resize
	    value and uploaded image, and writes its two return values to the text
	    box and the detection image.
	    """
	    # Local import keeps this function self-contained.
	    import textwrap

	    # Case-insensitive alphabetical order for the language dropdown.
	    langs = sorted(LANGUAGES, key=str.lower)

	    title = 'Demo'

	    # dedent() lets the literal follow the code's indentation without that
	    # indentation leaking into the Markdown. Table pipes are written
	    # unescaped: '\|' is an invalid escape sequence in a non-raw string and
	    # stops Markdown from recognizing the rows as a table.
	    table_desc = textwrap.dedent(
	        """
	        <div align="center">
	        <img src="https://www.notion.so/image/https%3A%2F%2Fprod-files-secure.s3.us-west-2.amazonaws.com%2F9341931a-53f0-48e1-b026-0f1ad17b457c%2Fc41e0b1d-4869-4e39-93db-631569e6a38d%2FUntitled.png?table=block&id=3d0819ca-2e1a-46a7-b6f3-b4cf89cd045c" width="120px"/>

	        [![Visitors](https://api.visitorbadge.io/api/visitors?path=https%3A%2F%2Fhuggingface.co%2Fspaces%2Fbreezedeus%2FCnOCR-Demo&labelColor=%23697689&countColor=%23f5c791&style=flat&labelStyle=upper)](https://visitorbadge.io/status?path=https%3A%2F%2Fhuggingface.co%2Fspaces%2Fbreezedeus%2FCnOCR-Demo)

	        [![Discord](https://img.shields.io/discord/1200765964434821260?logo=discord&label=Discord)](https://discord.gg/H9FmDSMA)

	        | | |
	        | ------------------------------- | --------------------------------------- |
	        | 🏄 Free Web Service | [p2t.breezedeus.com](https://p2t.breezedeus.com) |
	        | 📀 Code | [Github](https://github.com/breezedeus/pix2text) |
	        | 💬 Discord | [P2T @ Discord](https://discord.gg/H9FmDSMA) |
	        | 👨🏻‍💻 Author | [Breezedeus](https://www.breezedeus.com) |

	        If useful, please help to star 🌟 [Pix2Text](https://github.com/breezedeus/pix2text) 🙏
	        </div>
	        """
	    )

	    with gr.Blocks() as demo:
	        gr.HTML(
	            f'<h1 style="text-align: center; margin-bottom: 1rem;"><a href="https://github.com/breezedeus/pix2text" target="_blank">Pix2Text V1.0</a> {title}</h1>'
	        )

	        # Top row: settings | image upload | project links.
	        with gr.Row(equal_height=False):
	            with gr.Column(min_width=200, variant='panel', scale=3):
	                gr.Markdown('### Settings')
	                lang_list = gr.Dropdown(
	                    label='Text Languages',
	                    choices=langs,
	                    value=['English', 'Chinese Simplified'],
	                    multiselect=True,
	                    info='Which languages to be recognized as Texts.',
	                )
	                rec_type = gr.Radio(
	                    choices=['Formula & Text', 'Only Formula', 'Only Text'],
	                    label='Image Type',
	                    value='Formula & Text',
	                    info='Which type of image to be recognized.',
	                )
	                resized_shape = gr.Slider(
	                    label='resized_shape',
	                    minimum=512,
	                    maximum=2048,
	                    value=608,
	                    step=32,
	                )

	            with gr.Column(scale=6, variant='compact'):
	                gr.Markdown('### Upload Image to be Recognized')
	                image_file = gr.Image(
	                    label='Image', type="pil", image_mode='RGB', show_label=False
	                )
	                sub_btn = gr.Button("Submit", variant="primary")

	            with gr.Column(scale=2, variant='compact'):
	                gr.Markdown(table_desc)

	        # Bottom row: detection image | recognized text.
	        with gr.Row(equal_height=False):
	            with gr.Column(scale=1, variant='compact'):
	                gr.Markdown('Detection Result')
	                det_result = gr.Image(
	                    label='Detection Result', scale=1, show_label=False
	                )
	            with gr.Column(scale=1, variant='compact'):
	                gr.Markdown('Recognition Result')
	                rec_result = gr.Textbox(
	                    label='Recognition Result',
	                    lines=5,
	                    value='',
	                    scale=1,
	                    show_label=False,
	                    show_copy_button=True,
	                )

	        # The output order matches recognize()'s (text, detection image path)
	        # return tuple.
	        sub_btn.click(
	            recognize,
	            inputs=[lang_list, rec_type, resized_shape, image_file],
	            outputs=[rec_result, det_result],
	        )

	    demo.queue(max_size=10)
	    demo.launch()


	# Start the demo only when this file is executed as a script, not on import.
	if __name__ == '__main__':
	    main()