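# Spaces: Sleeping
# (Status banner captured from the hosting page together with the source; it
# is not part of the program.)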
import os
import subprocess
import sys

# Install the local ``ezocr`` package before the ``easyocrlite`` import below
# (presumably ``ezocr`` is what provides ``easyocrlite`` -- confirm against the
# repository layout).  pip is invoked through the running interpreter with an
# argument list and ``cwd`` instead of a shell string of ``cd``/``pip``
# commands, so a missing directory can no longer make pip install whatever
# project happens to sit in the current directory.  The exit status is
# deliberately not checked: the install stays best-effort, exactly as before.
if os.path.isdir('ezocr'):
    subprocess.run([sys.executable, '-m', 'pip', 'install', '.'],
                   cwd='ezocr', check=False)

import gradio as gr
import pandas as pd
from PIL import ImageDraw
from easyocrlite import ReaderLite
from PIL import Image
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks
from modelscope.outputs import OutputKeys
# Step 1: OCR detection -- the reader finds the text boxes.
reader = ReaderLite(gpu=True)

# Step 2: OCR recognition -- this pipeline recognises the text of each box
# produced by the detection step.
ocr_recognize = pipeline(
    Tasks.ocr_recognition,
    model='damo/ofa_ocr-recognition_general_base_zh',
    model_revision='v1.0.0',
)
def get_images(img: str, reader: ReaderLite, **kwargs):
    """Run the detector on ``img`` and hand back its output unchanged.

    Args:
        img: Image to analyse. It is forwarded untouched to
            ``reader.process``; note that ``ocr_api`` passes the object
            delivered by its ``type='pil'`` input component rather than a
            string.
        reader: Object exposing a ``process(img, **kwargs)`` method.
        **kwargs: Extra options forwarded verbatim to ``reader.process``.

    Returns:
        Whatever ``reader.process`` returns.
    """
    return reader.process(img, **kwargs)
def draw_boxes(image, bounds, color='red', width=4):
    """Outline every box on ``image`` and label it with its 1-based number.

    Args:
        image: Image handed to ``ImageDraw.Draw``; it is drawn on in place.
        bounds: Iterable of boxes, each unpacking into exactly four corner
            points whose first two items are the x and y coordinates.
        color: Fill colour used for both the label and the outline.
        width: Line width of the outline.

    Returns:
        The same ``image`` object, now annotated.
    """
    canvas = ImageDraw.Draw(image)
    for number, (first, second, third, fourth) in enumerate(bounds, start=1):
        # The label sits 5 px right of and below the first corner.
        label_position = (first[0] + 5, first[1] + 5)
        canvas.text(label_position, str(number), fill=color, align='center')

        # Flatten the corners into x, y, x, y, ... and repeat the first corner
        # at the end so the outline is closed.
        outline = [
            coordinate
            for corner in (first, second, third, fourth, first)
            for coordinate in corner
        ]
        canvas.line(outline, fill=color, width=width)
    return image
def ofa_ocr_gr():
    """Build the Gradio interface of the OFA-based OCR demo.

    Returns:
        The ``gr.Interface`` wired to the nested ``ocr_api`` callback. It is
        not launched here; that is left to the caller.
    """

    def ocr_api(img):
        """Detect text boxes in ``img``, recognise each one and tabulate it.

        Args:
            img: Image delivered by the ``type='pil'`` input component. It is
                annotated in place by ``draw_boxes``.

        Returns:
            A tuple ``(img, table, box_list)``: the annotated image, a
            DataFrame with the columns ``'Box ID'`` and ``'Text'`` (one row
            per detected box, empty when nothing was detected) and the tuple
            of detected boxes.
        """
        # Materialise the detector output so that emptiness can be tested no
        # matter which kind of iterable the reader hands back.
        results = list(get_images(
            img, reader, max_size=4000, text_confidence=0.7,
            text_threshold=0.4, link_threshold=0.4, slope_ths=0.,
            add_margin=0.04))

        # ``zip(*[])`` yields nothing, so unpacking it into two names raises
        # ValueError; fall back to two empty tuples when no text was found.
        box_list, image_list = zip(*results) if results else ((), ())
        draw_boxes(img, box_list)

        ocr_result = []
        for box_id, crop in enumerate(image_list, start=1):
            text = ocr_recognize(Image.fromarray(crop))[OutputKeys.TEXT][0]
            # Spaces are stripped from the recognised text.
            ocr_result.append([str(box_id), text.replace(' ', '')])
        result = pd.DataFrame(ocr_result, columns=['Box ID', 'Text'])

        # box_list is returned as well to make model fine-tuning easier.
        # NOTE(review): three values are returned while the Interface below
        # declares two outputs -- confirm that the Gradio version in use
        # tolerates the extra value.
        return img, result, box_list

    examples = [
        "http://xingchen-data.oss-cn-zhangjiakou.aliyuncs.com/maas/ocr/qiaodaima.png",
        "http://xingchen-data.oss-cn-zhangjiakou.aliyuncs.com/maas/ocr/shupai.png",
        "http://xingchen-data.oss-cn-zhangjiakou.aliyuncs.com/maas/ocr/ocr_essay.jpg",
        "http://xingchen-data.oss-cn-zhangjiakou.aliyuncs.com/maas/ocr/chinese.jpg",
        "http://xingchen-data.oss-cn-zhangjiakou.aliyuncs.com/maas/ocr/benpao.jpeg",
        "http://xingchen-data.oss-cn-zhangjiakou.aliyuncs.com/maas/ocr/gaidao.jpeg",
    ]
    title = "<h1 align='center'>基于OFA的OCR识别的应用</h1>"
    description = '中文OCR体验区,欢迎上传图片,静待检测文字返回~ 相关OCR代码和模型都已在ModelScope开源,支持finetune,欢迎大家在平台上使用!(注:受资源限制,这里只部署了通用OCR模型。)'

    ocr_input_image = gr.components.Image(label='图片', type='pil')
    ocr_output_image = gr.components.Image(label='图片')
    ocr_output_text = gr.components.Dataframe(label='OCR结果', headers=['ID', '文本'])

    ocr_demo = gr.Interface(
        fn=ocr_api,
        inputs=[ocr_input_image],
        outputs=[ocr_output_image, ocr_output_text],
        title=title,
        description=description,
        allow_flagging='never',
        examples=examples,
        examples_per_page=5,
    )
    return ocr_demo
if __name__ == "__main__":
    # Build the demo and serve it only when this file is run as a script.
    ocr_demo = ofa_ocr_gr()
    ocr_demo.launch(share=True, enable_queue=True)