Spaces:
Sleeping
Sleeping
import os
import subprocess
import sys
# Install the bundled ./ezocr package before the third-party imports below
# (presumably it provides `easyocrlite` -- confirm against the repo layout).
# pip is run through the current interpreter so the package lands in the
# environment this script runs in, and the directory is passed as an argument:
# the old `cd ezocr; pip install .` chain would install the *current*
# directory whenever the `cd` failed.
_ezocr_install = subprocess.run(
    [sys.executable, "-m", "pip", "install", "./ezocr"],
    check=False,
)
if _ezocr_install.returncode != 0:
    # Still best effort, as before: the package may already be installed, so
    # report the failure instead of aborting start-up.
    print(
        f"warning: 'pip install ./ezocr' exited with code "
        f"{_ezocr_install.returncode}",
        file=sys.stderr,
    )
import gradio as gr | |
import pandas as pd | |
from PIL import ImageDraw | |
from easyocrlite import ReaderLite | |
from PIL import Image | |
from modelscope.pipelines import pipeline | |
from modelscope.utils.constant import Tasks | |
from modelscope.outputs import OutputKeys | |
# Stage 1: OCR detection -- this reader is used to find the text boxes.
reader = ReaderLite(gpu=True)

# Stage 2: OCR recognition -- recognizes the text for the detection results.
ocr_recognize = pipeline(
    Tasks.ocr_recognition,
    model_revision='v1.0.0',
    model='damo/ofa_ocr-recognition_general_base_zh',
)
def get_images(img: str, reader: ReaderLite, **kwargs):
    """Run ``reader.process`` on ``img`` and hand back its result unchanged.

    Args:
        img: Image to process; passed as-is to ``reader.process``.
        reader: Object whose ``process`` method performs the work.
        **kwargs: Extra options forwarded untouched to ``reader.process``.

    Returns:
        Whatever ``reader.process`` returns.
    """
    return reader.process(img, **kwargs)
# def ofa_ocr_gr(): | |
# def ocr_api(img): | |
# results = get_images(img, reader, max_size=4000, text_confidence=0.7, text_threshold=0.4, | |
# link_threshold=0.4, slope_ths=0., add_margin=0.04) | |
# box_list, image_list = zip(*results) | |
# ocr_result = [] | |
# for i, (box, image) in enumerate(zip(box_list, image_list)): | |
# image = Image.fromarray(image) | |
# result = ocr_recognize(image)[OutputKeys.TEXT][0].replace(" ", "") | |
# ocr_result.append([str(i + 1), result.replace(' ', '')]) | |
# result = pd.DataFrame(ocr_result, columns=['Box ID', 'Text']) | |
# return box_list,result ##返回box_list 方便作为api结果,并且预测的文字我也需要,因为这个只是需要安装boxlist排序就是最后结果 | |
# return ocr_api#返回子函数最后才能获取box ,至于ocr_demo这个gradio可有可无 | |
def ofa_ocr_gr():
    """Build and return the OCR entry point.

    Returns:
        The inner ``ocr_api`` function. The bare function is returned (rather
        than a Gradio demo object) so that callers can get at the detected
        boxes directly; the Gradio demo is optional.
    """

    def ocr_api(img):
        """Detect text boxes in ``img`` and recognize the text in each one.

        Args:
            img: Image forwarded unchanged to ``get_images``.

        Returns:
            ``(box_list, result)`` on success, where ``box_list`` is the tuple
            of detected boxes and ``result`` is a ``pandas.DataFrame`` with
            the columns ``'Box ID'`` (1-based, as a string) and ``'Text'``.
            ``None`` when nothing was detected or recognition failed.
            ``box_list`` is returned so it can serve as the API result; the
            recognized text is needed as well, because ordering it by the
            boxes gives the final output.
        """
        results = get_images(
            img,
            reader,
            max_size=4000,
            text_confidence=0.7,
            text_threshold=0.4,
            link_threshold=0.4,
            slope_ths=0.,
            add_margin=0.04,
        )
        print("ocr_api处理结果是")
        # Failures are reported and turned into a None result, not raised.
        try:
            # Assumes `results` is an iterable of (box, image) pairs -- TODO
            # confirm against ReaderLite.process.
            detections = list(results)
            if not detections:
                # Previously this case only surfaced as a cryptic tuple
                # unpacking ValueError swallowed by the handler below.
                print("ocr_api处理失败")
                print("no text boxes detected")
                return None
            box_list, image_list = zip(*detections)
            # Spaces are stripped from the recognized text once; the second
            # .replace() in the old code was a no-op.
            ocr_result = [
                [
                    str(box_id),
                    ocr_recognize(Image.fromarray(crop))[OutputKeys.TEXT][0]
                    .replace(" ", ""),
                ]
                for box_id, crop in enumerate(image_list, start=1)
            ]
            result = pd.DataFrame(ocr_result, columns=['Box ID', 'Text'])
            return box_list, result
        except Exception as e:
            print("ocr_api处理失败")
            print(e)
            return None

    return ocr_api
if __name__ == "__main__":
    # NOTE(review): ofa_ocr_gr() returns the inner ocr_api function, which has
    # no launch() attribute, so the call below raises AttributeError as
    # written. To serve a UI, the callable would first have to be wrapped in a
    # Gradio app (e.g. gr.Interface); the right input/output components are
    # not visible from this file -- confirm before changing.
    ocr_demo = ofa_ocr_gr()
    ocr_demo.launch(
        share=True,
        enable_queue=True,
    )