ocr_with_fastapi / ofa_ocr.py
rogerxavier's picture
Update ofa_ocr.py
fcb2388 verified
raw
history blame
2.97 kB
import os
# Install the local ``ezocr`` package before the third-party imports below run
# (presumably it provides ``easyocrlite`` -- confirm against the ezocr sources).
import subprocess
import sys

_EZOCR_DIR = 'ezocr'
if os.path.isdir(_EZOCR_DIR):
    # Run pip through the current interpreter so the package lands in the
    # environment this script is actually using, and pass an argument list
    # with ``cwd`` instead of a shell string: the old ``cd ezocr; pip install .``
    # form still ran ``pip install .`` in the current directory when ``cd`` failed.
    # check=False keeps the step best-effort: a failed install does not abort
    # here, it surfaces later as an ImportError if the package is missing.
    subprocess.run(
        [sys.executable, '-m', 'pip', 'install', '.'],
        cwd=_EZOCR_DIR,
        check=False,
    )
else:
    print(f"'{_EZOCR_DIR}' directory not found; skipping local package install")
import gradio as gr
import pandas as pd
from PIL import ImageDraw
from easyocrlite import ReaderLite
from PIL import Image
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks
from modelscope.outputs import OutputKeys
# Step 1: OCR detection -- this reader is used to find the text boxes.
reader = ReaderLite(
    gpu=True,
)

# Step 2: OCR recognition -- this pipeline recognises the text for the
# regions produced by the detection step.
ocr_recognize = pipeline(
    Tasks.ocr_recognition,
    model='damo/ofa_ocr-recognition_general_base_zh',
    model_revision='v1.0.0',
)
def get_images(img: str, reader: ReaderLite, **kwargs):
    """Run ``reader.process`` on *img* and return its result unchanged.

    Args:
        img: Image reference handed straight to ``reader.process``
            (annotated as ``str``; presumably a file path -- confirm
            against ReaderLite).
        reader: Object whose ``process`` method performs the detection.
        **kwargs: Extra options forwarded verbatim to ``reader.process``.

    Returns:
        Whatever ``reader.process`` returns; ``ofa_ocr_gr`` unpacks it as
        a sequence of ``(box, image)`` pairs.
    """
    return reader.process(img, **kwargs)
# def ofa_ocr_gr():
# def ocr_api(img):
# results = get_images(img, reader, max_size=4000, text_confidence=0.7, text_threshold=0.4,
# link_threshold=0.4, slope_ths=0., add_margin=0.04)
# box_list, image_list = zip(*results)
# ocr_result = []
# for i, (box, image) in enumerate(zip(box_list, image_list)):
# image = Image.fromarray(image)
# result = ocr_recognize(image)[OutputKeys.TEXT][0].replace(" ", "")
# ocr_result.append([str(i + 1), result.replace(' ', '')])
# result = pd.DataFrame(ocr_result, columns=['Box ID', 'Text'])
# return box_list,result ##返回box_list 方便作为api结果,并且预测的文字我也需要,因为这个只是需要安装boxlist排序就是最后结果
# return ocr_api#返回子函数最后才能获取box ,至于ocr_demo这个gradio可有可无
def ofa_ocr_gr():
    """Build and return the ``ocr_api`` callable that OCRs a single image.

    The inner function is returned (rather than a Gradio app) so that callers
    can obtain the detected boxes directly; a Gradio demo on top is optional.

    Returns:
        A function ``ocr_api(img)``; see its docstring for the result shape.
    """
    import traceback

    def ocr_api(img):
        """Detect text boxes in *img* and recognise the text of each box.

        Args:
            img: Image reference forwarded to ``get_images``.

        Returns:
            ``(box_list, table)`` on success: ``box_list`` is the tuple of
            detected boxes and ``table`` is a DataFrame with the columns
            ``'Box ID'`` (1-based, in detection order) and ``'Text'``
            (recognised text with spaces removed).
            ``None`` when no box was detected or when processing failed.
        """
        results = get_images(img, reader, max_size=4000, text_confidence=0.7, text_threshold=0.4,
                             link_threshold=0.4, slope_ths=0., add_margin=0.04)
        print("ocr_api处理结果是")
        # Catch-all boundary: any failure below is reported and mapped to None
        # so a single bad image does not take the caller down.
        try:
            detections = list(results)
            if not detections:
                # Nothing detected. Previously this case only surfaced as an
                # unpacking ValueError; report it explicitly instead.
                print("ocr_api处理失败")
                print("no text boxes detected")
                return None
            box_list, image_list = zip(*detections)
            ocr_result = [
                [str(idx), ocr_recognize(Image.fromarray(crop))[OutputKeys.TEXT][0].replace(" ", "")]
                for idx, crop in enumerate(image_list, start=1)
            ]
            result = pd.DataFrame(ocr_result, columns=['Box ID', 'Text'])
            # box_list is returned as part of the API result together with the
            # recognised text; the final result only needs ordering by box_list.
            return box_list, result
        except Exception as e:
            print("ocr_api处理失败")
            print(e)
            # Keep the full traceback: the message alone rarely identifies
            # which stage (unpacking, image conversion, recognition) failed.
            traceback.print_exc()
            return None

    return ocr_api
if __name__ == "__main__":
    # ofa_ocr_gr() returns a plain function, which has no ``launch`` method,
    # so it is wrapped in a Gradio interface before being served.
    ocr_api = ofa_ocr_gr()

    def _demo(img):
        """Adapt the result of ``ocr_api`` to the two Gradio output components."""
        outcome = ocr_api(img)
        if outcome is None:
            # ocr_api signals "nothing detected / failed" with None.
            return "", pd.DataFrame(columns=['Box ID', 'Text'])
        boxes, table = outcome
        # Boxes may be array-like objects; convert them to plain lists when
        # possible so the textual form is readable.
        plain_boxes = [b.tolist() if hasattr(b, "tolist") else b for b in boxes]
        return str(plain_boxes), table

    ocr_demo = gr.Interface(
        fn=_demo,
        # get_images annotates its image argument as ``str``, so hand the
        # callback a file path rather than an array.
        inputs=gr.Image(type="filepath"),
        outputs=[gr.Textbox(label="Boxes"), gr.Dataframe(label="Text")],
    )
    # queue() replaces the ``enable_queue`` launch argument, which newer
    # Gradio releases no longer accept.
    ocr_demo.queue().launch(
        share=True,
    )