"""Two-stage OCR: detect text boxes, then recognise the text in each box.

Importing this module installs the local ``ezocr`` package and loads both
models, so the import itself is slow and has side effects.
"""
import os

# Must run before the ``easyocrlite`` import below; the local ``ezocr``
# directory presumably provides that package -- TODO confirm.
os.system('cd ezocr;' 'pip install .; cd ..')

import gradio as gr
import pandas as pd
from PIL import ImageDraw
from easyocrlite import ReaderLite
from PIL import Image
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks
from modelscope.outputs import OutputKeys

# Step 1: OCR detection to find the text boxes.
reader = ReaderLite(gpu=True)

# Step 2: recognise the text inside each box found by the detection step.
ocr_recognize = pipeline(Tasks.ocr_recognition,
                         model='damo/ofa_ocr-recognition_general_base_zh',
                         model_revision='v1.0.0')


def get_images(img: str, reader: ReaderLite, **kwargs):
    """Run ``reader.process`` on ``img`` and return its result unchanged.

    Args:
        img: The image to process, forwarded as-is to ``reader.process``.
        reader: The detector whose ``process`` method is called.
        **kwargs: Extra keyword arguments forwarded to ``reader.process``.

    Returns:
        Whatever ``reader.process`` returns. ``ocr_api`` below unpacks it as
        an iterable of ``(box, image)`` pairs.
    """
    results = reader.process(img, **kwargs)
    return results


def ofa_ocr_gr():
    """Return the ``ocr_api`` callable that runs detection and recognition.

    The inner function is returned (rather than a Gradio demo object) so
    that callers can get at the detected boxes directly; the Gradio demo is
    optional and is built separately by ``_build_demo``.
    """

    def ocr_api(img):
        """Detect text boxes in ``img`` and recognise the text in each one.

        Returns:
            ``(box_list, result)`` on success, where ``box_list`` is a tuple
            of the detected boxes and ``result`` is a DataFrame with columns
            ``'Box ID'`` (1-based index as a string) and ``'Text'`` (the
            recognised text with spaces removed). The boxes are returned
            alongside the text because sorting the text by box position
            yields the final reading order.
            ``None`` if unpacking or recognition raises.
        """
        results = get_images(img, reader, max_size=4000, text_confidence=0.7,
                             text_threshold=0.4, link_threshold=0.4,
                             slope_ths=0., add_margin=0.04)
        print("ocr_api处理结果是")
        # Best-effort handling: any failure below (for example the unpacking
        # raising ValueError when ``results`` is empty) is reported on stdout
        # and turned into a ``None`` return instead of propagating.
        try:
            box_list, image_list = zip(*results)
            ocr_result = []
            for i, image in enumerate(image_list):
                crop = Image.fromarray(image)
                text = ocr_recognize(crop)[OutputKeys.TEXT][0]
                ocr_result.append([str(i + 1), text.replace(" ", "")])
            result = pd.DataFrame(ocr_result, columns=['Box ID', 'Text'])
            return box_list, result
        except Exception as e:
            print("ocr_api处理失败")
            print(e)
            return None

    return ocr_api


def _build_demo():
    """Wrap ``ocr_api`` in a ``gr.Interface`` so it can be launched.

    ``ofa_ocr_gr()`` returns a plain function, which has no ``launch``
    method; this helper builds the launchable Gradio object around it.
    """
    ocr_api = ofa_ocr_gr()

    def demo_fn(img):
        outcome = ocr_api(img)
        if outcome is None:
            # ``ocr_api`` signals failure with None; show empty outputs.
            return "", pd.DataFrame(columns=['Box ID', 'Text'])
        box_list, table = outcome
        return str(box_list), table

    # NOTE(review): ``type='filepath'`` matches the ``img: str`` annotation
    # on ``get_images``; confirm that ``ReaderLite.process`` accepts a path.
    return gr.Interface(
        fn=demo_fn,
        inputs=gr.Image(type='filepath', label='Image'),
        outputs=[
            gr.Textbox(label='Boxes'),
            gr.Dataframe(headers=['Box ID', 'Text'], label='Text'),
        ],
    )


if __name__ == "__main__":
    ocr_demo = _build_demo()
    ocr_demo.launch(
        share=True,
        enable_queue=True,
    )