adamlu1 committed on
Commit
39f8e6b
1 Parent(s): eb8680c

add paddle ocr

Browse files
__pycache__/utils.cpython-312.pyc CHANGED
Binary files a/__pycache__/utils.cpython-312.pyc and b/__pycache__/utils.cpython-312.pyc differ
 
app.py CHANGED
@@ -77,7 +77,7 @@ def process(
77
  image_input.save(image_save_path)
78
  # import pdb; pdb.set_trace()
79
 
80
- ocr_bbox_rslt, is_goal_filtered = check_ocr_box(image_save_path, display_img = False, output_bb_format='xyxy', goal_filtering=None, easyocr_args={'paragraph': False, 'text_threshold':0.9})
81
  text, ocr_bbox = ocr_bbox_rslt
82
  # print('prompt:', prompt)
83
  dino_labled_img, label_coordinates, parsed_content_list = get_som_labeled_img(image_save_path, yolo_model, BOX_TRESHOLD = box_threshold, output_coord_in_ratio=True, ocr_bbox=ocr_bbox,draw_bbox_config=draw_bbox_config, caption_model_processor=caption_model_processor, ocr_text=text,iou_threshold=iou_threshold)
@@ -117,5 +117,5 @@ with gr.Blocks() as demo:
117
  )
118
 
119
  # demo.launch(debug=False, show_error=True, share=True)
120
- demo.launch(share=True, server_port=7861, server_name='0.0.0.0')
121
- # demo.queue().launch(share=False)
 
77
  image_input.save(image_save_path)
78
  # import pdb; pdb.set_trace()
79
 
80
+ ocr_bbox_rslt, is_goal_filtered = check_ocr_box(image_save_path, display_img = False, output_bb_format='xyxy', goal_filtering=None, easyocr_args={'paragraph': False, 'text_threshold':0.9}, use_paddleocr=True)
81
  text, ocr_bbox = ocr_bbox_rslt
82
  # print('prompt:', prompt)
83
  dino_labled_img, label_coordinates, parsed_content_list = get_som_labeled_img(image_save_path, yolo_model, BOX_TRESHOLD = box_threshold, output_coord_in_ratio=True, ocr_bbox=ocr_bbox,draw_bbox_config=draw_bbox_config, caption_model_processor=caption_model_processor, ocr_text=text,iou_threshold=iou_threshold)
 
117
  )
118
 
119
  # demo.launch(debug=False, show_error=True, share=True)
120
+ # demo.launch(share=True, server_port=7861, server_name='0.0.0.0')
121
+ demo.queue().launch(share=False)
imgs/saved_image_demo.png CHANGED
requirements.txt CHANGED
@@ -14,3 +14,5 @@ dill
14
  accelerate
15
  timm
16
  einops==0.8.0
 
 
 
14
  accelerate
15
  timm
16
  einops==0.8.0
17
+ paddlepaddle
18
+ paddleocr
utils.py CHANGED
@@ -18,7 +18,17 @@ import numpy as np
18
  # %matplotlib inline
19
  from matplotlib import pyplot as plt
20
  import easyocr
 
21
  reader = easyocr.Reader(['en'])
 
 
 
 
 
 
 
 
 
22
  import time
23
  import base64
24
 
@@ -370,14 +380,18 @@ def get_xywh_yolo(input):
370
 
371
 
372
 
373
- def check_ocr_box(image_path, display_img = True, output_bb_format='xywh', goal_filtering=None, easyocr_args=None):
374
- if easyocr_args is None:
375
- easyocr_args = {}
376
- result = reader.readtext(image_path, **easyocr_args)
377
- is_goal_filtered = False
378
- # print('goal filtering pred:', result[-5:])
379
- coord = [item[0] for item in result]
380
- text = [item[1] for item in result]
 
 
 
 
381
  # read the image using cv2
382
  if display_img:
383
  opencv_img = cv2.imread(image_path)
@@ -397,7 +411,7 @@ def check_ocr_box(image_path, display_img = True, output_bb_format='xywh', goal_
397
  elif output_bb_format == 'xyxy':
398
  bb = [get_xyxy(item) for item in coord]
399
  # print('bounding box!!!', bb)
400
- return (text, bb), is_goal_filtered
401
 
402
 
403
 
 
18
  # %matplotlib inline
19
  from matplotlib import pyplot as plt
20
  import easyocr
21
+ from paddleocr import PaddleOCR
22
  reader = easyocr.Reader(['en'])
23
+ paddle_ocr = PaddleOCR(
24
+ lang='en', # other lang also available
25
+ use_angle_cls=False,
26
+ use_gpu=False, # using cuda will conflict with pytorch in the same process
27
+ show_log=False,
28
+ max_batch_size=1024,
29
+ use_dilation=True, # improves accuracy
30
+ det_db_score_mode='slow', # improves accuracy
31
+ rec_batch_num=1024)
32
  import time
33
  import base64
34
 
 
380
 
381
 
382
 
383
+ def check_ocr_box(image_path, display_img = True, output_bb_format='xywh', goal_filtering=None, easyocr_args=None, use_paddleocr=False):
384
+ if use_paddleocr:
385
+ result = paddle_ocr.ocr(image_path, cls=False)[0]
386
+ coord = [item[0] for item in result]
387
+ text = [item[1][0] for item in result]
388
+ else: # EasyOCR
389
+ if easyocr_args is None:
390
+ easyocr_args = {}
391
+ result = reader.readtext(image_path, **easyocr_args)
392
+ # print('goal filtering pred:', result[-5:])
393
+ coord = [item[0] for item in result]
394
+ text = [item[1] for item in result]
395
  # read the image using cv2
396
  if display_img:
397
  opencv_img = cv2.imread(image_path)
 
411
  elif output_bb_format == 'xyxy':
412
  bb = [get_xyxy(item) for item in coord]
413
  # print('bounding box!!!', bb)
414
+ return (text, bb), goal_filtering
415
 
416
 
417