Danieldu committed on
Commit
17e862b
1 Parent(s): 3b57fe8

update model

Browse files
app.py CHANGED
@@ -1,13 +1,10 @@
1
  import os, io
2
- from paddleocr import PaddleOCR, draw_ocr
3
  from PIL import Image, ImageDraw
4
  import gradio as gr
5
 
6
 
7
- # 設定 Hugging Face Hub 的 Access Token
8
- os.environ["HF_TOKEN"] = "TWOCR"
9
-
10
- def inference(img_path):
11
 
12
  ocr = PaddleOCR(
13
  rec_char_dict_path='zhtw_common_dict.txt',
@@ -30,14 +27,36 @@ def inference(img_path):
30
  im_show_pil = draw_ocr(image, boxes, txts, scores, font_path="./simfang.ttf")
31
 
32
  return im_show_pil, "\n".join(txts)
33
-
34
 
35
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36
 
37
- css = ".output_image, .input_image {height: 40rem !important; width: 100% !important;}"
38
 
 
39
  gr.Interface(
40
- inference,
41
  [gr.Image(type='filepath', label='圖片上傳')],
42
  outputs=[
43
  gr.Image(type="pil", label="識別結果"),
 
1
  import os, io
2
+ from paddleocr import PaddleOCR, draw_ocr,PPStructure
3
  from PIL import Image, ImageDraw
4
  import gradio as gr
5
 
6
 
7
+ def inference__ppocr(img_path):
 
 
 
8
 
9
  ocr = PaddleOCR(
10
  rec_char_dict_path='zhtw_common_dict.txt',
 
27
  im_show_pil = draw_ocr(image, boxes, txts, scores, font_path="./simfang.ttf")
28
 
29
  return im_show_pil, "\n".join(txts)
 
30
 
31
 
32
+ def inference__ppstructure(img_path):
33
+
34
+ ppsutructure = PPStructure(
35
+ rec_char_dict_path='zhtw_common_dict.txt',
36
+ use_gpu=False,
37
+ rec_image_shape="3, 48, 320",
38
+ ser_dict_path='ppocr/utils/dict/kie/clinical_class_list.txt'
39
+ )
40
+
41
+ result = ppsutructure.__call__(img_path)
42
+
43
+ for idx in range(len(result)):
44
+ res = result[idx]
45
+ for line in res:
46
+ print(line)
47
+
48
+ result = result[0]
49
+ image = Image.open(img_path).convert('RGB')
50
+ boxes = [line[0] for line in result]
51
+ txts = [line[1][0] if line[1] else '' for line in result] # 確保在無文字時 txts 還是個空字串
52
+ scores = [line[1][1] for line in result]
53
+ im_show_pil = draw_ocr(image, boxes, txts, scores, font_path="./simfang.ttf")
54
 
55
+ return im_show_pil, "\n".join(txts)
56
 
57
+
58
  gr.Interface(
59
+ inference__ppstructure,
60
  [gr.Image(type='filepath', label='圖片上傳')],
61
  outputs=[
62
  gr.Image(type="pil", label="識別結果"),
paddleocr.py CHANGED
@@ -286,30 +286,18 @@ MODEL_URLS = {
286
  }
287
  },
288
  'PP-StructureV2': {
289
- 'table': {
290
  'en': {
291
  'url':
292
- 'https://paddleocr.bj.bcebos.com/ppstructure/models/slanet/en_ppstructure_mobile_v2.0_SLANet_infer.tar',
293
- 'dict_path': 'ppocr/utils/dict/table_structure_dict.txt'
294
- },
295
- 'ch': {
296
- 'url':
297
- 'https://paddleocr.bj.bcebos.com/ppstructure/models/slanet/ch_ppstructure_mobile_v2.0_SLANet_infer.tar',
298
- 'dict_path': 'ppocr/utils/dict/table_structure_dict_ch.txt'
299
- }
300
- },
301
- 'layout': {
302
- 'en': {
303
- 'url':
304
- 'https://paddleocr.bj.bcebos.com/ppstructure/models/layout/picodet_lcnet_x1_0_fgd_layout_infer.tar',
305
  'dict_path':
306
- 'ppocr/utils/dict/layout_dict/layout_publaynet_dict.txt'
307
  },
308
- 'ch': {
309
  'url':
310
- 'https://paddleocr.bj.bcebos.com/ppstructure/models/layout/picodet_lcnet_x1_0_fgd_layout_cdla_infer.tar',
311
  'dict_path':
312
- 'ppocr/utils/dict/layout_dict/layout_cdla_dict.txt'
313
  }
314
  }
315
  }
@@ -346,7 +334,7 @@ def parse_args(mMain=True):
346
 
347
  for action in parser._actions:
348
  if action.dest in [
349
- 'rec_char_dict_path', 'table_char_dict_path', 'layout_dict_path'
350
  ]:
351
  action.default = None
352
  if mMain:
@@ -409,6 +397,7 @@ def get_model_config(type, version, model_type, lang):
409
  model_urls = MODEL_URLS[type]
410
  if version not in model_urls:
411
  version = DEFAULT_MODEL_VERSION
 
412
  if model_type not in model_urls[version]:
413
  if model_type in model_urls[DEFAULT_MODEL_VERSION]:
414
  version = DEFAULT_MODEL_VERSION
@@ -416,7 +405,6 @@ def get_model_config(type, version, model_type, lang):
416
  logger.error('{} models is not support, we only support {}'.format(
417
  model_type, model_urls[DEFAULT_MODEL_VERSION].keys()))
418
  sys.exit(-1)
419
-
420
  if lang not in model_urls[version][model_type]:
421
  if lang in model_urls[DEFAULT_MODEL_VERSION][model_type]:
422
  version = DEFAULT_MODEL_VERSION
@@ -607,31 +595,44 @@ class PPStructure(StructureSystem):
607
  params.rec_model_dir, rec_url = confirm_model_dir_url(
608
  params.rec_model_dir,
609
  os.path.join(BASE_DIR, 'whl', 'rec', lang), rec_model_config['url'])
610
- table_model_config = get_model_config(
611
- 'STRUCTURE', params.structure_version, 'table', table_lang)
612
- params.table_model_dir, table_url = confirm_model_dir_url(
613
- params.table_model_dir,
614
- os.path.join(BASE_DIR, 'whl', 'table'), table_model_config['url'])
615
- layout_model_config = get_model_config(
616
- 'STRUCTURE', params.structure_version, 'layout', lang)
617
- params.layout_model_dir, layout_url = confirm_model_dir_url(
618
- params.layout_model_dir,
619
- os.path.join(BASE_DIR, 'whl', 'layout'), layout_model_config['url'])
 
 
 
 
 
 
 
 
 
620
  # download model
621
  maybe_download(params.det_model_dir, det_url)
622
  maybe_download(params.rec_model_dir, rec_url)
623
- maybe_download(params.table_model_dir, table_url)
624
- maybe_download(params.layout_model_dir, layout_url)
 
625
 
626
  if params.rec_char_dict_path is None:
627
  params.rec_char_dict_path = str(
628
  Path(__file__).parent / rec_model_config['dict_path'])
629
- if params.table_char_dict_path is None:
630
- params.table_char_dict_path = str(
631
- Path(__file__).parent / table_model_config['dict_path'])
632
- if params.layout_dict_path is None:
633
- params.layout_dict_path = str(
634
- Path(__file__).parent / layout_model_config['dict_path'])
 
 
 
635
  logger.debug(params)
636
  super().__init__(params)
637
 
 
286
  }
287
  },
288
  'PP-StructureV2': {
289
+ 'kie': {
290
  'en': {
291
  'url':
292
+ 'https://huggingface.co/spaces/CallMeMrFern/ocr/ppstructure/models/kie/ser_clinical.tar',
 
 
 
 
 
 
 
 
 
 
 
 
293
  'dict_path':
294
+ 'ppocr/utils/dict/kie/clinical_class_list.txt'
295
  },
296
+ 'tw': {
297
  'url':
298
+ 'https://huggingface.co/spaces/CallMeMrFern/ocr/ppstructure/models/kie/ser_clinical.tar',
299
  'dict_path':
300
+ 'ppocr/utils/dict/kie/clinical_class_list.txt'
301
  }
302
  }
303
  }
 
334
 
335
  for action in parser._actions:
336
  if action.dest in [
337
+ 'rec_char_dict_path', 'table_char_dict_path', 'layout_dict_path','kie_dict_path'
338
  ]:
339
  action.default = None
340
  if mMain:
 
397
  model_urls = MODEL_URLS[type]
398
  if version not in model_urls:
399
  version = DEFAULT_MODEL_VERSION
400
+
401
  if model_type not in model_urls[version]:
402
  if model_type in model_urls[DEFAULT_MODEL_VERSION]:
403
  version = DEFAULT_MODEL_VERSION
 
405
  logger.error('{} models is not support, we only support {}'.format(
406
  model_type, model_urls[DEFAULT_MODEL_VERSION].keys()))
407
  sys.exit(-1)
 
408
  if lang not in model_urls[version][model_type]:
409
  if lang in model_urls[DEFAULT_MODEL_VERSION][model_type]:
410
  version = DEFAULT_MODEL_VERSION
 
595
  params.rec_model_dir, rec_url = confirm_model_dir_url(
596
  params.rec_model_dir,
597
  os.path.join(BASE_DIR, 'whl', 'rec', lang), rec_model_config['url'])
598
+
599
+ # table_model_config = get_model_config(
600
+ # 'STRUCTURE', params.structure_version, 'table', table_lang)
601
+ # params.table_model_dir, table_url = confirm_model_dir_url(
602
+ # params.table_model_dir,
603
+ # os.path.join(BASE_DIR, 'whl', 'table'), table_model_config['url'])
604
+ # print(params.structure_version)
605
+ # layout_model_config = get_model_config(
606
+ # 'STRUCTURE', params.structure_version, 'layout', lang)
607
+ # params.layout_model_dir, layout_url = confirm_model_dir_url(
608
+ # params.layout_model_dir,
609
+ # os.path.join(BASE_DIR, 'whl', 'layout'), layout_model_config['url'])
610
+
611
+ ser_model_config = get_model_config(
612
+ 'STRUCTURE', params.structure_version, 'kie', table_lang)
613
+ params.ser_model_dir, ser_url = confirm_model_dir_url(
614
+ params.ser_model_dir,
615
+ os.path.join(BASE_DIR, 'whl', 'kie'), ser_model_config['url'])
616
+ print(params.ser_model_dir)
617
  # download model
618
  maybe_download(params.det_model_dir, det_url)
619
  maybe_download(params.rec_model_dir, rec_url)
620
+ # maybe_download(params.table_model_dir, table_url)
621
+ # maybe_download(params.layout_model_dir, layout_url)
622
+ maybe_download(params.ser_model_dir, ser_url)
623
 
624
  if params.rec_char_dict_path is None:
625
  params.rec_char_dict_path = str(
626
  Path(__file__).parent / rec_model_config['dict_path'])
627
+ # if params.table_char_dict_path is None:
628
+ # params.table_char_dict_path = str(
629
+ # Path(__file__).parent / table_model_config['dict_path'])
630
+ # if params.layout_dict_path is None:
631
+ # params.layout_dict_path = str(
632
+ # Path(__file__).parent / layout_model_config['dict_path'])
633
+ if params.ser_dict_path is None:
634
+ params.ser_dict_path = str(
635
+ Path(__file__).parent / ser_model_config['dict_path'])
636
  logger.debug(params)
637
  super().__init__(params)
638
 
ppocr/utils/dict/kie/clinical_class_list.txt ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ hospitalname_key
2
+ hospitalname_value
3
+ hospitaladdress_key
4
+ hospitaladdress_value
5
+ department_key
6
+ department_value
7
+ diagnose_key
8
+ diagnose_value
9
+ doctorcomment_key
10
+ doctorcomment_value
11
+ doctorname_key
12
+ doctorname_value
13
+ other
ppocr/utils/dict/{kie_dict → kie}/xfund_class_list.txt RENAMED
File without changes
ppocr/utils/e2e_utils/__pycache__/extract_textpoint_fast.cpython-310.pyc ADDED
Binary file (12.9 kB). View file
 
ppocr/utils/e2e_utils/__pycache__/extract_textpoint_slow.cpython-310.pyc ADDED
Binary file (13.6 kB). View file
 
ppocr/utils/e2e_utils/__pycache__/pgnet_pp_utils.cpython-310.pyc ADDED
Binary file (3.93 kB). View file
 
ppstructure/models/kie/ser_clinical.tar ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5f2f54efb1b23b0e6ac00238c7d5060f6a285ff06d0e2e5d0c30ce24e7fd0fff
3
+ size 1113108480
ppstructure/table/tablepyxl/__pycache__/__init__.cpython-37.pyc DELETED
Binary file (144 Bytes)
 
ppstructure/table/tablepyxl/__pycache__/style.cpython-37.pyc DELETED
Binary file (10.7 kB)
 
ppstructure/table/tablepyxl/__pycache__/tablepyxl.cpython-37.pyc DELETED
Binary file (3.59 kB)
 
ppstructure/utility.py CHANGED
@@ -65,7 +65,7 @@ def init_args():
65
  "--mode",
66
  type=str,
67
  choices=['structure', 'kie'],
68
- default='structure',
69
  help='structure and kie is supported')
70
  parser.add_argument(
71
  "--image_orientation",
 
65
  "--mode",
66
  type=str,
67
  choices=['structure', 'kie'],
68
+ default='kie',
69
  help='structure and kie is supported')
70
  parser.add_argument(
71
  "--image_orientation",
test.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"table": {"en": {"url": "https://paddleocr.bj.bcebos.com/ppstructure/models/slanet/en_ppstructure_mobile_v2.0_SLANet_infer.tar", "dict_path": "ppocr/utils/dict/table_structure_dict.txt"}, "ch": {"url": "https://paddleocr.bj.bcebos.com/ppstructure/models/slanet/ch_ppstructure_mobile_v2.0_SLANet_infer.tar", "dict_path": "ppocr/utils/dict/table_structure_dict_ch.txt"}}, "layout": {"en": {"url": "https://paddleocr.bj.bcebos.com/ppstructure/models/layout/picodet_lcnet_x1_0_fgd_layout_infer.tar", "dict_path": "ppocr/utils/dict/layout_dict/layout_publaynet_dict.txt"}, "ch": {"url": "https://paddleocr.bj.bcebos.com/ppstructure/models/layout/picodet_lcnet_x1_0_fgd_layout_cdla_infer.tar", "dict_path": "ppocr/utils/dict/layout_dict/layout_cdla_dict.txt"}}, "kie": {"en": {"url": "https://paddleocr.bj.bcebos.com/ppstructure/models/layout/picodet_lcnet_x1_0_fgd_layout_infer.tar", "dict_path": "ppocr/utils/dict/kie/clinical_class_list.txt"}, "ch": {"url": "https://huggingface.co/spaces/CallMeMrFern/ocr/ppstructure/models/kie/ser_clinical.tar", "dict_path": "ppocr/utils/dict/kie/clinical_class_list.txt"}}}
tools/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (129 Bytes). View file
 
tools/infer/__pycache__/predict_cls.cpython-310.pyc ADDED
Binary file (4.1 kB). View file
 
tools/infer/__pycache__/predict_det.cpython-310.pyc ADDED
Binary file (8.55 kB). View file
 
tools/infer/__pycache__/predict_rec.cpython-310.pyc ADDED
Binary file (13.7 kB). View file
 
tools/infer/__pycache__/predict_system.cpython-310.pyc ADDED
Binary file (7.11 kB). View file
 
tools/infer/__pycache__/utility.cpython-310.pyc ADDED
Binary file (17.3 kB). View file