Danieldu
committed on
Commit
•
17e862b
1
Parent(s):
3b57fe8
update model
Browse files
- app.py +27 -8
- paddleocr.py +39 -38
- ppocr/utils/dict/kie/clinical_class_list.txt +13 -0
- ppocr/utils/dict/{kie_dict → kie}/xfund_class_list.txt +0 -0
- ppocr/utils/e2e_utils/__pycache__/extract_textpoint_fast.cpython-310.pyc +0 -0
- ppocr/utils/e2e_utils/__pycache__/extract_textpoint_slow.cpython-310.pyc +0 -0
- ppocr/utils/e2e_utils/__pycache__/pgnet_pp_utils.cpython-310.pyc +0 -0
- ppstructure/models/kie/ser_clinical.tar +3 -0
- ppstructure/table/tablepyxl/__pycache__/__init__.cpython-37.pyc +0 -0
- ppstructure/table/tablepyxl/__pycache__/style.cpython-37.pyc +0 -0
- ppstructure/table/tablepyxl/__pycache__/tablepyxl.cpython-37.pyc +0 -0
- ppstructure/utility.py +1 -1
- test.json +1 -0
- tools/__pycache__/__init__.cpython-310.pyc +0 -0
- tools/infer/__pycache__/predict_cls.cpython-310.pyc +0 -0
- tools/infer/__pycache__/predict_det.cpython-310.pyc +0 -0
- tools/infer/__pycache__/predict_rec.cpython-310.pyc +0 -0
- tools/infer/__pycache__/predict_system.cpython-310.pyc +0 -0
- tools/infer/__pycache__/utility.cpython-310.pyc +0 -0
app.py
CHANGED
@@ -1,13 +1,10 @@
|
|
1 |
import os, io
|
2 |
-
from paddleocr import PaddleOCR, draw_ocr
|
3 |
from PIL import Image, ImageDraw
|
4 |
import gradio as gr
|
5 |
|
6 |
|
7 |
-
|
8 |
-
os.environ["HF_TOKEN"] = "TWOCR"
|
9 |
-
|
10 |
-
def inference(img_path):
|
11 |
|
12 |
ocr = PaddleOCR(
|
13 |
rec_char_dict_path='zhtw_common_dict.txt',
|
@@ -30,14 +27,36 @@ def inference(img_path):
|
|
30 |
im_show_pil = draw_ocr(image, boxes, txts, scores, font_path="./simfang.ttf")
|
31 |
|
32 |
return im_show_pil, "\n".join(txts)
|
33 |
-
|
34 |
|
35 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
36 |
|
37 |
-
|
38 |
|
|
|
39 |
gr.Interface(
|
40 |
-
|
41 |
[gr.Image(type='filepath', label='圖片上傳')],
|
42 |
outputs=[
|
43 |
gr.Image(type="pil", label="識別結果"),
|
|
|
1 |
import os, io
|
2 |
+
from paddleocr import PaddleOCR, draw_ocr,PPStructure
|
3 |
from PIL import Image, ImageDraw
|
4 |
import gradio as gr
|
5 |
|
6 |
|
7 |
+
def inference__ppocr(img_path):
|
|
|
|
|
|
|
8 |
|
9 |
ocr = PaddleOCR(
|
10 |
rec_char_dict_path='zhtw_common_dict.txt',
|
|
|
27 |
im_show_pil = draw_ocr(image, boxes, txts, scores, font_path="./simfang.ttf")
|
28 |
|
29 |
return im_show_pil, "\n".join(txts)
|
|
|
30 |
|
31 |
|
32 |
+
def inference__ppstructure(img_path):
|
33 |
+
|
34 |
+
ppsutructure = PPStructure(
|
35 |
+
rec_char_dict_path='zhtw_common_dict.txt',
|
36 |
+
use_gpu=False,
|
37 |
+
rec_image_shape="3, 48, 320",
|
38 |
+
ser_dict_path='ppocr/utils/dict/kie/clinical_class_list.txt'
|
39 |
+
)
|
40 |
+
|
41 |
+
result = ppsutructure.__call__(img_path)
|
42 |
+
|
43 |
+
for idx in range(len(result)):
|
44 |
+
res = result[idx]
|
45 |
+
for line in res:
|
46 |
+
print(line)
|
47 |
+
|
48 |
+
result = result[0]
|
49 |
+
image = Image.open(img_path).convert('RGB')
|
50 |
+
boxes = [line[0] for line in result]
|
51 |
+
txts = [line[1][0] if line[1] else '' for line in result] # 確保在無文字時 txts 還是個空字串
|
52 |
+
scores = [line[1][1] for line in result]
|
53 |
+
im_show_pil = draw_ocr(image, boxes, txts, scores, font_path="./simfang.ttf")
|
54 |
|
55 |
+
return im_show_pil, "\n".join(txts)
|
56 |
|
57 |
+
|
58 |
gr.Interface(
|
59 |
+
inference__ppstructure,
|
60 |
[gr.Image(type='filepath', label='圖片上傳')],
|
61 |
outputs=[
|
62 |
gr.Image(type="pil", label="識別結果"),
|
paddleocr.py
CHANGED
@@ -286,30 +286,18 @@ MODEL_URLS = {
|
|
286 |
}
|
287 |
},
|
288 |
'PP-StructureV2': {
|
289 |
-
'
|
290 |
'en': {
|
291 |
'url':
|
292 |
-
'https://
|
293 |
-
'dict_path': 'ppocr/utils/dict/table_structure_dict.txt'
|
294 |
-
},
|
295 |
-
'ch': {
|
296 |
-
'url':
|
297 |
-
'https://paddleocr.bj.bcebos.com/ppstructure/models/slanet/ch_ppstructure_mobile_v2.0_SLANet_infer.tar',
|
298 |
-
'dict_path': 'ppocr/utils/dict/table_structure_dict_ch.txt'
|
299 |
-
}
|
300 |
-
},
|
301 |
-
'layout': {
|
302 |
-
'en': {
|
303 |
-
'url':
|
304 |
-
'https://paddleocr.bj.bcebos.com/ppstructure/models/layout/picodet_lcnet_x1_0_fgd_layout_infer.tar',
|
305 |
'dict_path':
|
306 |
-
'ppocr/utils/dict/
|
307 |
},
|
308 |
-
'
|
309 |
'url':
|
310 |
-
'https://
|
311 |
'dict_path':
|
312 |
-
'ppocr/utils/dict/
|
313 |
}
|
314 |
}
|
315 |
}
|
@@ -346,7 +334,7 @@ def parse_args(mMain=True):
|
|
346 |
|
347 |
for action in parser._actions:
|
348 |
if action.dest in [
|
349 |
-
'rec_char_dict_path', 'table_char_dict_path', 'layout_dict_path'
|
350 |
]:
|
351 |
action.default = None
|
352 |
if mMain:
|
@@ -409,6 +397,7 @@ def get_model_config(type, version, model_type, lang):
|
|
409 |
model_urls = MODEL_URLS[type]
|
410 |
if version not in model_urls:
|
411 |
version = DEFAULT_MODEL_VERSION
|
|
|
412 |
if model_type not in model_urls[version]:
|
413 |
if model_type in model_urls[DEFAULT_MODEL_VERSION]:
|
414 |
version = DEFAULT_MODEL_VERSION
|
@@ -416,7 +405,6 @@ def get_model_config(type, version, model_type, lang):
|
|
416 |
logger.error('{} models is not support, we only support {}'.format(
|
417 |
model_type, model_urls[DEFAULT_MODEL_VERSION].keys()))
|
418 |
sys.exit(-1)
|
419 |
-
|
420 |
if lang not in model_urls[version][model_type]:
|
421 |
if lang in model_urls[DEFAULT_MODEL_VERSION][model_type]:
|
422 |
version = DEFAULT_MODEL_VERSION
|
@@ -607,31 +595,44 @@ class PPStructure(StructureSystem):
|
|
607 |
params.rec_model_dir, rec_url = confirm_model_dir_url(
|
608 |
params.rec_model_dir,
|
609 |
os.path.join(BASE_DIR, 'whl', 'rec', lang), rec_model_config['url'])
|
610 |
-
|
611 |
-
|
612 |
-
params.
|
613 |
-
|
614 |
-
|
615 |
-
|
616 |
-
|
617 |
-
|
618 |
-
|
619 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
620 |
# download model
|
621 |
maybe_download(params.det_model_dir, det_url)
|
622 |
maybe_download(params.rec_model_dir, rec_url)
|
623 |
-
maybe_download(params.table_model_dir, table_url)
|
624 |
-
maybe_download(params.layout_model_dir, layout_url)
|
|
|
625 |
|
626 |
if params.rec_char_dict_path is None:
|
627 |
params.rec_char_dict_path = str(
|
628 |
Path(__file__).parent / rec_model_config['dict_path'])
|
629 |
-
if params.table_char_dict_path is None:
|
630 |
-
|
631 |
-
|
632 |
-
if params.layout_dict_path is None:
|
633 |
-
|
634 |
-
|
|
|
|
|
|
|
635 |
logger.debug(params)
|
636 |
super().__init__(params)
|
637 |
|
|
|
286 |
}
|
287 |
},
|
288 |
'PP-StructureV2': {
|
289 |
+
'kie': {
|
290 |
'en': {
|
291 |
'url':
|
292 |
+
'https://huggingface.co/spaces/CallMeMrFern/ocr/ppstructure/models/kie/ser_clinical.tar',
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
293 |
'dict_path':
|
294 |
+
'ppocr/utils/dict/kie/clinical_class_list.txt'
|
295 |
},
|
296 |
+
'tw': {
|
297 |
'url':
|
298 |
+
'https://huggingface.co/spaces/CallMeMrFern/ocr/ppstructure/models/kie/ser_clinical.tar',
|
299 |
'dict_path':
|
300 |
+
'ppocr/utils/dict/kie/clinical_class_list.txt'
|
301 |
}
|
302 |
}
|
303 |
}
|
|
|
334 |
|
335 |
for action in parser._actions:
|
336 |
if action.dest in [
|
337 |
+
'rec_char_dict_path', 'table_char_dict_path', 'layout_dict_path','kie_dict_path'
|
338 |
]:
|
339 |
action.default = None
|
340 |
if mMain:
|
|
|
397 |
model_urls = MODEL_URLS[type]
|
398 |
if version not in model_urls:
|
399 |
version = DEFAULT_MODEL_VERSION
|
400 |
+
|
401 |
if model_type not in model_urls[version]:
|
402 |
if model_type in model_urls[DEFAULT_MODEL_VERSION]:
|
403 |
version = DEFAULT_MODEL_VERSION
|
|
|
405 |
logger.error('{} models is not support, we only support {}'.format(
|
406 |
model_type, model_urls[DEFAULT_MODEL_VERSION].keys()))
|
407 |
sys.exit(-1)
|
|
|
408 |
if lang not in model_urls[version][model_type]:
|
409 |
if lang in model_urls[DEFAULT_MODEL_VERSION][model_type]:
|
410 |
version = DEFAULT_MODEL_VERSION
|
|
|
595 |
params.rec_model_dir, rec_url = confirm_model_dir_url(
|
596 |
params.rec_model_dir,
|
597 |
os.path.join(BASE_DIR, 'whl', 'rec', lang), rec_model_config['url'])
|
598 |
+
|
599 |
+
# table_model_config = get_model_config(
|
600 |
+
# 'STRUCTURE', params.structure_version, 'table', table_lang)
|
601 |
+
# params.table_model_dir, table_url = confirm_model_dir_url(
|
602 |
+
# params.table_model_dir,
|
603 |
+
# os.path.join(BASE_DIR, 'whl', 'table'), table_model_config['url'])
|
604 |
+
# print(params.structure_version)
|
605 |
+
# layout_model_config = get_model_config(
|
606 |
+
# 'STRUCTURE', params.structure_version, 'layout', lang)
|
607 |
+
# params.layout_model_dir, layout_url = confirm_model_dir_url(
|
608 |
+
# params.layout_model_dir,
|
609 |
+
# os.path.join(BASE_DIR, 'whl', 'layout'), layout_model_config['url'])
|
610 |
+
|
611 |
+
ser_model_config = get_model_config(
|
612 |
+
'STRUCTURE', params.structure_version, 'kie', table_lang)
|
613 |
+
params.ser_model_dir, ser_url = confirm_model_dir_url(
|
614 |
+
params.ser_model_dir,
|
615 |
+
os.path.join(BASE_DIR, 'whl', 'kie'), ser_model_config['url'])
|
616 |
+
print(params.ser_model_dir)
|
617 |
# download model
|
618 |
maybe_download(params.det_model_dir, det_url)
|
619 |
maybe_download(params.rec_model_dir, rec_url)
|
620 |
+
# maybe_download(params.table_model_dir, table_url)
|
621 |
+
# maybe_download(params.layout_model_dir, layout_url)
|
622 |
+
maybe_download(params.ser_model_dir, ser_url)
|
623 |
|
624 |
if params.rec_char_dict_path is None:
|
625 |
params.rec_char_dict_path = str(
|
626 |
Path(__file__).parent / rec_model_config['dict_path'])
|
627 |
+
# if params.table_char_dict_path is None:
|
628 |
+
# params.table_char_dict_path = str(
|
629 |
+
# Path(__file__).parent / table_model_config['dict_path'])
|
630 |
+
# if params.layout_dict_path is None:
|
631 |
+
# params.layout_dict_path = str(
|
632 |
+
# Path(__file__).parent / layout_model_config['dict_path'])
|
633 |
+
if params.ser_dict_path is None:
|
634 |
+
params.ser_dict_path = str(
|
635 |
+
Path(__file__).parent / ser_model_config['dict_path'])
|
636 |
logger.debug(params)
|
637 |
super().__init__(params)
|
638 |
|
ppocr/utils/dict/kie/clinical_class_list.txt
ADDED
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
hospitalname_key
|
2 |
+
hospitalname_value
|
3 |
+
hospitaladdress_key
|
4 |
+
hospitaladdress_value
|
5 |
+
department_key
|
6 |
+
department_value
|
7 |
+
diagnose_key
|
8 |
+
diagnose_value
|
9 |
+
doctorcomment_key
|
10 |
+
doctorcomment_value
|
11 |
+
doctorname_key
|
12 |
+
doctorname_value
|
13 |
+
other
|
ppocr/utils/dict/{kie_dict → kie}/xfund_class_list.txt
RENAMED
File without changes
|
ppocr/utils/e2e_utils/__pycache__/extract_textpoint_fast.cpython-310.pyc
ADDED
Binary file (12.9 kB). View file
|
|
ppocr/utils/e2e_utils/__pycache__/extract_textpoint_slow.cpython-310.pyc
ADDED
Binary file (13.6 kB). View file
|
|
ppocr/utils/e2e_utils/__pycache__/pgnet_pp_utils.cpython-310.pyc
ADDED
Binary file (3.93 kB). View file
|
|
ppstructure/models/kie/ser_clinical.tar
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5f2f54efb1b23b0e6ac00238c7d5060f6a285ff06d0e2e5d0c30ce24e7fd0fff
|
3 |
+
size 1113108480
|
ppstructure/table/tablepyxl/__pycache__/__init__.cpython-37.pyc
DELETED
Binary file (144 Bytes)
|
|
ppstructure/table/tablepyxl/__pycache__/style.cpython-37.pyc
DELETED
Binary file (10.7 kB)
|
|
ppstructure/table/tablepyxl/__pycache__/tablepyxl.cpython-37.pyc
DELETED
Binary file (3.59 kB)
|
|
ppstructure/utility.py
CHANGED
@@ -65,7 +65,7 @@ def init_args():
|
|
65 |
"--mode",
|
66 |
type=str,
|
67 |
choices=['structure', 'kie'],
|
68 |
-
default='
|
69 |
help='structure and kie is supported')
|
70 |
parser.add_argument(
|
71 |
"--image_orientation",
|
|
|
65 |
"--mode",
|
66 |
type=str,
|
67 |
choices=['structure', 'kie'],
|
68 |
+
default='kie',
|
69 |
help='structure and kie is supported')
|
70 |
parser.add_argument(
|
71 |
"--image_orientation",
|
test.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"table": {"en": {"url": "https://paddleocr.bj.bcebos.com/ppstructure/models/slanet/en_ppstructure_mobile_v2.0_SLANet_infer.tar", "dict_path": "ppocr/utils/dict/table_structure_dict.txt"}, "ch": {"url": "https://paddleocr.bj.bcebos.com/ppstructure/models/slanet/ch_ppstructure_mobile_v2.0_SLANet_infer.tar", "dict_path": "ppocr/utils/dict/table_structure_dict_ch.txt"}}, "layout": {"en": {"url": "https://paddleocr.bj.bcebos.com/ppstructure/models/layout/picodet_lcnet_x1_0_fgd_layout_infer.tar", "dict_path": "ppocr/utils/dict/layout_dict/layout_publaynet_dict.txt"}, "ch": {"url": "https://paddleocr.bj.bcebos.com/ppstructure/models/layout/picodet_lcnet_x1_0_fgd_layout_cdla_infer.tar", "dict_path": "ppocr/utils/dict/layout_dict/layout_cdla_dict.txt"}}, "kie": {"en": {"url": "https://paddleocr.bj.bcebos.com/ppstructure/models/layout/picodet_lcnet_x1_0_fgd_layout_infer.tar", "dict_path": "ppocr/utils/dict/kie/clinical_class_list.txt"}, "ch": {"url": "https://huggingface.co/spaces/CallMeMrFern/ocr/ppstructure/models/kie/ser_clinical.tar", "dict_path": "ppocr/utils/dict/kie/clinical_class_list.txt"}}}
|
tools/__pycache__/__init__.cpython-310.pyc
ADDED
Binary file (129 Bytes). View file
|
|
tools/infer/__pycache__/predict_cls.cpython-310.pyc
ADDED
Binary file (4.1 kB). View file
|
|
tools/infer/__pycache__/predict_det.cpython-310.pyc
ADDED
Binary file (8.55 kB). View file
|
|
tools/infer/__pycache__/predict_rec.cpython-310.pyc
ADDED
Binary file (13.7 kB). View file
|
|
tools/infer/__pycache__/predict_system.cpython-310.pyc
ADDED
Binary file (7.11 kB). View file
|
|
tools/infer/__pycache__/utility.cpython-310.pyc
ADDED
Binary file (17.3 kB). View file
|
|