Danieldu committed on
Commit
4765310
1 Parent(s): 71150b9

add paddleocr

Browse files
Files changed (1) hide show
  1. paddleocr.py +738 -0
paddleocr.py ADDED
@@ -0,0 +1,738 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import os
16
+ import sys
17
+ import importlib
18
+
19
+ __dir__ = os.path.dirname(__file__)
20
+
21
+ import paddle
22
+
23
+ sys.path.append(os.path.join(__dir__, ''))
24
+
25
+ import cv2
26
+ import logging
27
+ import numpy as np
28
+ from pathlib import Path
29
+
30
def _import_file(module_name, file_path, make_importable=False):
    """Load a Python source file as a module object.

    Args:
        module_name: Name assigned to the loaded module.
        file_path: Path of the source file to execute.
        make_importable: When true, the module is registered in
            ``sys.modules`` under ``module_name`` so that later
            ``import module_name`` statements resolve to it.

    Returns:
        The executed module object.

    Raises:
        ImportError: If no import spec or loader can be built for
            ``file_path`` (for example, an unsupported file extension).
    """
    # A bare ``import importlib`` does not guarantee that the ``util``
    # submodule is loaded, so import it explicitly where it is used.
    import importlib.util

    spec = importlib.util.spec_from_file_location(module_name, file_path)
    if spec is None or spec.loader is None:
        raise ImportError(
            'cannot load module {!r} from {!r}'.format(module_name, file_path),
            name=module_name,
            path=str(file_path))
    module = importlib.util.module_from_spec(spec)

    if not make_importable:
        spec.loader.exec_module(module)
        return module

    # Register before executing so the file (e.g. a package ``__init__``) can
    # import its own submodules while it runs; roll back if execution fails so
    # a half-initialised module is never left behind.
    previous = sys.modules.get(module_name)
    sys.modules[module_name] = module
    try:
        spec.loader.exec_module(module)
    except BaseException:
        if previous is None:
            sys.modules.pop(module_name, None)
        else:
            sys.modules[module_name] = previous
        raise
    return module
37
+
38
+ tools = _import_file('tools', os.path.join(__dir__, 'tools/__init__.py'), make_importable=True)
39
+ ppocr = importlib.import_module('ppocr', 'paddleocr')
40
+ ppstructure = importlib.import_module('ppstructure', 'paddleocr')
41
+
42
+ from tools.infer import predict_system
43
+ from ppocr.utils.logging import get_logger
44
+
45
+ logger = get_logger()
46
+ from ppocr.utils.utility import check_and_read, get_image_file_list
47
+ from ppocr.utils.network import maybe_download, download_with_progressbar, is_link, confirm_model_dir_url
48
+ from tools.infer.utility import draw_ocr, str2bool, check_gpu
49
+ from ppstructure.utility import init_args, draw_structure_result
50
+ from ppstructure.predict_system import StructureSystem, save_structure_res, to_excel
51
+
52
# Names exported by a star-import of this module.
__all__ = [
    'PaddleOCR', 'PPStructure', 'draw_ocr', 'draw_structure_result',
    'save_structure_res', 'download_with_progressbar', 'to_excel'
]
56
+
57
# Package version string.
VERSION = "2.6.1.0"

# Root directory under which downloaded models are stored.
BASE_DIR = os.path.expanduser("~/.paddleocr/")

# Algorithms accepted for detection / recognition.
SUPPORT_DET_MODEL = ["DB"]
SUPPORT_REC_MODEL = ["CRNN", "SVTR_LCNet"]

# OCR model families: the accepted versions and the fallback version.
SUPPORT_OCR_MODEL_VERSION = ["PP-OCR", "PP-OCRv2", "PP-OCRv3"]
DEFAULT_OCR_MODEL_VERSION = "PP-OCRv3"

# Structure model families: the accepted versions and the fallback version.
SUPPORT_STRUCTURE_MODEL_VERSION = ["PP-Structure", "PP-StructureV2"]
DEFAULT_STRUCTURE_MODEL_VERSION = "PP-StructureV2"
66
# Model catalogue, indexed as MODEL_URLS[task][version][model_type][lang].
# Every leaf holds the archive 'url'; recognition, table and layout entries
# also carry the 'dict_path' of their character/label dictionary.
MODEL_URLS = {
    'OCR': {
        'PP-OCRv3': {
            'det': {
                'ch': {'url': 'https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_infer.tar'},
                'en': {'url': 'https://paddleocr.bj.bcebos.com/PP-OCRv3/english/en_PP-OCRv3_det_infer.tar'},
                'ml': {'url': 'https://paddleocr.bj.bcebos.com/PP-OCRv3/multilingual/Multilingual_PP-OCRv3_det_infer.tar'},
            },
            'rec': {
                'tw': {
                    'url': 'https://huggingface.co/spaces/DeepLearning101/OCR101TW/resolve/main/20230804_latest-100_rec.tar',
                    'dict_path': './230802_v2_common_dict.txt',
                },
                'ch': {
                    'url': 'https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_infer.tar',
                    'dict_path': './ppocr/utils/ppocr_keys_v1.txt',
                },
                'en': {
                    'url': 'https://paddleocr.bj.bcebos.com/PP-OCRv3/english/en_PP-OCRv3_rec_infer.tar',
                    'dict_path': './ppocr/utils/en_dict.txt',
                },
                'korean': {
                    'url': 'https://paddleocr.bj.bcebos.com/PP-OCRv3/multilingual/korean_PP-OCRv3_rec_infer.tar',
                    'dict_path': './ppocr/utils/dict/korean_dict.txt',
                },
                'japan': {
                    'url': 'https://paddleocr.bj.bcebos.com/PP-OCRv3/multilingual/japan_PP-OCRv3_rec_infer.tar',
                    'dict_path': './ppocr/utils/dict/japan_dict.txt',
                },
                'chinese_cht': {
                    'url': 'https://paddleocr.bj.bcebos.com/PP-OCRv3/multilingual/chinese_cht_PP-OCRv3_rec_infer.tar',
                    'dict_path': './ppocr/utils/dict/chinese_cht_dict.txt',
                },
                'ta': {
                    'url': 'https://paddleocr.bj.bcebos.com/PP-OCRv3/multilingual/ta_PP-OCRv3_rec_infer.tar',
                    'dict_path': './ppocr/utils/dict/ta_dict.txt',
                },
                'te': {
                    'url': 'https://paddleocr.bj.bcebos.com/PP-OCRv3/multilingual/te_PP-OCRv3_rec_infer.tar',
                    'dict_path': './ppocr/utils/dict/te_dict.txt',
                },
                'ka': {
                    'url': 'https://paddleocr.bj.bcebos.com/PP-OCRv3/multilingual/ka_PP-OCRv3_rec_infer.tar',
                    'dict_path': './ppocr/utils/dict/ka_dict.txt',
                },
                'latin': {
                    'url': 'https://paddleocr.bj.bcebos.com/PP-OCRv3/multilingual/latin_PP-OCRv3_rec_infer.tar',
                    'dict_path': './ppocr/utils/dict/latin_dict.txt',
                },
                'arabic': {
                    'url': 'https://paddleocr.bj.bcebos.com/PP-OCRv3/multilingual/arabic_PP-OCRv3_rec_infer.tar',
                    'dict_path': './ppocr/utils/dict/arabic_dict.txt',
                },
                'cyrillic': {
                    'url': 'https://paddleocr.bj.bcebos.com/PP-OCRv3/multilingual/cyrillic_PP-OCRv3_rec_infer.tar',
                    'dict_path': './ppocr/utils/dict/cyrillic_dict.txt',
                },
                'devanagari': {
                    'url': 'https://paddleocr.bj.bcebos.com/PP-OCRv3/multilingual/devanagari_PP-OCRv3_rec_infer.tar',
                    'dict_path': './ppocr/utils/dict/devanagari_dict.txt',
                },
            },
            'cls': {
                'ch': {'url': 'https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar'},
            },
        },
        'PP-OCRv2': {
            'det': {
                'ch': {'url': 'https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_det_infer.tar'},
            },
            'rec': {
                'ch': {
                    'url': 'https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_rec_infer.tar',
                    'dict_path': './ppocr/utils/ppocr_keys_v1.txt',
                },
            },
            'cls': {
                'ch': {'url': 'https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar'},
            },
        },
        'PP-OCR': {
            'det': {
                'ch': {'url': 'https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_infer.tar'},
                'en': {'url': 'https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/en_ppocr_mobile_v2.0_det_infer.tar'},
                'structure': {'url': 'https://paddleocr.bj.bcebos.com/dygraph_v2.0/table/en_ppocr_mobile_v2.0_table_det_infer.tar'},
            },
            'rec': {
                'ch': {
                    'url': 'https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_infer.tar',
                    'dict_path': './ppocr/utils/ppocr_keys_v1.txt',
                },
                'en': {
                    'url': 'https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/en_number_mobile_v2.0_rec_infer.tar',
                    'dict_path': './ppocr/utils/en_dict.txt',
                },
                'french': {
                    'url': 'https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/french_mobile_v2.0_rec_infer.tar',
                    'dict_path': './ppocr/utils/dict/french_dict.txt',
                },
                'german': {
                    'url': 'https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/german_mobile_v2.0_rec_infer.tar',
                    'dict_path': './ppocr/utils/dict/german_dict.txt',
                },
                'korean': {
                    'url': 'https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/korean_mobile_v2.0_rec_infer.tar',
                    'dict_path': './ppocr/utils/dict/korean_dict.txt',
                },
                'japan': {
                    'url': 'https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/japan_mobile_v2.0_rec_infer.tar',
                    'dict_path': './ppocr/utils/dict/japan_dict.txt',
                },
                'chinese_cht': {
                    'url': 'https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/chinese_cht_mobile_v2.0_rec_infer.tar',
                    'dict_path': './ppocr/utils/dict/chinese_cht_dict.txt',
                },
                'ta': {
                    'url': 'https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/ta_mobile_v2.0_rec_infer.tar',
                    'dict_path': './ppocr/utils/dict/ta_dict.txt',
                },
                'te': {
                    'url': 'https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/te_mobile_v2.0_rec_infer.tar',
                    'dict_path': './ppocr/utils/dict/te_dict.txt',
                },
                'ka': {
                    'url': 'https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/ka_mobile_v2.0_rec_infer.tar',
                    'dict_path': './ppocr/utils/dict/ka_dict.txt',
                },
                'latin': {
                    'url': 'https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/latin_ppocr_mobile_v2.0_rec_infer.tar',
                    'dict_path': './ppocr/utils/dict/latin_dict.txt',
                },
                'arabic': {
                    'url': 'https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/arabic_ppocr_mobile_v2.0_rec_infer.tar',
                    'dict_path': './ppocr/utils/dict/arabic_dict.txt',
                },
                'cyrillic': {
                    'url': 'https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/cyrillic_ppocr_mobile_v2.0_rec_infer.tar',
                    'dict_path': './ppocr/utils/dict/cyrillic_dict.txt',
                },
                'devanagari': {
                    'url': 'https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/devanagari_ppocr_mobile_v2.0_rec_infer.tar',
                    'dict_path': './ppocr/utils/dict/devanagari_dict.txt',
                },
                'structure': {
                    'url': 'https://paddleocr.bj.bcebos.com/dygraph_v2.0/table/en_ppocr_mobile_v2.0_table_rec_infer.tar',
                    'dict_path': 'ppocr/utils/dict/table_dict.txt',
                },
            },
            'cls': {
                'ch': {'url': 'https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar'},
            },
        },
    },
    'STRUCTURE': {
        'PP-Structure': {
            'table': {
                'en': {
                    'url': 'https://paddleocr.bj.bcebos.com/dygraph_v2.0/table/en_ppocr_mobile_v2.0_table_structure_infer.tar',
                    'dict_path': 'ppocr/utils/dict/table_structure_dict.txt',
                },
            },
        },
        'PP-StructureV2': {
            'table': {
                'en': {
                    'url': 'https://paddleocr.bj.bcebos.com/ppstructure/models/slanet/en_ppstructure_mobile_v2.0_SLANet_infer.tar',
                    'dict_path': 'ppocr/utils/dict/table_structure_dict.txt',
                },
                'ch': {
                    'url': 'https://paddleocr.bj.bcebos.com/ppstructure/models/slanet/ch_ppstructure_mobile_v2.0_SLANet_infer.tar',
                    'dict_path': 'ppocr/utils/dict/table_structure_dict_ch.txt',
                },
            },
            'layout': {
                'en': {
                    'url': 'https://paddleocr.bj.bcebos.com/ppstructure/models/layout/picodet_lcnet_x1_0_fgd_layout_infer.tar',
                    'dict_path': 'ppocr/utils/dict/layout_dict/layout_publaynet_dict.txt',
                },
                'ch': {
                    'url': 'https://paddleocr.bj.bcebos.com/ppstructure/models/layout/picodet_lcnet_x1_0_fgd_layout_cdla_infer.tar',
                    'dict_path': 'ppocr/utils/dict/layout_dict/layout_cdla_dict.txt',
                },
            },
        },
    },
}
318
+
319
+
320
def parse_args(mMain=True):
    """Build the argument namespace used by the OCR and structure engines.

    The parser returned by ``init_args()`` is extended with the
    paddleocr-specific options (``--lang``, ``--det``, ``--rec``, ``--type``,
    ``--ocr_version``, ``--structure_version``). The defaults of the three
    dictionary-path options are reset to ``None`` so the engines can tell
    "not provided" apart from an explicit path.

    Args:
        mMain: When true, parse the process command line. When false, skip
            parsing and return a namespace holding every option's default.

    Returns:
        An ``argparse.Namespace``.
    """
    import argparse

    parser = init_args()
    parser.add_help = mMain
    parser.add_argument("--lang", type=str, default='tw')
    parser.add_argument("--det", type=str2bool, default=True)
    parser.add_argument("--rec", type=str2bool, default=True)
    parser.add_argument("--type", type=str, default='ocr')
    parser.add_argument(
        "--ocr_version",
        type=str,
        choices=SUPPORT_OCR_MODEL_VERSION,
        default=DEFAULT_OCR_MODEL_VERSION,
        # Each item ends with '. ' so the concatenated help text stays
        # readable (the first item used to run straight into "2.").
        help='OCR Model version, the current model support list is as follows: '
        '1. PP-OCRv3 Support Chinese and English detection and recognition model, and direction classifier model. '
        '2. PP-OCRv2 Support Chinese detection and recognition model. '
        '3. PP-OCR support Chinese detection, recognition and direction classifier and multilingual recognition model.'
    )
    parser.add_argument(
        "--structure_version",
        type=str,
        choices=SUPPORT_STRUCTURE_MODEL_VERSION,
        default=DEFAULT_STRUCTURE_MODEL_VERSION,
        help='Model version, the current model support list is as follows:'
        ' 1. PP-Structure Support en table structure model.'
        ' 2. PP-StructureV2 Support ch and en table structure model.')

    # Clear the dictionary-path defaults; the engines fill them in from the
    # selected model's configuration when they are still None.
    dict_path_options = (
        'rec_char_dict_path', 'table_char_dict_path', 'layout_dict_path')
    for action in parser._actions:
        if action.dest in dict_path_options:
            action.default = None

    if mMain:
        return parser.parse_args()

    # Library use: never touch sys.argv, just collect the declared defaults.
    inference_args_dict = {
        action.dest: action.default for action in parser._actions
    }
    return argparse.Namespace(**inference_args_dict)
359
+
360
+
361
def parse_lang(lang):
    """Normalise a language code and pick the matching detection language.

    Codes belonging to a script family are collapsed to the family name
    ("latin", "arabic", "cyrillic" or "devanagari"); any other code is kept
    as given. The result must be a key of the default OCR version's
    recognition models.

    Args:
        lang: Language code requested by the caller.

    Returns:
        A ``(lang, det_lang)`` tuple: the normalised recognition language and
        the detection-model language to pair with it.

    Raises:
        AssertionError: If the normalised language has no recognition model
            in the default OCR version.
    """
    script_families = {
        'latin': (
            'af', 'az', 'bs', 'cs', 'cy', 'da', 'de', 'es', 'et', 'fr', 'ga',
            'hr', 'hu', 'id', 'is', 'it', 'ku', 'la', 'lt', 'lv', 'mi', 'ms',
            'mt', 'nl', 'no', 'oc', 'pi', 'pl', 'pt', 'ro', 'rs_latin', 'sk',
            'sl', 'sq', 'sv', 'sw', 'tl', 'tr', 'uz', 'vi', 'french',
            'german',
        ),
        'arabic': ('ar', 'fa', 'ug', 'ur'),
        'cyrillic': (
            'ru', 'rs_cyrillic', 'be', 'bg', 'uk', 'mn', 'abq', 'ady', 'kbd',
            'ava', 'dar', 'inh', 'che', 'lbe', 'lez', 'tab',
        ),
        'devanagari': (
            'hi', 'mr', 'ne', 'bh', 'mai', 'ang', 'bho', 'mah', 'sck', 'new',
            'gom', 'sa', 'bgc',
        ),
    }
    # First matching family wins, in the order listed above.
    for family, members in script_families.items():
        if lang in members:
            lang = family
            break

    supported = MODEL_URLS['OCR'][DEFAULT_OCR_MODEL_VERSION]['rec']
    assert lang in supported, 'param lang must in {}, but got {}'.format(
        supported.keys(), lang)

    # Detection language per recognition language; everything not listed
    # uses the multilingual detector.
    detection_for = {
        'ch': 'ch',
        'tw': 'ch',
        'structure': 'structure',
        'en': 'en',
        'latin': 'en',
    }
    det_lang = detection_for.get(lang, 'ml')
    return lang, det_lang
399
+
400
+
401
def get_model_config(type, version, model_type, lang):
    """Look up one model entry in ``MODEL_URLS``, falling back to the default version.

    The requested ``version`` is replaced by the default version of the task
    whenever the version itself, the ``model_type`` within it, or the
    ``lang`` within that model type is missing but the default version has
    it. If the default version cannot satisfy the request either, an error
    is logged and the process exits with status -1.

    Args:
        type: Task family, ``'OCR'`` or ``'STRUCTURE'``.
        version: Requested model version.
        model_type: Model kind inside the version (e.g. ``'det'``, ``'rec'``).
        lang: Language key inside the model kind.

    Returns:
        The configuration dict stored for the resolved entry.

    Raises:
        NotImplementedError: If ``type`` is neither ``'OCR'`` nor
            ``'STRUCTURE'``.
    """
    if type == 'OCR':
        fallback_version = DEFAULT_OCR_MODEL_VERSION
    elif type == 'STRUCTURE':
        fallback_version = DEFAULT_STRUCTURE_MODEL_VERSION
    else:
        raise NotImplementedError

    catalogue = MODEL_URLS[type]
    fallback_models = catalogue[fallback_version]

    # Unknown version: use the default one.
    if version not in catalogue:
        version = fallback_version

    # Model kind missing from this version: try the default version.
    if model_type not in catalogue[version]:
        if model_type not in fallback_models:
            logger.error('{} models is not support, we only support {}'.format(
                model_type, fallback_models.keys()))
            sys.exit(-1)
        version = fallback_version

    # Language missing for this model kind: try the default version.
    if lang not in catalogue[version][model_type]:
        if lang not in fallback_models[model_type]:
            logger.error(
                'lang {} is not support, we only support {} for {} models'.
                format(lang, fallback_models[model_type].keys(), model_type))
            sys.exit(-1)
        version = fallback_version

    return catalogue[version][model_type][lang]
430
+
431
+
432
def img_decode(content: bytes):
    """Decode an encoded image held in memory.

    Args:
        content: Raw bytes of an encoded image file.

    Returns:
        Whatever ``cv2.imdecode`` yields for the buffer when called with
        ``cv2.IMREAD_COLOR``.
    """
    raw_buffer = np.frombuffer(content, dtype=np.uint8)
    decoded = cv2.imdecode(raw_buffer, cv2.IMREAD_COLOR)
    return decoded
435
+
436
+
437
def check_img(img):
    """Turn any supported image input into something the predictors accept.

    Args:
        img: Encoded image bytes, a local path or link to an image/GIF/PDF,
            or an already-loaded value, which is passed through (grayscale
            arrays are converted, see below).

    Returns:
        The loaded image data, or ``None`` when a path/link could not be
        loaded. A two-dimensional ``numpy`` array is converted with
        ``cv2.COLOR_GRAY2BGR`` before being returned.
    """
    if isinstance(img, bytes):
        img = img_decode(img)

    if isinstance(img, str):
        image_file = img
        if is_link(image_file):
            # Remote image: fetch it to a fixed local file first.
            download_with_progressbar(image_file, 'tmp.jpg')
            image_file = 'tmp.jpg'

        img, flag_gif, flag_pdf = check_and_read(image_file)
        if not (flag_gif or flag_pdf):
            # Neither GIF nor PDF: read and decode the file ourselves.
            with open(image_file, 'rb') as stream:
                img = img_decode(stream.read())
        if img is None:
            logger.error("error in loading image:{}".format(image_file))
            return None

    is_grayscale = isinstance(img, np.ndarray) and len(img.shape) == 2
    if is_grayscale:
        return cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
    return img
457
+
458
+
459
class PaddleOCR(predict_system.TextSystem):
    """Text detection / direction classification / recognition pipeline.

    Resolves the model directories and download URLs for the requested
    language and OCR version, downloads missing models, then initialises the
    underlying ``TextSystem`` with the resulting parameters.
    """

    def __init__(self, **kwargs):
        """
        paddleocr package
        args:
            **kwargs: other params show in paddleocr --help
        """
        params = parse_args(mMain=False)
        params.__dict__.update(**kwargs)
        assert params.ocr_version in SUPPORT_OCR_MODEL_VERSION, "ocr_version must in {}, but get {}".format(
            SUPPORT_OCR_MODEL_VERSION, params.ocr_version)
        params.use_gpu = check_gpu(params.use_gpu)

        if not params.show_log:
            logger.setLevel(logging.INFO)
        self.use_angle_cls = params.use_angle_cls
        lang, det_lang = parse_lang(params.lang)

        # init model dir
        det_model_config = get_model_config('OCR', params.ocr_version, 'det',
                                            det_lang)
        params.det_model_dir, det_url = confirm_model_dir_url(
            params.det_model_dir,
            os.path.join(BASE_DIR, 'whl', 'det', det_lang),
            det_model_config['url'])
        rec_model_config = get_model_config('OCR', params.ocr_version, 'rec',
                                            lang)
        params.rec_model_dir, rec_url = confirm_model_dir_url(
            params.rec_model_dir,
            os.path.join(BASE_DIR, 'whl', 'rec', lang), rec_model_config['url'])
        cls_model_config = get_model_config('OCR', params.ocr_version, 'cls',
                                            'ch')
        params.cls_model_dir, cls_url = confirm_model_dir_url(
            params.cls_model_dir,
            os.path.join(BASE_DIR, 'whl', 'cls'), cls_model_config['url'])
        if params.ocr_version == 'PP-OCRv3':
            params.rec_image_shape = "3, 48, 320"
        else:
            params.rec_image_shape = "3, 32, 320"
        # download model if using paddle infer
        if not params.use_onnx:
            maybe_download(params.det_model_dir, det_url)
            maybe_download(params.rec_model_dir, rec_url)
            maybe_download(params.cls_model_dir, cls_url)

        # Unsupported algorithms are errors, so exit with a non-zero status
        # (same status get_model_config uses for its own failures).
        if params.det_algorithm not in SUPPORT_DET_MODEL:
            logger.error('det_algorithm must in {}'.format(SUPPORT_DET_MODEL))
            sys.exit(-1)
        if params.rec_algorithm not in SUPPORT_REC_MODEL:
            logger.error('rec_algorithm must in {}'.format(SUPPORT_REC_MODEL))
            sys.exit(-1)

        if params.rec_char_dict_path is None:
            params.rec_char_dict_path = str(
                Path(__file__).parent / rec_model_config['dict_path'])

        logger.debug(params)
        # init det_model and rec_model
        super().__init__(params)
        self.page_num = params.page_num

    def ocr(self, img, det=True, rec=True, cls=True):
        """
        ocr with paddleocr
        args:
            img: img for ocr, support ndarray, img_path and list or ndarray
            det: use text detection or not. If false, only rec will be exec. Default is True
            rec: use text recognition or not. If false, only det will be exec. Default is True
            cls: use angle classifier or not. Default is True. If true, the text with rotation of 180 degrees can be recognized. If no text is rotated by 180 degrees, use cls=False to get better performance. Text with rotation of 90 or 270 degrees can be recognized even if cls=False.
        returns:
            One entry per processed image/page:
              - det and rec: list of [box, recognition result] pairs
              - det only: list of boxes
              - rec without det: the recognizer output for the image
              - neither det nor rec: the classifier output for the image
            None is returned when the input could not be loaded as an image.
        """
        assert isinstance(img, (np.ndarray, list, str, bytes))
        # Truthiness is used here so the guards agree with the branch
        # selection further down, which also relies on truthiness.
        if isinstance(img, list) and det:
            logger.error('When input a list of images, det must be false')
            # sys.exit instead of the site-provided exit(), which is meant
            # for the interactive shell and is not always available.
            sys.exit(-1)
        if cls and not self.use_angle_cls:
            logger.warning(
                'Since the angle classifier is not initialized, the angle classifier will not be uesd during the forward process'
            )

        img = check_img(img)
        if img is None:
            # Nothing could be loaded; report it instead of failing later
            # inside the predictors with an unrelated error.
            logger.error('no valid image could be loaded from the ocr input')
            return None

        # for infer pdf file
        if isinstance(img, list):
            if self.page_num > len(img) or self.page_num == 0:
                imgs = img
            else:
                imgs = img[:self.page_num]
        else:
            imgs = [img]

        if det and rec:
            ocr_res = []
            for page in imgs:
                dt_boxes, rec_res, _ = self.__call__(page, cls)
                ocr_res.append([[box.tolist(), res]
                                for box, res in zip(dt_boxes, rec_res)])
            return ocr_res

        if det and not rec:
            ocr_res = []
            for page in imgs:
                dt_boxes, _ = self.text_detector(page)
                ocr_res.append([box.tolist() for box in dt_boxes])
            return ocr_res

        # det is off: classify and/or recognise the given crops directly.
        ocr_res = []
        cls_res = []
        for page in imgs:
            if not isinstance(page, list):
                page = [page]
            if self.use_angle_cls and cls:
                page, cls_res_tmp, _ = self.text_classifier(page)
                if not rec:
                    cls_res.append(cls_res_tmp)
            if not rec:
                # Only the classifier output is returned in this mode, so
                # running the recognizer would be wasted work.
                continue
            rec_res, _ = self.text_recognizer(page)
            ocr_res.append(rec_res)
        if not rec:
            return cls_res
        return ocr_res
577
+
578
+
579
class PPStructure(StructureSystem):
    """Document structure pipeline (layout, table and OCR models).

    Resolves model directories and download URLs for the requested language
    and versions, downloads missing models, fills in dictionary paths that
    were not provided, then initialises the underlying ``StructureSystem``.
    """

    def __init__(self, **kwargs):
        """
        args:
            **kwargs: overrides for the default parameters produced by
                ``parse_args(mMain=False)``.
        """
        params = parse_args(mMain=False)
        params.__dict__.update(**kwargs)
        assert params.structure_version in SUPPORT_STRUCTURE_MODEL_VERSION, "structure_version must in {}, but get {}".format(
            SUPPORT_STRUCTURE_MODEL_VERSION, params.structure_version)
        params.use_gpu = check_gpu(params.use_gpu)
        params.mode = 'structure'

        if not params.show_log:
            logger.setLevel(logging.INFO)
        lang, det_lang = parse_lang(params.lang)
        # Table and layout models only exist for 'ch' and 'en'; every other
        # recognition language uses the English models.
        if lang == 'ch':
            table_lang = 'ch'
        else:
            table_lang = 'en'
        if params.structure_version == 'PP-Structure':
            params.merge_no_span_structure = False

        # init model dir
        det_model_config = get_model_config('OCR', params.ocr_version, 'det',
                                            det_lang)
        params.det_model_dir, det_url = confirm_model_dir_url(
            params.det_model_dir,
            os.path.join(BASE_DIR, 'whl', 'det', det_lang),
            det_model_config['url'])
        rec_model_config = get_model_config('OCR', params.ocr_version, 'rec',
                                            lang)
        params.rec_model_dir, rec_url = confirm_model_dir_url(
            params.rec_model_dir,
            os.path.join(BASE_DIR, 'whl', 'rec', lang), rec_model_config['url'])
        table_model_config = get_model_config(
            'STRUCTURE', params.structure_version, 'table', table_lang)
        params.table_model_dir, table_url = confirm_model_dir_url(
            params.table_model_dir,
            os.path.join(BASE_DIR, 'whl', 'table'), table_model_config['url'])
        # Look the layout model up with the ch/en fallback language as well.
        # Using the raw recognition language made get_model_config exit the
        # process for every language other than 'ch' and 'en', including the
        # default 'tw'.
        layout_model_config = get_model_config(
            'STRUCTURE', params.structure_version, 'layout', table_lang)
        params.layout_model_dir, layout_url = confirm_model_dir_url(
            params.layout_model_dir,
            os.path.join(BASE_DIR, 'whl', 'layout'), layout_model_config['url'])
        # download model
        maybe_download(params.det_model_dir, det_url)
        maybe_download(params.rec_model_dir, rec_url)
        maybe_download(params.table_model_dir, table_url)
        maybe_download(params.layout_model_dir, layout_url)

        # Dictionary paths left unset default to the files shipped next to
        # this module, as named by the selected model configurations.
        if params.rec_char_dict_path is None:
            params.rec_char_dict_path = str(
                Path(__file__).parent / rec_model_config['dict_path'])
        if params.table_char_dict_path is None:
            params.table_char_dict_path = str(
                Path(__file__).parent / table_model_config['dict_path'])
        if params.layout_dict_path is None:
            params.layout_dict_path = str(
                Path(__file__).parent / layout_model_config['dict_path'])
        logger.debug(params)
        super().__init__(params)

    def __call__(self, img, return_ocr_result_in_table=False, img_idx=0):
        """Analyse one image and return the structure result.

        args:
            img: any input accepted by ``check_img``.
            return_ocr_result_in_table: forwarded to ``StructureSystem``.
            img_idx: index of the image/page, forwarded to ``StructureSystem``.
        returns:
            The first element of the tuple returned by
            ``StructureSystem.__call__``.
        """
        img = check_img(img)
        res, _ = super().__call__(
            img, return_ocr_result_in_table, img_idx=img_idx)
        return res
643
+
644
+
645
def _run_ocr_on_image(engine, img_path, args):
    """Run OCR on one image path and log every result line."""
    result = engine.ocr(img_path,
                        det=args.det,
                        rec=args.rec,
                        cls=args.use_angle_cls)
    if result is not None:
        for res in result:
            for line in res:
                logger.info(line)


def _run_structure_on_image(engine, img_path, img_name, args):
    """Run structure analysis (and optional layout recovery) on one input file."""
    img, flag_gif, flag_pdf = check_and_read(img_path)
    if not flag_gif and not flag_pdf:
        img = cv2.imread(img_path)

    if args.recovery and args.use_pdf2docx_api and flag_pdf:
        # Direct PDF -> docx conversion, no model inference involved.
        from pdf2docx.converter import Converter
        os.makedirs(args.output, exist_ok=True)
        docx_file = os.path.join(args.output, '{}.docx'.format(img_name))
        converter = Converter(img_path)
        try:
            converter.convert(docx_file)
        finally:
            # Always release the converter, even when conversion fails.
            converter.close()
        logger.info('docx save to {}'.format(docx_file))
        return

    if not flag_pdf:
        if img is None:
            logger.error("error in loading image:{}".format(img_path))
            return
        img_paths = [[img_path, img]]
    else:
        # Dump every PDF page as a jpg so it can be processed by path.
        page_dir = os.path.join(args.output, img_name)
        os.makedirs(page_dir, exist_ok=True)
        img_paths = []
        for index, pdf_img in enumerate(img):
            pdf_img_path = os.path.join(
                page_dir, img_name + '_' + str(index) + '.jpg')
            cv2.imwrite(pdf_img_path, pdf_img)
            img_paths.append([pdf_img_path, pdf_img])

    all_res = []
    for index, (new_img_path, img) in enumerate(img_paths):
        logger.info('processing {}/{} page:'.format(index + 1,
                                                    len(img_paths)))
        result = engine(new_img_path, img_idx=index)
        save_structure_res(result, args.output, img_name, index)

        if args.recovery and result != []:
            from copy import deepcopy
            from ppstructure.recovery.recovery_to_doc import sorted_layout_boxes
            width = img.shape[1]
            all_res += sorted_layout_boxes(deepcopy(result), width)

    if args.recovery and all_res != []:
        try:
            from ppstructure.recovery.recovery_to_doc import convert_info_docx
            # ``img`` is the last page processed by the loop above.
            convert_info_docx(img, all_res, args.output, img_name)
        except Exception as ex:
            logger.error(
                "error in layout recovery image:{}, err msg: {}".format(
                    img_name, ex))
            return

    for item in all_res:
        # Drop the bulky fields before logging the region summary.
        item.pop('img')
        item.pop('res')
        logger.info(item)
    logger.info('result save to {}'.format(args.output))


def main():
    """Command-line entry point: run OCR or structure analysis on ``--image_dir``.

    ``--image_dir`` may be a link (downloaded to ``tmp.jpg``) or anything
    ``get_image_file_list`` accepts. ``--type`` selects the engine.

    Raises:
        NotImplementedError: If ``--type`` is neither ``ocr`` nor
            ``structure``.
    """
    # for cmd
    args = parse_args(mMain=True)
    image_dir = args.image_dir
    if is_link(image_dir):
        download_with_progressbar(image_dir, 'tmp.jpg')
        image_file_list = ['tmp.jpg']
    else:
        image_file_list = get_image_file_list(args.image_dir)
    if len(image_file_list) == 0:
        logger.error('no images find in {}'.format(args.image_dir))
        return
    if args.type == 'ocr':
        engine = PaddleOCR(**(args.__dict__))
    elif args.type == 'structure':
        engine = PPStructure(**(args.__dict__))
    else:
        raise NotImplementedError

    for img_path in image_file_list:
        img_name = os.path.basename(img_path).split('.')[0]
        logger.info('{}{}{}'.format('*' * 10, img_path, '*' * 10))
        if args.type == 'ocr':
            _run_ocr_on_image(engine, img_path, args)
        elif args.type == 'structure':
            _run_structure_on_image(engine, img_path, img_name, args)