Sanster committed on
Commit
bee3b07
·
verified ·
1 Parent(s): 403a5dc

Upload 5 files

Browse files
ch_PP-OCRv4_det_student.yml ADDED
@@ -0,0 +1,171 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Global:
2
+ debug: false
3
+ use_gpu: true
4
+ epoch_num: &epoch_num 500
5
+ log_smooth_window: 20
6
+ print_batch_step: 100
7
+ save_model_dir: ./output/ch_PP-OCRv4
8
+ save_epoch_step: 10
9
+ eval_batch_step:
10
+ - 0
11
+ - 1500
12
+ cal_metric_during_train: false
13
+ checkpoints:
14
+ pretrained_model: https://paddleocr.bj.bcebos.com/pretrained/PPLCNetV3_x0_75_ocr_det.pdparams
15
+ save_inference_dir: null
16
+ use_visualdl: false
17
+ infer_img: doc/imgs_en/img_10.jpg
18
+ save_res_path: ./checkpoints/det_db/predicts_db.txt
19
+ distributed: true
20
+
21
+ Architecture:
22
+ model_type: det
23
+ algorithm: DB
24
+ Transform: null
25
+ Backbone:
26
+ name: PPLCNetV3
27
+ scale: 0.75
28
+ det: True
29
+ Neck:
30
+ name: RSEFPN
31
+ out_channels: 96
32
+ shortcut: True
33
+ Head:
34
+ name: DBHead
35
+ k: 50
36
+
37
+ Loss:
38
+ name: DBLoss
39
+ balance_loss: true
40
+ main_loss_type: DiceLoss
41
+ alpha: 5
42
+ beta: 10
43
+ ohem_ratio: 3
44
+
45
+ Optimizer:
46
+ name: Adam
47
+ beta1: 0.9
48
+ beta2: 0.999
49
+ lr:
50
+ name: Cosine
51
+ learning_rate: 0.001 # (8*8c) presumably tuned for batch size 8 per card x 8 cards; confirm and rescale if the setup differs
52
+ warmup_epoch: 2
53
+ regularizer:
54
+ name: L2
55
+ factor: 5.0e-05
56
+
57
+ PostProcess:
58
+ name: DBPostProcess
59
+ thresh: 0.3
60
+ box_thresh: 0.6
61
+ max_candidates: 1000
62
+ unclip_ratio: 1.5
63
+
64
+ Metric:
65
+ name: DetMetric
66
+ main_indicator: hmean
67
+
68
+ Train:
69
+ dataset:
70
+ name: SimpleDataSet
71
+ data_dir: ./train_data/icdar2015/text_localization/
72
+ label_file_list:
73
+ - ./train_data/icdar2015/text_localization/train_icdar2015_label.txt
74
+ ratio_list: [1.0]
75
+ transforms:
76
+ - DecodeImage:
77
+ img_mode: BGR
78
+ channel_first: false
79
+ - DetLabelEncode: null
80
+ - CopyPaste: null
81
+ - IaaAugment:
82
+ augmenter_args:
83
+ - type: Fliplr
84
+ args:
85
+ p: 0.5
86
+ - type: Affine
87
+ args:
88
+ rotate:
89
+ - -10
90
+ - 10
91
+ - type: Resize
92
+ args:
93
+ size:
94
+ - 0.5
95
+ - 3
96
+ - EastRandomCropData:
97
+ size:
98
+ - 640
99
+ - 640
100
+ max_tries: 50
101
+ keep_ratio: true
102
+ - MakeBorderMap:
103
+ shrink_ratio: 0.4
104
+ thresh_min: 0.3
105
+ thresh_max: 0.7
106
+ total_epoch: *epoch_num
107
+ - MakeShrinkMap:
108
+ shrink_ratio: 0.4
109
+ min_text_size: 8
110
+ total_epoch: *epoch_num
111
+ - NormalizeImage:
112
+ scale: 1./255.
113
+ mean:
114
+ - 0.485
115
+ - 0.456
116
+ - 0.406
117
+ std:
118
+ - 0.229
119
+ - 0.224
120
+ - 0.225
121
+ order: hwc
122
+ - ToCHWImage: null
123
+ - KeepKeys:
124
+ keep_keys:
125
+ - image
126
+ - threshold_map
127
+ - threshold_mask
128
+ - shrink_map
129
+ - shrink_mask
130
+ loader:
131
+ shuffle: true
132
+ drop_last: false
133
+ batch_size_per_card: 8
134
+ num_workers: 8
135
+
136
+ Eval:
137
+ dataset:
138
+ name: SimpleDataSet
139
+ data_dir: ./train_data/icdar2015/text_localization/
140
+ label_file_list:
141
+ - ./train_data/icdar2015/text_localization/test_icdar2015_label.txt
142
+ transforms:
143
+ - DecodeImage:
144
+ img_mode: BGR
145
+ channel_first: false
146
+ - DetLabelEncode: null
147
+ - DetResizeForTest:
148
+ - NormalizeImage:
149
+ scale: 1./255.
150
+ mean:
151
+ - 0.485
152
+ - 0.456
153
+ - 0.406
154
+ std:
155
+ - 0.229
156
+ - 0.224
157
+ - 0.225
158
+ order: hwc
159
+ - ToCHWImage: null
160
+ - KeepKeys:
161
+ keep_keys:
162
+ - image
163
+ - shape
164
+ - polys
165
+ - ignore_tags
166
+ loader:
167
+ shuffle: false
168
+ drop_last: false
169
+ batch_size_per_card: 1
170
+ num_workers: 2
171
+ profiler_options: null
ch_PP-OCRv4_det_teacher.yml ADDED
@@ -0,0 +1,172 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Global:
2
+ debug: false
3
+ use_gpu: true
4
+ epoch_num: &epoch_num 500
5
+ log_smooth_window: 20
6
+ print_batch_step: 100
7
+ save_model_dir: ./output/ch_PP-OCRv4
8
+ save_epoch_step: 10
9
+ eval_batch_step:
10
+ - 0
11
+ - 1500
12
+ cal_metric_during_train: false
13
+ checkpoints:
14
+ pretrained_model: https://paddleocr.bj.bcebos.com/pretrained/PPHGNet_small_ocr_det.pdparams
15
+ save_inference_dir: null
16
+ use_visualdl: false
17
+ infer_img: doc/imgs_en/img_10.jpg
18
+ save_res_path: ./checkpoints/det_db/predicts_db.txt
19
+ distributed: true
20
+
21
+ Architecture:
22
+ model_type: det
23
+ algorithm: DB
24
+ Transform: null
25
+ Backbone:
26
+ name: PPHGNet_small
27
+ det: True
28
+ Neck:
29
+ name: LKPAN
30
+ out_channels: 256
31
+ intracl: true
32
+ Head:
33
+ name: PFHeadLocal
34
+ k: 50
35
+ mode: "large"
36
+
37
+
38
+ Loss:
39
+ name: DBLoss
40
+ balance_loss: true
41
+ main_loss_type: DiceLoss
42
+ alpha: 5
43
+ beta: 10
44
+ ohem_ratio: 3
45
+
46
+ Optimizer:
47
+ name: Adam
48
+ beta1: 0.9
49
+ beta2: 0.999
50
+ lr:
51
+ name: Cosine
52
+ learning_rate: 0.001 # (8*8c) presumably tuned for batch size 8 per card x 8 cards; confirm and rescale if the setup differs
53
+ warmup_epoch: 2
54
+ regularizer:
55
+ name: L2
56
+ factor: 1e-6
57
+
58
+ PostProcess:
59
+ name: DBPostProcess
60
+ thresh: 0.3
61
+ box_thresh: 0.6
62
+ max_candidates: 1000
63
+ unclip_ratio: 1.5
64
+
65
+ Metric:
66
+ name: DetMetric
67
+ main_indicator: hmean
68
+
69
+ Train:
70
+ dataset:
71
+ name: SimpleDataSet
72
+ data_dir: ./train_data/icdar2015/text_localization/
73
+ label_file_list:
74
+ - ./train_data/icdar2015/text_localization/train_icdar2015_label.txt
75
+ ratio_list: [1.0]
76
+ transforms:
77
+ - DecodeImage:
78
+ img_mode: BGR
79
+ channel_first: false
80
+ - DetLabelEncode: null
81
+ - CopyPaste: null
82
+ - IaaAugment:
83
+ augmenter_args:
84
+ - type: Fliplr
85
+ args:
86
+ p: 0.5
87
+ - type: Affine
88
+ args:
89
+ rotate:
90
+ - -10
91
+ - 10
92
+ - type: Resize
93
+ args:
94
+ size:
95
+ - 0.5
96
+ - 3
97
+ - EastRandomCropData:
98
+ size:
99
+ - 640
100
+ - 640
101
+ max_tries: 50
102
+ keep_ratio: true
103
+ - MakeBorderMap:
104
+ shrink_ratio: 0.4
105
+ thresh_min: 0.3
106
+ thresh_max: 0.7
107
+ total_epoch: *epoch_num
108
+ - MakeShrinkMap:
109
+ shrink_ratio: 0.4
110
+ min_text_size: 8
111
+ total_epoch: *epoch_num
112
+ - NormalizeImage:
113
+ scale: 1./255.
114
+ mean:
115
+ - 0.485
116
+ - 0.456
117
+ - 0.406
118
+ std:
119
+ - 0.229
120
+ - 0.224
121
+ - 0.225
122
+ order: hwc
123
+ - ToCHWImage: null
124
+ - KeepKeys:
125
+ keep_keys:
126
+ - image
127
+ - threshold_map
128
+ - threshold_mask
129
+ - shrink_map
130
+ - shrink_mask
131
+ loader:
132
+ shuffle: true
133
+ drop_last: false
134
+ batch_size_per_card: 8
135
+ num_workers: 8
136
+
137
+ Eval:
138
+ dataset:
139
+ name: SimpleDataSet
140
+ data_dir: ./train_data/icdar2015/text_localization/
141
+ label_file_list:
142
+ - ./train_data/icdar2015/text_localization/test_icdar2015_label.txt
143
+ transforms:
144
+ - DecodeImage:
145
+ img_mode: BGR
146
+ channel_first: false
147
+ - DetLabelEncode: null
148
+ - DetResizeForTest:
149
+ - NormalizeImage:
150
+ scale: 1./255.
151
+ mean:
152
+ - 0.485
153
+ - 0.456
154
+ - 0.406
155
+ std:
156
+ - 0.229
157
+ - 0.224
158
+ - 0.225
159
+ order: hwc
160
+ - ToCHWImage: null
161
+ - KeepKeys:
162
+ keep_keys:
163
+ - image
164
+ - shape
165
+ - polys
166
+ - ignore_tags
167
+ loader:
168
+ shuffle: false
169
+ drop_last: false
170
+ batch_size_per_card: 1
171
+ num_workers: 2
172
+ profiler_options: null
ch_PP-OCRv4_rec.yml ADDED
@@ -0,0 +1,138 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Global:
2
+ debug: false
3
+ use_gpu: true
4
+ epoch_num: 200
5
+ log_smooth_window: 20
6
+ print_batch_step: 10
7
+ save_model_dir: ./output/rec_ppocr_v4
8
+ save_epoch_step: 10
9
+ eval_batch_step: [0, 2000]
10
+ cal_metric_during_train: true
11
+ pretrained_model:
12
+ checkpoints:
13
+ save_inference_dir:
14
+ use_visualdl: false
15
+ infer_img: doc/imgs_words/ch/word_1.jpg
16
+ character_dict_path: pytorchocr/utils/ppocr_keys_v1.txt
17
+ max_text_length: &max_text_length 25
18
+ infer_mode: true
19
+ use_space_char: true
20
+ distributed: true
21
+ save_res_path: ./output/rec/predicts_ppocrv3.txt
22
+
23
+
24
+ Optimizer:
25
+ name: Adam
26
+ beta1: 0.9
27
+ beta2: 0.999
28
+ lr:
29
+ name: Cosine
30
+ learning_rate: 0.001
31
+ warmup_epoch: 5
32
+ regularizer:
33
+ name: L2
34
+ factor: 3.0e-05
35
+
36
+
37
+ Architecture:
38
+ model_type: rec
39
+ algorithm: SVTR_LCNet
40
+ Transform:
41
+ Backbone:
42
+ name: PPLCNetV3
43
+ scale: 0.95
44
+ Head:
45
+ name: MultiHead
46
+ head_list:
47
+ - CTCHead:
48
+ Neck:
49
+ name: svtr
50
+ dims: 120
51
+ depth: 2
52
+ hidden_dims: 120
53
+ kernel_size: [1, 3]
54
+ use_guide: True
55
+ Head:
56
+ fc_decay: 0.00001
57
+ - NRTRHead:
58
+ nrtr_dim: 384
59
+ max_text_length: *max_text_length
60
+
61
+ Loss:
62
+ name: MultiLoss
63
+ loss_config_list:
64
+ - CTCLoss:
65
+ - NRTRLoss:
66
+
67
+ PostProcess:
68
+ name: CTCLabelDecode
69
+
70
+ Metric:
71
+ name: RecMetric
72
+ main_indicator: acc
73
+
74
+ Train:
75
+ dataset:
76
+ name: MultiScaleDataSet
77
+ ds_width: false
78
+ data_dir: ./train_data/
79
+ ext_op_transform_idx: 1
80
+ label_file_list:
81
+ - ./train_data/train_list.txt
82
+ transforms:
83
+ - DecodeImage:
84
+ img_mode: BGR
85
+ channel_first: false
86
+ - RecConAug:
87
+ prob: 0.5
88
+ ext_data_num: 2
89
+ image_shape: [48, 320, 3]
90
+ max_text_length: *max_text_length
91
+ - RecAug:
92
+ - MultiLabelEncode:
93
+ gtc_encode: NRTRLabelEncode
94
+ - KeepKeys:
95
+ keep_keys:
96
+ - image
97
+ - label_ctc
98
+ - label_gtc
99
+ - length
100
+ - valid_ratio
101
+ sampler:
102
+ name: MultiScaleSampler
103
+ scales: [[320, 32], [320, 48], [320, 64]]
104
+ first_bs: &bs 192
105
+ fix_bs: false
106
+ divided_factor: [8, 16] # w, h
107
+ is_training: True
108
+ loader:
109
+ shuffle: true
110
+ batch_size_per_card: *bs
111
+ drop_last: true
112
+ num_workers: 8
113
+ Eval:
114
+ dataset:
115
+ name: SimpleDataSet
116
+ data_dir: ./train_data
117
+ label_file_list:
118
+ - ./train_data/val_list.txt
119
+ transforms:
120
+ - DecodeImage:
121
+ img_mode: BGR
122
+ channel_first: false
123
+ - MultiLabelEncode:
124
+ gtc_encode: NRTRLabelEncode
125
+ - RecResizeImg:
126
+ image_shape: [3, 48, 320]
127
+ - KeepKeys:
128
+ keep_keys:
129
+ - image
130
+ - label_ctc
131
+ - label_gtc
132
+ - length
133
+ - valid_ratio
134
+ loader:
135
+ shuffle: false
136
+ drop_last: false
137
+ batch_size_per_card: 128
138
+ num_workers: 4
ch_PP-OCRv4_rec_hgnet.yml ADDED
@@ -0,0 +1,137 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Global:
2
+ debug: false
3
+ use_gpu: true
4
+ epoch_num: 200
5
+ log_smooth_window: 20
6
+ print_batch_step: 10
7
+ save_model_dir: ./output/rec_ppocr_v4_hgnet
8
+ save_epoch_step: 10
9
+ eval_batch_step: [0, 2000]
10
+ cal_metric_during_train: true
11
+ pretrained_model:
12
+ checkpoints:
13
+ save_inference_dir:
14
+ use_visualdl: false
15
+ infer_img: doc/imgs_words/ch/word_1.jpg
16
+ character_dict_path: pytorchocr/utils/ppocr_keys_v1.txt
17
+ max_text_length: &max_text_length 25
18
+ infer_mode: false
19
+ use_space_char: true
20
+ distributed: true
21
+ save_res_path: ./output/rec/predicts_ppocrv3.txt
22
+
23
+
24
+ Optimizer:
25
+ name: Adam
26
+ beta1: 0.9
27
+ beta2: 0.999
28
+ lr:
29
+ name: Cosine
30
+ learning_rate: 0.001
31
+ warmup_epoch: 5
32
+ regularizer:
33
+ name: L2
34
+ factor: 3.0e-05
35
+
36
+
37
+ Architecture:
38
+ model_type: rec
39
+ algorithm: SVTR_HGNet
40
+ Transform:
41
+ Backbone:
42
+ name: PPHGNet_small
43
+ Head:
44
+ name: MultiHead
45
+ head_list:
46
+ - CTCHead:
47
+ Neck:
48
+ name: svtr
49
+ dims: 120
50
+ depth: 2
51
+ hidden_dims: 120
52
+ kernel_size: [1, 3]
53
+ use_guide: True
54
+ Head:
55
+ fc_decay: 0.00001
56
+ - NRTRHead:
57
+ nrtr_dim: 384
58
+ max_text_length: *max_text_length
59
+
60
+ Loss:
61
+ name: MultiLoss
62
+ loss_config_list:
63
+ - CTCLoss:
64
+ - NRTRLoss:
65
+
66
+ PostProcess:
67
+ name: CTCLabelDecode
68
+
69
+ Metric:
70
+ name: RecMetric
71
+ main_indicator: acc
72
+
73
+ Train:
74
+ dataset:
75
+ name: MultiScaleDataSet
76
+ ds_width: false
77
+ data_dir: ./train_data/
78
+ ext_op_transform_idx: 1
79
+ label_file_list:
80
+ - ./train_data/train_list.txt
81
+ transforms:
82
+ - DecodeImage:
83
+ img_mode: BGR
84
+ channel_first: false
85
+ - RecConAug:
86
+ prob: 0.5
87
+ ext_data_num: 2
88
+ image_shape: [48, 320, 3]
89
+ max_text_length: *max_text_length
90
+ - RecAug:
91
+ - MultiLabelEncode:
92
+ gtc_encode: NRTRLabelEncode
93
+ - KeepKeys:
94
+ keep_keys:
95
+ - image
96
+ - label_ctc
97
+ - label_gtc
98
+ - length
99
+ - valid_ratio
100
+ sampler:
101
+ name: MultiScaleSampler
102
+ scales: [[320, 32], [320, 48], [320, 64]]
103
+ first_bs: &bs 128
104
+ fix_bs: false
105
+ divided_factor: [8, 16] # w, h
106
+ is_training: True
107
+ loader:
108
+ shuffle: true
109
+ batch_size_per_card: *bs
110
+ drop_last: true
111
+ num_workers: 8
112
+ Eval:
113
+ dataset:
114
+ name: SimpleDataSet
115
+ data_dir: ./train_data
116
+ label_file_list:
117
+ - ./train_data/val_list.txt
118
+ transforms:
119
+ - DecodeImage:
120
+ img_mode: BGR
121
+ channel_first: false
122
+ - MultiLabelEncode:
123
+ gtc_encode: NRTRLabelEncode
124
+ - RecResizeImg:
125
+ image_shape: [3, 48, 320]
126
+ - KeepKeys:
127
+ keep_keys:
128
+ - image
129
+ - label_ctc
130
+ - label_gtc
131
+ - length
132
+ - valid_ratio
133
+ loader:
134
+ shuffle: false
135
+ drop_last: false
136
+ batch_size_per_card: 128
137
+ num_workers: 4
en_PP-OCRv4_rec.yml ADDED
@@ -0,0 +1,150 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Global:
2
+ debug: false
3
+ use_gpu: true
4
+ epoch_num: 50
5
+ log_smooth_window: 20
6
+ print_batch_step: 10
7
+ save_model_dir: ./output/rec_ppocr_v4
8
+ save_epoch_step: 10
9
+ eval_batch_step:
10
+ - 0
11
+ - 2000
12
+ cal_metric_during_train: true
13
+ pretrained_model: refactor
14
+ checkpoints: null
15
+ save_inference_dir: null
16
+ use_visualdl: false
17
+ infer_img: doc/imgs_words/ch/word_1.jpg
18
+ character_dict_path: pytorchocr/utils/en_dict.txt
19
+ max_text_length: 25
20
+ infer_mode: false
21
+ use_space_char: true
22
+ distributed: true
23
+ save_res_path: ./output/rec/predicts_ppocrv3.txt
24
+ Optimizer:
25
+ name: Adam
26
+ beta1: 0.9
27
+ beta2: 0.999
28
+ lr:
29
+ name: Cosine
30
+ learning_rate: 0.0005
31
+ warmup_epoch: 5
32
+ regularizer:
33
+ name: L2
34
+ factor: 3.0e-05
35
+ Architecture:
36
+ model_type: rec
37
+ algorithm: SVTR_LCNet
38
+ Transform: null
39
+ Backbone:
40
+ name: PPLCNetV3
41
+ scale: 0.95
42
+ Head:
43
+ name: MultiHead
44
+ head_list:
45
+ - CTCHead:
46
+ Neck:
47
+ name: svtr
48
+ dims: 120
49
+ depth: 2
50
+ hidden_dims: 120
51
+ kernel_size:
52
+ - 1
53
+ - 3
54
+ use_guide: true
55
+ Head:
56
+ fc_decay: 1.0e-05
57
+ - NRTRHead:
58
+ nrtr_dim: 384
59
+ max_text_length: 25
60
+ Loss:
61
+ name: MultiLoss
62
+ loss_config_list:
63
+ - CTCLoss: null
64
+ - NRTRLoss: null
65
+ PostProcess:
66
+ name: CTCLabelDecode
67
+ Metric:
68
+ name: RecMetric
69
+ main_indicator: acc
70
+ ignore_space: false
71
+ Train:
72
+ dataset:
73
+ name: MultiScaleDataSet
74
+ ds_width: false
75
+ data_dir: ./train_data/
76
+ ext_op_transform_idx: 1
77
+ label_file_list:
78
+ - ./train_data/train_list.txt
79
+ transforms:
80
+ - DecodeImage:
81
+ img_mode: BGR
82
+ channel_first: false
83
+ - RecConAug:
84
+ prob: 0.5
85
+ ext_data_num: 2
86
+ image_shape:
87
+ - 48
88
+ - 320
89
+ - 3
90
+ max_text_length: 25
91
+ - RecAug: null
92
+ - MultiLabelEncode:
93
+ gtc_encode: NRTRLabelEncode
94
+ - KeepKeys:
95
+ keep_keys:
96
+ - image
97
+ - label_ctc
98
+ - label_gtc
99
+ - length
100
+ - valid_ratio
101
+ sampler:
102
+ name: MultiScaleSampler
103
+ scales:
104
+ - - 320
105
+ - 32
106
+ - - 320
107
+ - 48
108
+ - - 320
109
+ - 64
110
+ first_bs: 96
111
+ fix_bs: false
112
+ divided_factor:
113
+ - 8
114
+ - 16
115
+ is_training: true
116
+ loader:
117
+ shuffle: true
118
+ batch_size_per_card: 96
119
+ drop_last: true
120
+ num_workers: 8
121
+ Eval:
122
+ dataset:
123
+ name: SimpleDataSet
124
+ data_dir: ./train_data
125
+ label_file_list:
126
+ - ./train_data/val_list.txt
127
+ transforms:
128
+ - DecodeImage:
129
+ img_mode: BGR
130
+ channel_first: false
131
+ - MultiLabelEncode:
132
+ gtc_encode: NRTRLabelEncode
133
+ - RecResizeImg:
134
+ image_shape:
135
+ - 3
136
+ - 48
137
+ - 320
138
+ - KeepKeys:
139
+ keep_keys:
140
+ - image
141
+ - label_ctc
142
+ - label_gtc
143
+ - length
144
+ - valid_ratio
145
+ loader:
146
+ shuffle: false
147
+ drop_last: false
148
+ batch_size_per_card: 128
149
+ num_workers: 4
150
+ profiler_options: null