Cyril666 committed
Commit 7a8bc63 · 1 Parent(s): 1a827c6

First model version

configs/pretrain_language_model.yaml ADDED
@@ -0,0 +1,45 @@
+global:
+  name: pretrain-language-model
+  phase: train
+  stage: pretrain-language
+  workdir: workdir
+  seed: ~
+
+dataset:
+  train: {
+    roots: ['data/WikiText-103.csv'],
+    batch_size: 4096
+  }
+  test: {
+    roots: ['data/WikiText-103_eval_d1.csv'],
+    batch_size: 4096
+  }
+
+training:
+  epochs: 80
+  show_iters: 50
+  eval_iters: 6000
+  save_iters: 3000
+
+optimizer:
+  type: Adam
+  true_wd: False
+  wd: 0.0
+  bn_wd: False
+  clip_grad: 20
+  lr: 0.0001
+  args: {
+    betas: !!python/tuple [0.9, 0.999], # for default Adam
+  }
+  scheduler: {
+    periods: [70, 10],
+    gamma: 0.1,
+  }
+
+model:
+  name: 'modules.model_language.BCNLanguage'
+  language: {
+    num_layers: 4,
+    loss_weight: 1.,
+    use_self_attn: False
+  }
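These configs are plain YAML except for the `!!python/tuple` tag on `betas`, which is a PyYAML-specific extension: `yaml.safe_load` rejects it, so a loader that constructs Python types is required. A minimal loading sketch (an assumption — this commit does not include the repo's own config loader):

```python
# Minimal sketch: load one of these configs with PyYAML. FullLoader is used
# because `!!python/tuple [0.9, 0.999]` is a Python-specific tag that
# yaml.safe_load() refuses to construct.
import yaml

with open('configs/pretrain_language_model.yaml') as f:
    cfg = yaml.load(f, Loader=yaml.FullLoader)

print(cfg['optimizer']['args']['betas'])      # (0.9, 0.999), a Python tuple
print(cfg['dataset']['train']['batch_size'])  # 4096
print(cfg['global']['seed'])                  # None (YAML `~`)
```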
configs/pretrain_vision_model.yaml ADDED
@@ -0,0 +1,58 @@
+global:
+  name: pretrain-vision-model
+  phase: train
+  stage: pretrain-vision
+  workdir: workdir
+  seed: ~
+
+dataset:
+  train: {
+    roots: ['data/training/MJ/MJ_train/',
+            'data/training/MJ/MJ_test/',
+            'data/training/MJ/MJ_valid/',
+            'data/training/ST'],
+    batch_size: 384
+  }
+  test: {
+    roots: ['data/evaluation/IIIT5k_3000',
+            'data/evaluation/SVT',
+            'data/evaluation/SVTP',
+            'data/evaluation/IC13_857',
+            'data/evaluation/IC15_1811',
+            'data/evaluation/CUTE80'],
+    batch_size: 384
+  }
+  data_aug: True
+  multiscales: False
+  num_workers: 14
+
+training:
+  epochs: 8
+  show_iters: 50
+  eval_iters: 3000
+  save_iters: 3000
+
+optimizer:
+  type: Adam
+  true_wd: False
+  wd: 0.0
+  bn_wd: False
+  clip_grad: 20
+  lr: 0.0001
+  args: {
+    betas: !!python/tuple [0.9, 0.999], # for default Adam
+  }
+  scheduler: {
+    periods: [6, 2],
+    gamma: 0.1,
+  }
+
+model:
+  name: 'modules.model_vision.BaseVision'
+  checkpoint: ~
+  vision: {
+    loss_weight: 1.,
+    attention: 'position',
+    backbone: 'transformer',
+    backbone_ln: 3,
+  }
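A plausible reading of the `scheduler` block (an assumption, since the training code is not part of this commit) is a multi-step decay: each entry in `periods` is the number of epochs spent at one rate, so `[6, 2]` with `gamma: 0.1` gives 6 epochs at 1e-4 followed by 2 at 1e-5, matching `epochs: 8`. A PyTorch sketch under that assumption:

```python
# Hedged sketch: map `periods`/`gamma` onto torch's MultiStepLR, assuming each
# period is the number of epochs at one rate and decay happens at the
# cumulative boundaries (here, after epoch 6 of the 8 total).
import itertools
import torch

model = torch.nn.Linear(8, 8)  # stand-in for the actual vision model
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4, betas=(0.9, 0.999))

periods, gamma = [6, 2], 0.1
milestones = list(itertools.accumulate(periods))[:-1]  # [6]
scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones, gamma=gamma)

for epoch in range(sum(periods)):  # 8 epochs, matching training.epochs
    # ... one training epoch ...
    scheduler.step()
```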
configs/pretrain_vision_model_sv.yaml ADDED
@@ -0,0 +1,58 @@
+global:
+  name: pretrain-vision-model-sv
+  phase: train
+  stage: pretrain-vision
+  workdir: workdir
+  seed: ~
+
+dataset:
+  train: {
+    roots: ['data/training/MJ/MJ_train/',
+            'data/training/MJ/MJ_test/',
+            'data/training/MJ/MJ_valid/',
+            'data/training/ST'],
+    batch_size: 384
+  }
+  test: {
+    roots: ['data/evaluation/IIIT5k_3000',
+            'data/evaluation/SVT',
+            'data/evaluation/SVTP',
+            'data/evaluation/IC13_857',
+            'data/evaluation/IC15_1811',
+            'data/evaluation/CUTE80'],
+    batch_size: 384
+  }
+  data_aug: True
+  multiscales: False
+  num_workers: 14
+
+training:
+  epochs: 8
+  show_iters: 50
+  eval_iters: 3000
+  save_iters: 3000
+
+optimizer:
+  type: Adam
+  true_wd: False
+  wd: 0.0
+  bn_wd: False
+  clip_grad: 20
+  lr: 0.0001
+  args: {
+    betas: !!python/tuple [0.9, 0.999], # for default Adam
+  }
+  scheduler: {
+    periods: [6, 2],
+    gamma: 0.1,
+  }
+
+model:
+  name: 'modules.model_vision.BaseVision'
+  checkpoint: ~
+  vision: {
+    loss_weight: 1.,
+    attention: 'attention',
+    backbone: 'transformer',
+    backbone_ln: 2,
+  }
configs/template.yaml ADDED
@@ -0,0 +1,67 @@
+global:
+  name: exp
+  phase: train
+  stage: pretrain-vision
+  workdir: /tmp/workdir
+  seed: ~
+
+dataset:
+  train: {
+    roots: ['data/training/MJ/MJ_train/',
+            'data/training/MJ/MJ_test/',
+            'data/training/MJ/MJ_valid/',
+            'data/training/ST'],
+    batch_size: 128
+  }
+  test: {
+    roots: ['data/evaluation/IIIT5k_3000',
+            'data/evaluation/SVT',
+            'data/evaluation/SVTP',
+            'data/evaluation/IC13_857',
+            'data/evaluation/IC15_1811',
+            'data/evaluation/CUTE80'],
+    batch_size: 128
+  }
+  charset_path: data/charset_36.txt
+  num_workers: 4
+  max_length: 25 # 30
+  image_height: 32
+  image_width: 128
+  case_sensitive: False
+  eval_case_sensitive: False
+  data_aug: True
+  multiscales: False
+  pin_memory: True
+  smooth_label: False
+  smooth_factor: 0.1
+  one_hot_y: True
+  use_sm: False
+
+training:
+  epochs: 6
+  show_iters: 50
+  eval_iters: 3000
+  save_iters: 20000
+  start_iters: 0
+  stats_iters: 100000
+
+optimizer:
+  type: Adadelta # Adadelta, Adam
+  true_wd: False
+  wd: 0. # 0.001
+  bn_wd: False
+  args: {
+    # betas: !!python/tuple [0.9, 0.99], # betas=(0.9,0.99) for AdamW
+    # betas: !!python/tuple [0.9, 0.999], # for default Adam
+  }
+  clip_grad: 20
+  lr: [1.0, 1.0, 1.0] # lr: [0.005, 0.005, 0.005]
+  scheduler: {
+    periods: [3, 2, 1],
+    gamma: 0.1,
+  }
+
+model:
+  name: 'modules.model_abinet.ABINetModel'
+  checkpoint: ~
+  strict: True
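template.yaml carries the full key set (charset, image size, label smoothing, and so on) while the named configs repeat only what they change, which suggests it acts as a shared base. A hedged sketch of that override semantics (`deep_merge` is a hypothetical helper, not this repo's Config class, which is not part of this commit):

```python
# Hedged sketch: overlay a named config onto template.yaml, assuming the
# repo resolves configs with base-plus-override semantics.
import yaml

def deep_merge(base: dict, override: dict) -> dict:
    """Recursively overlay `override` onto `base`, returning a new dict."""
    merged = dict(base)
    for key, value in override.items():
        if isinstance(value, dict) and isinstance(merged.get(key), dict):
            merged[key] = deep_merge(merged[key], value)
        else:
            merged[key] = value
    return merged

with open('configs/template.yaml') as f:
    base = yaml.load(f, Loader=yaml.FullLoader)
with open('configs/train_abinet.yaml') as f:
    override = yaml.load(f, Loader=yaml.FullLoader)

cfg = deep_merge(base, override)
print(cfg['dataset']['charset_path'])  # from the template
print(cfg['training']['epochs'])       # 10, overridden by train_abinet
```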
configs/train_abinet.yaml ADDED
@@ -0,0 +1,71 @@
+global:
+  name: train-abinet
+  phase: train
+  stage: train-super
+  workdir: workdir
+  seed: ~
+
+dataset:
+  train: {
+    roots: ['data/training/MJ/MJ_train/',
+            'data/training/MJ/MJ_test/',
+            'data/training/MJ/MJ_valid/',
+            'data/training/ST'],
+    batch_size: 384
+  }
+  test: {
+    roots: ['data/evaluation/IIIT5k_3000',
+            'data/evaluation/SVT',
+            'data/evaluation/SVTP',
+            'data/evaluation/IC13_857',
+            'data/evaluation/IC15_1811',
+            'data/evaluation/CUTE80'],
+    batch_size: 384
+  }
+  data_aug: True
+  multiscales: False
+  num_workers: 14
+
+training:
+  epochs: 10
+  show_iters: 50
+  eval_iters: 3000
+  save_iters: 3000
+
+optimizer:
+  type: Adam
+  true_wd: False
+  wd: 0.0
+  bn_wd: False
+  clip_grad: 20
+  lr: 0.0001
+  args: {
+    betas: !!python/tuple [0.9, 0.999], # for default Adam
+  }
+  scheduler: {
+    periods: [6, 4],
+    gamma: 0.1,
+  }
+
+model:
+  name: 'modules.model_abinet_iter.ABINetIterModel'
+  iter_size: 3
+  ensemble: ''
+  use_vision: False
+  vision: {
+    checkpoint: workdir/pretrain-vision-model/best-pretrain-vision-model.pth,
+    loss_weight: 1.,
+    attention: 'position',
+    backbone: 'transformer',
+    backbone_ln: 3,
+  }
+  language: {
+    checkpoint: workdir/pretrain-language-model/pretrain-language-model.pth,
+    num_layers: 4,
+    loss_weight: 1.,
+    detach: True,
+    use_self_attn: False
+  }
+  alignment: {
+    loss_weight: 1.,
+  }
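Both `checkpoint` entries point at the outputs of the two pretraining configs above, so those stages must finish before this one starts. A small pre-flight check makes the ordering explicit (a hypothetical helper, not part of this commit):

```python
# Hedged sketch: verify the pretraining checkpoints referenced by
# train_abinet.yaml exist before launching the train-super stage.
from pathlib import Path

required = [
    'workdir/pretrain-vision-model/best-pretrain-vision-model.pth',
    'workdir/pretrain-language-model/pretrain-language-model.pth',
]
missing = [p for p in required if not Path(p).is_file()]
if missing:
    raise FileNotFoundError(f'run the pretraining stages first; missing: {missing}')
```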
configs/train_abinet_sv.yaml ADDED
@@ -0,0 +1,71 @@
+global:
+  name: train-abinet-sv
+  phase: train
+  stage: train-super
+  workdir: workdir
+  seed: ~
+
+dataset:
+  train: {
+    roots: ['data/training/MJ/MJ_train/',
+            'data/training/MJ/MJ_test/',
+            'data/training/MJ/MJ_valid/',
+            'data/training/ST'],
+    batch_size: 384
+  }
+  test: {
+    roots: ['data/evaluation/IIIT5k_3000',
+            'data/evaluation/SVT',
+            'data/evaluation/SVTP',
+            'data/evaluation/IC13_857',
+            'data/evaluation/IC15_1811',
+            'data/evaluation/CUTE80'],
+    batch_size: 384
+  }
+  data_aug: True
+  multiscales: False
+  num_workers: 14
+
+training:
+  epochs: 10
+  show_iters: 50
+  eval_iters: 3000
+  save_iters: 3000
+
+optimizer:
+  type: Adam
+  true_wd: False
+  wd: 0.0
+  bn_wd: False
+  clip_grad: 20
+  lr: 0.0001
+  args: {
+    betas: !!python/tuple [0.9, 0.999], # for default Adam
+  }
+  scheduler: {
+    periods: [6, 4],
+    gamma: 0.1,
+  }
+
+model:
+  name: 'modules.model_abinet_iter.ABINetIterModel'
+  iter_size: 3
+  ensemble: ''
+  use_vision: False
+  vision: {
+    checkpoint: workdir/pretrain-vision-model-sv/best-pretrain-vision-model-sv.pth,
+    loss_weight: 1.,
+    attention: 'attention',
+    backbone: 'transformer',
+    backbone_ln: 2,
+  }
+  language: {
+    checkpoint: workdir/pretrain-language-model/pretrain-language-model.pth,
+    num_layers: 4,
+    loss_weight: 1.,
+    detach: True,
+    use_self_attn: False
+  }
+  alignment: {
+    loss_weight: 1.,
+  }
configs/train_abinet_wo_iter.yaml ADDED
@@ -0,0 +1,68 @@
+global:
+  name: train-abinet-wo-iter
+  phase: train
+  stage: train-super
+  workdir: workdir
+  seed: ~
+
+dataset:
+  train: {
+    roots: ['data/training/MJ/MJ_train/',
+            'data/training/MJ/MJ_test/',
+            'data/training/MJ/MJ_valid/',
+            'data/training/ST'],
+    batch_size: 384
+  }
+  test: {
+    roots: ['data/evaluation/IIIT5k_3000',
+            'data/evaluation/SVT',
+            'data/evaluation/SVTP',
+            'data/evaluation/IC13_857',
+            'data/evaluation/IC15_1811',
+            'data/evaluation/CUTE80'],
+    batch_size: 384
+  }
+  data_aug: True
+  multiscales: False
+  num_workers: 14
+
+training:
+  epochs: 10
+  show_iters: 50
+  eval_iters: 3000
+  save_iters: 3000
+
+optimizer:
+  type: Adam
+  true_wd: False
+  wd: 0.0
+  bn_wd: False
+  clip_grad: 20
+  lr: 0.0001
+  args: {
+    betas: !!python/tuple [0.9, 0.999], # for default Adam
+  }
+  scheduler: {
+    periods: [6, 4],
+    gamma: 0.1,
+  }
+
+model:
+  name: 'modules.model_abinet.ABINetModel'
+  vision: {
+    checkpoint: workdir/pretrain-vision-model/best-pretrain-vision-model.pth,
+    loss_weight: 1.,
+    attention: 'position',
+    backbone: 'transformer',
+    backbone_ln: 3,
+  }
+  language: {
+    checkpoint: workdir/pretrain-language-model/pretrain-language-model.pth,
+    num_layers: 4,
+    loss_weight: 1.,
+    detach: True,
+    use_self_attn: False
+  }
+  alignment: {
+    loss_weight: 1.,
+  }