ruanmelio committed on
Commit
31be087
·
verified ·
1 Parent(s): ec14681

Upload config.yaml with huggingface_hub

Browse files
Files changed (1) hide show
  1. config.yaml +129 -0
config.yaml ADDED
@@ -0,0 +1,129 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ name: "dyu_fr_transformer-sp"
3
+ joeynmt_version: "2.3.0"
4
+ model_dir: "/app/saved_model"
5
+ use_cuda: False # False for CPU training
6
+ fp16: False
7
+
8
+ data:
9
+ train: "../data/dyu_fr"
10
+ dev: "../data/dyu_fr"
11
+ test: "../data/dyu_fr"
12
+ dataset_type: "huggingface"
13
+ dataset_cfg:
14
+ name: "dyu-fr"
15
+ sample_dev_subset: 1460
16
+ src:
17
+ lang: "dyu"
18
+ max_length: 100
19
+ lowercase: False
20
+ normalize: False
21
+ level: "bpe"
22
+ voc_limit: 4000
23
+ voc_min_freq: 1
24
+ voc_file: "/app/saved_model/vocab.txt"
25
+ tokenizer_type: "sentencepiece"
26
+ tokenizer_cfg:
27
+ model_file: "/app/saved_model/sp.model"
28
+ trg:
29
+ lang: "fr"
30
+ max_length: 100
31
+ lowercase: False
32
+ normalize: False
33
+ level: "bpe"
34
+ voc_limit: 4000
35
+ voc_min_freq: 1
36
+ voc_file: "/app/saved_model/vocab.txt"
37
+ tokenizer_type: "sentencepiece"
38
+ tokenizer_cfg:
39
+ model_file: "/app/saved_model/sp.model"
40
+ special_symbols:
41
+ unk_token: "<unk>"
42
+ unk_id: 0
43
+ pad_token: "<pad>"
44
+ pad_id: 1
45
+ bos_token: "<s>"
46
+ bos_id: 2
47
+ eos_token: "</s>"
48
+ eos_id: 3
49
+
50
+
51
+ testing:
52
+ load_model: "/app/saved_model/best.ckpt"
53
+ n_best: 1
54
+ beam_size: 5
55
+ beam_alpha: 1.0
56
+ batch_size: 256
57
+ batch_type: "token"
58
+ max_output_length: 100
59
+ eval_metrics: ["bleu"]
60
+ #return_prob: "hyp"
61
+ #return_attention: False
62
+ sacrebleu_cfg:
63
+ tokenize: "13a"
64
+
65
+ training:
66
+ #load_model: "/app/saved_model/latest.ckpt"
67
+ #reset_best_ckpt: False
68
+ #reset_scheduler: False
69
+ #reset_optimizer: False
70
+ #reset_iter_state: False
71
+ random_seed: 42
72
+ optimizer: "adamw"
73
+ normalization: "tokens"
74
+ adam_betas: [0.9, 0.999]
75
+ scheduling: "warmupinversesquareroot"
76
+ learning_rate_warmup: 100
77
+ learning_rate: 0.0003
78
+ learning_rate_min: 0.00000001
79
+ weight_decay: 0.0
80
+ label_smoothing: 0.1
81
+ loss: "crossentropy"
82
+ batch_size: 512
83
+ batch_type: "token"
84
+ batch_multiplier: 4
85
+ early_stopping_metric: "bleu"
86
+ epochs: 6
87
+ updates: 550
88
+ validation_freq: 30
89
+ logging_freq: 5
90
+ overwrite: True
91
+ shuffle: True
92
+ print_valid_sents: [0, 1, 2, 3]
93
+ keep_best_ckpts: 3
94
+
95
+ model:
96
+ initializer: "xavier_uniform"
97
+ bias_initializer: "zeros"
98
+ init_gain: 1.0
99
+ embed_initializer: "xavier_uniform"
100
+ embed_init_gain: 1.0
101
+ tied_embeddings: True
102
+ tied_softmax: True
103
+ encoder:
104
+ type: "transformer"
105
+ num_layers: 6
106
+ num_heads: 4
107
+ embeddings:
108
+ embedding_dim: 256
109
+ scale: True
110
+ dropout: 0.0
111
+ # typically ff_size = 4 x hidden_size
112
+ hidden_size: 256
113
+ ff_size: 1024
114
+ dropout: 0.2
115
+ layer_norm: "pre"
116
+ decoder:
117
+ type: "transformer"
118
+ num_layers: 6
119
+ num_heads: 8
120
+ embeddings:
121
+ embedding_dim: 256
122
+ scale: True
123
+ dropout: 0.0
124
+ # typically ff_size = 4 x hidden_size
125
+ hidden_size: 256
126
+ ff_size: 1024
127
+ dropout: 0.1
128
+ layer_norm: "pre"
129
+