SinaAhmadi committed
Commit
fc30d58
1 Parent(s): 0767c4e

Adding config file

models/Mazandarni-Persian/config.yaml ADDED
@@ -0,0 +1,87 @@
+ name: "Mazanderani-Persian_1"
+
+ data:
+     train: "datasets/Mazanderani-Persian/1/train"
+     dev: "datasets/Mazanderani-Persian/1/dev"
+     test: "datasets/Mazanderani-Persian/1/test"
+     level: "char"
+     lowercase: False
+     normalize: False
+     max_sent_length: 100
+     dataset_type: "plain"
+
+     src:
+         lang: "src"
+         voc_limit: 100
+         voc_min_freq: 5
+         level: "char"
+     trg:
+         lang: "trg"
+         voc_limit: 100
+         voc_min_freq: 5
+         level: "char"
+
+ training:
+     random_seed: 42
+     optimizer: "adam"
+     learning_rate: 0.001
+     learning_rate_min: 0.0002
+     weight_decay: 0.0
+     clip_grad_norm: 1.0
+     batch_size: 64
+     scheduling: "plateau"
+     patience: 10
+     decrease_factor: 0.5
+     early_stopping_metric: "loss"
+     epochs: 80
+     validation_freq: 1000
+     logging_freq: 100
+     eval_metric: "bleu"
+     model_dir: "models/Mazanderani-Persian"
+     overwrite: True
+     shuffle: True
+     use_cuda: True
+     max_output_length: 100
+     print_valid_sents: [0, 3, 6, 9]
+     keep_best_ckpts: -1
+
+ testing:
+     n_best: 1
+     beam_size: 4
+     beam_alpha: 1.0
+     eval_metrics: ["bleu", "chrf", "sequence_accuracy"]
+     max_output_length: 50
+     batch_size: 10
+     batch_type: "sentence"
+     return_prob: "none"
+
+ model:
+     initializer: "xavier_uniform"
+     init_gain: 1.0
+     bias_initializer: "zeros"
+     embed_initializer: "xavier_uniform"
+     embed_init_gain: 1.0
+     encoder:
+         type: "transformer"
+         num_layers: 6
+         num_heads: 8
+         embeddings:
+             embedding_dim: 128
+             scale: True
+         # typically ff_size = 4 x hidden_size
+         hidden_size: 128
+         ff_size: 512
+         dropout: 0.2
+         layer_norm: "pre"
+     decoder:
+         type: "transformer"
+         num_layers: 6
+         num_heads: 8
+         embeddings:
+             embedding_dim: 128
+             scale: True
+         # typically ff_size = 4 x hidden_size
+         hidden_size: 128
+         ff_size: 512
+         dropout: 0.2
+         layer_norm: "pre"
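
The key names above (dataset_type, keep_best_ckpts, return_prob, the nested src/trg blocks) match the JoeyNMT configuration format; this is an assumption, as the framework is not named in the commit. Under that assumption, a minimal sketch of how such a config is typically consumed, using the path added in this commit:

import subprocess

# Path of the config file added in this commit.
CONFIG = "models/Mazandarni-Persian/config.yaml"

# Assumption: the config is intended for the JoeyNMT command-line interface.
# This launches character-level training with the settings above, equivalent to:
#   python -m joeynmt train models/Mazandarni-Persian/config.yaml
subprocess.run(["python", "-m", "joeynmt", "train", CONFIG], check=True)

# Evaluation on the configured test split could then be run with:
#   python -m joeynmt test models/Mazandarni-Persian/config.yaml

Note that with overwrite: True and model_dir set to "models/Mazanderani-Persian", rerunning training would overwrite any checkpoints already saved in that directory.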