mtasic85 committed
Commit 08ac2cf
1 Parent(s): 5db1795

pretrain model

Files changed (1)
  1. scripts/pretrain-model.yaml +4 -4
scripts/pretrain-model.yaml CHANGED

@@ -86,7 +86,7 @@ train:
   max_steps:

   # Limits the length of samples. Off by default (type: Optional[int], default: null)
-  # max_seq_length: 8193
+  max_seq_length: 8193

   # Whether to tie the embedding weights with the language modeling head weights. (type: Optional[bool], default: False)
   tie_embeddings: true

@@ -121,15 +121,15 @@ optimizer:

   init_args:
     # (type: float, default: 0.001)
-    lr: 1e-4
+    lr: 4e-04

     # (type: float, default: 0.01)
-    weight_decay: 1e-2
+    weight_decay: 0.1

     # (type: tuple, default: (0.9,0.999))
     betas:
       - 0.9
-      - 0.999
+      - 0.95

 # How many devices/GPUs to use. Uses all GPUs by default. (type: Union[int, str], default: auto)
 devices: auto
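
For reference, a minimal Python sketch of the optimizer these new hyperparameters describe, assuming the config's init_args are forwarded to torch.optim.AdamW (the optimizer class_path is not shown in this hunk, so that mapping is an assumption; the model below is a placeholder):

# Sketch only: optimizer construction implied by the updated init_args,
# assuming they are passed to torch.optim.AdamW.
import torch

model = torch.nn.Linear(8, 8)  # placeholder standing in for the pretrained model

optimizer = torch.optim.AdamW(
    model.parameters(),
    lr=4e-04,           # was 1e-4
    weight_decay=0.1,   # was 1e-2
    betas=(0.9, 0.95),  # second beta was 0.999
)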