Spaces:
Running
feat: update default parameters
Browse files- seq2seq/run_seq2seq_flax.py +1 -1
- seq2seq/sweep.yaml +4 -3
seq2seq/run_seq2seq_flax.py
CHANGED
@@ -219,7 +219,7 @@ class DataTrainingArguments:
|
|
219 |
default=False, metadata={"help": "Overwrite the cached training and evaluation sets"}
|
220 |
)
|
221 |
log_interval: Optional[int] = field(
|
222 |
-
default=
|
223 |
metadata={
|
224 |
"help": "For debugging purposes or quicker training, truncate the number of training examples to this "
|
225 |
"value if set."
|
|
|
219 |
default=False, metadata={"help": "Overwrite the cached training and evaluation sets"}
|
220 |
)
|
221 |
log_interval: Optional[int] = field(
|
222 |
+
default=40,
|
223 |
metadata={
|
224 |
"help": "For debugging purposes or quicker training, truncate the number of training examples to this "
|
225 |
"value if set."
|
seq2seq/sweep.yaml
CHANGED
@@ -9,12 +9,13 @@ parameters:
|
|
9 |
learning_rate:
|
10 |
distribution: log_uniform
|
11 |
# from exp(min) to exp(max), ie 1e-4 to 5e-3 on log scale
|
12 |
-
min: -9.
|
13 |
max: -5.3
|
14 |
gradient_accumulation_steps:
|
15 |
value: 8
|
16 |
warmup_steps:
|
17 |
-
|
|
|
18 |
command:
|
19 |
- python3
|
20 |
- ${program}
|
@@ -29,7 +30,7 @@ command:
|
|
29 |
- "--num_train_epochs"
|
30 |
- 1
|
31 |
- "--max_train_samples"
|
32 |
-
-
|
33 |
- "--per_device_train_batch_size"
|
34 |
- 56
|
35 |
- "--per_device_eval_batch_size"
|
|
|
9 |
learning_rate:
|
10 |
distribution: log_uniform
|
11 |
# from exp(min) to exp(max), ie 5e-5 to 5e-3 on log scale
|
12 |
+
min: -9.9
|
13 |
max: -5.3
|
14 |
gradient_accumulation_steps:
|
15 |
value: 8
|
16 |
warmup_steps:
|
17 |
+
# in term of optimization steps so multiplied by gradient accumulation
|
18 |
+
value: 125
|
19 |
command:
|
20 |
- python3
|
21 |
- ${program}
|
|
|
30 |
- "--num_train_epochs"
|
31 |
- 1
|
32 |
- "--max_train_samples"
|
33 |
+
- 1500000
|
34 |
- "--per_device_train_batch_size"
|
35 |
- 56
|
36 |
- "--per_device_eval_batch_size"
|