Spaces:
Running
feat: update default parameters
Browse files- seq2seq/run_seq2seq_flax.py +1 -1
- seq2seq/sweep.yaml +4 -3
seq2seq/run_seq2seq_flax.py
CHANGED
@@ -219,7 +219,7 @@ class DataTrainingArguments:
|
|
219 |
default=False, metadata={"help": "Overwrite the cached training and evaluation sets"}
|
220 |
)
|
221 |
log_interval: Optional[int] = field(
|
222 |
-
default=
|
223 |
metadata={
|
224 |
"help": "For debugging purposes or quicker training, truncate the number of training examples to this "
|
225 |
"value if set."
|
|
|
219 |
default=False, metadata={"help": "Overwrite the cached training and evaluation sets"}
|
220 |
)
|
221 |
log_interval: Optional[int] = field(
|
222 |
+
default=40,
|
223 |
metadata={
|
224 |
"help": "For debugging purposes or quicker training, truncate the number of training examples to this "
|
225 |
"value if set."
|
seq2seq/sweep.yaml
CHANGED
@@ -9,12 +9,13 @@ parameters:
|
|
9 |
learning_rate:
|
10 |
distribution: log_uniform
|
11 |
# from exp(min) to exp(max), ie 1e-4 to 5e-3 on log scale
|
12 |
-
min: -9.
|
13 |
max: -5.3
|
14 |
gradient_accumulation_steps:
|
15 |
value: 8
|
16 |
warmup_steps:
|
17 |
-
|
|
|
18 |
command:
|
19 |
- python3
|
20 |
- ${program}
|
@@ -29,7 +30,7 @@ command:
|
|
29 |
- "--num_train_epochs"
|
30 |
- 1
|
31 |
- "--max_train_samples"
|
32 |
-
-
|
33 |
- "--per_device_train_batch_size"
|
34 |
- 56
|
35 |
- "--per_device_eval_batch_size"
|
|
|
9 |
learning_rate:
|
10 |
distribution: log_uniform
|
11 |
# from exp(min) to exp(max), ie 5e-5 to 5e-3 on log scale
|
12 |
+
min: -9.9
|
13 |
max: -5.3
|
14 |
gradient_accumulation_steps:
|
15 |
value: 8
|
16 |
warmup_steps:
|
17 |
+
# in term of optimization steps so multiplied by gradient accumulation
|
18 |
+
value: 125
|
19 |
command:
|
20 |
- python3
|
21 |
- ${program}
|
|
|
30 |
- "--num_train_epochs"
|
31 |
- 1
|
32 |
- "--max_train_samples"
|
33 |
+
- 1500000
|
34 |
- "--per_device_train_batch_size"
|
35 |
- 56
|
36 |
- "--per_device_eval_batch_size"
|