tangledgroup
/

tangled-llama-i-128k-v0.1

Text Generation

Inference Endpoints

Model card Files Files and versions Community

mtasic85 committed 5 days ago

Commit

08ac2cf

•

1 Parent(s): 5db1795

pretrain model

Files changed (1) hide show

scripts/pretrain-model.yaml +4 -4

scripts/pretrain-model.yaml CHANGED Viewed

@@ -86,7 +86,7 @@ train:
   max_steps:
   # Limits the length of samples. Off by default (type: Optional[int], default: null)
-  # max_seq_length: 8193
   # Whether to tie the embedding weights with the language modeling head weights. (type: Optional[bool], default: False)
   tie_embeddings: true
@@ -121,15 +121,15 @@ optimizer:
   init_args:
     #   (type: float, default: 0.001)
-    lr: 1e-4
     #   (type: float, default: 0.01)
-    weight_decay: 1e-2
     #   (type: tuple, default: (0.9,0.999))
     betas:
       - 0.9
-      - 0.999
 # How many devices/GPUs to use. Uses all GPUs by default. (type: Union[int, str], default: auto)
 devices: auto

   max_steps:
   # Limits the length of samples. Off by default (type: Optional[int], default: null)
+  max_seq_length: 8193
   # Whether to tie the embedding weights with the language modeling head weights. (type: Optional[bool], default: False)
   tie_embeddings: true
   init_args:
     #   (type: float, default: 0.001)
+    lr: 4e-04
     #   (type: float, default: 0.01)
+    weight_decay: 0.1
     #   (type: tuple, default: (0.9,0.999))
     betas:
       - 0.9
+      - 0.95
 # How many devices/GPUs to use. Uses all GPUs by default. (type: Union[int, str], default: auto)
 devices: auto