diabolic6045 committed on
Commit
282d29e
1 Parent(s): 3114e41

End of training

Browse files
Files changed (2) hide show
  1. README.md +5 -5
  2. adapter_model.bin +1 -1
README.md CHANGED
@@ -63,7 +63,7 @@ wandb_name: संस्कृतम्-llama
63
  wandb_log_model:
64
 
65
  gradient_accumulation_steps: 4
66
- micro_batch_size: 1
67
  num_epochs: 1
68
  optimizer: paged_adamw_8bit
69
  lr_scheduler: cosine
@@ -131,14 +131,14 @@ More information needed
131
 
132
  The following hyperparameters were used during training:
133
  - learning_rate: 2e-05
134
- - train_batch_size: 1
135
- - eval_batch_size: 1
136
  - seed: 42
137
  - distributed_type: multi-GPU
138
  - num_devices: 2
139
  - gradient_accumulation_steps: 4
140
- - total_train_batch_size: 8
141
- - total_eval_batch_size: 2
142
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
143
  - lr_scheduler_type: cosine
144
  - lr_scheduler_warmup_steps: 10
 
63
  wandb_log_model:
64
 
65
  gradient_accumulation_steps: 4
66
+ micro_batch_size: 2
67
  num_epochs: 1
68
  optimizer: paged_adamw_8bit
69
  lr_scheduler: cosine
 
131
 
132
  The following hyperparameters were used during training:
133
  - learning_rate: 2e-05
134
+ - train_batch_size: 2
135
+ - eval_batch_size: 2
136
  - seed: 42
137
  - distributed_type: multi-GPU
138
  - num_devices: 2
139
  - gradient_accumulation_steps: 4
140
+ - total_train_batch_size: 16
141
+ - total_eval_batch_size: 4
142
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
143
  - lr_scheduler_type: cosine
144
  - lr_scheduler_warmup_steps: 10
adapter_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:89d88f847c0e0b16a88c3c027e4ca023c4aa4cb1db074e327121b9c858e90079
3
  size 167843194
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cd548dd24c84749e0d56af20848f2169dbca0ab2b67242a58f27e68c2db79019
3
  size 167843194