ramdhanfirdaus commited on
Commit
e992e89
1 Parent(s): 7bb504a

Training in progress, step 3100, checkpoint

Browse files
last-checkpoint/README.md CHANGED
@@ -201,18 +201,6 @@ Carbon emissions can be estimated using the [Machine Learning Impact calculator]
201
  ## Training procedure
202
 
203
 
204
- The following `bitsandbytes` quantization config was used during training:
205
- - quant_method: bitsandbytes
206
- - load_in_8bit: False
207
- - load_in_4bit: True
208
- - llm_int8_threshold: 6.0
209
- - llm_int8_skip_modules: None
210
- - llm_int8_enable_fp32_cpu_offload: False
211
- - llm_int8_has_fp16_weight: False
212
- - bnb_4bit_quant_type: nf4
213
- - bnb_4bit_use_double_quant: True
214
- - bnb_4bit_compute_dtype: float16
215
-
216
  ### Framework versions
217
 
218
 
 
201
  ## Training procedure
202
 
203
 
 
 
 
 
 
 
 
 
 
 
 
 
204
  ### Framework versions
205
 
206
 
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:027649fdc9bb39375f7877e818837eec9151d245c636ff7645c95d33d3a52732
3
  size 75507072
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:153147260199c704e8bfd106addf0bce7dd09e996a0b77778bff08d6d95bc8c8
3
  size 75507072
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7ff7270ada9cf8cf5634d4915d2aa34fa7bf7f529feeec57587911f95fff17cb
3
- size 151034501
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2f8382c25b1ec233f3d8dde2bd7aaeff523df68b0959f1aa2b74feb2306b777e
3
+ size 151032837
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5111e44aa3c7fb3d2679f29ed14de9b378a43fa023e1e3c7c0e2bac399b6ea6f
3
  size 14575
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ab6b545b6dd6418e570a8474ea31c45a1baed67ab97da8014ad68491fbcf9bed
3
  size 14575
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:31f466e63f2d702a9646f61d3cb0499d7a443ca833cfea51694a53eaa24cfd01
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0f4766a50a63740d51a19c983a4de442d7f62ff1e951fc016c44cc6c58fb9db1
3
  size 627
last-checkpoint/special_tokens_map.json CHANGED
@@ -12,6 +12,12 @@
12
  ">>SUFFIX<<",
13
  ">>MIDDLE<<"
14
  ],
15
- "eos_token": "<|endoftext|>",
 
 
 
 
 
 
16
  "pad_token": "<|endoftext|>"
17
  }
 
12
  ">>SUFFIX<<",
13
  ">>MIDDLE<<"
14
  ],
15
+ "eos_token": {
16
+ "content": "<|endoftext|>",
17
+ "lstrip": false,
18
+ "normalized": false,
19
+ "rstrip": false,
20
+ "single_word": false
21
+ },
22
  "pad_token": "<|endoftext|>"
23
  }
last-checkpoint/tokenizer_config.json CHANGED
@@ -113,11 +113,15 @@
113
  ],
114
  "clean_up_tokenization_spaces": true,
115
  "eos_token": "<|endoftext|>",
 
116
  "model_input_names": [
117
  "input_ids",
118
  "attention_mask"
119
  ],
120
  "model_max_length": 2048,
121
  "pad_token": "<|endoftext|>",
122
- "tokenizer_class": "PreTrainedTokenizerFast"
 
 
 
123
  }
 
113
  ],
114
  "clean_up_tokenization_spaces": true,
115
  "eos_token": "<|endoftext|>",
116
+ "max_length": 512,
117
  "model_input_names": [
118
  "input_ids",
119
  "attention_mask"
120
  ],
121
  "model_max_length": 2048,
122
  "pad_token": "<|endoftext|>",
123
+ "stride": 0,
124
+ "tokenizer_class": "PreTrainedTokenizerFast",
125
+ "truncation_side": "right",
126
+ "truncation_strategy": "longest_first"
127
  }
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 1.128933310508728,
3
- "best_model_checkpoint": "./outputs/checkpoint-3500",
4
- "epoch": 2.5500910746812386,
5
  "eval_steps": 100,
6
- "global_step": 3500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -431,79 +431,23 @@
431
  {
432
  "epoch": 2.26,
433
  "learning_rate": 0.0002,
434
- "loss": 1.1391,
435
  "step": 3100
436
  },
437
  {
438
  "epoch": 2.26,
439
- "eval_loss": 1.1678684949874878,
440
- "eval_runtime": 418.5265,
441
- "eval_samples_per_second": 14.991,
442
- "eval_steps_per_second": 1.876,
443
  "step": 3100
444
- },
445
- {
446
- "epoch": 2.33,
447
- "learning_rate": 0.0002,
448
- "loss": 1.1423,
449
- "step": 3200
450
- },
451
- {
452
- "epoch": 2.33,
453
- "eval_loss": 1.1581230163574219,
454
- "eval_runtime": 425.212,
455
- "eval_samples_per_second": 14.755,
456
- "eval_steps_per_second": 1.846,
457
- "step": 3200
458
- },
459
- {
460
- "epoch": 2.4,
461
- "learning_rate": 0.0002,
462
- "loss": 1.1262,
463
- "step": 3300
464
- },
465
- {
466
- "epoch": 2.4,
467
- "eval_loss": 1.147517442703247,
468
- "eval_runtime": 424.8484,
469
- "eval_samples_per_second": 14.768,
470
- "eval_steps_per_second": 1.848,
471
- "step": 3300
472
- },
473
- {
474
- "epoch": 2.48,
475
- "learning_rate": 0.0002,
476
- "loss": 1.1114,
477
- "step": 3400
478
- },
479
- {
480
- "epoch": 2.48,
481
- "eval_loss": 1.1388046741485596,
482
- "eval_runtime": 418.7042,
483
- "eval_samples_per_second": 14.984,
484
- "eval_steps_per_second": 1.875,
485
- "step": 3400
486
- },
487
- {
488
- "epoch": 2.55,
489
- "learning_rate": 0.0002,
490
- "loss": 1.1112,
491
- "step": 3500
492
- },
493
- {
494
- "epoch": 2.55,
495
- "eval_loss": 1.128933310508728,
496
- "eval_runtime": 418.5353,
497
- "eval_samples_per_second": 14.99,
498
- "eval_steps_per_second": 1.876,
499
- "step": 3500
500
  }
501
  ],
502
  "logging_steps": 100,
503
  "max_steps": 4116,
504
  "num_train_epochs": 3,
505
  "save_steps": 100,
506
- "total_flos": 1.134669408060162e+18,
507
  "trial_name": null,
508
  "trial_params": null
509
  }
 
1
  {
2
+ "best_metric": 1.175557017326355,
3
+ "best_model_checkpoint": "./outputs/checkpoint-3000",
4
+ "epoch": 2.2593806921675776,
5
  "eval_steps": 100,
6
+ "global_step": 3100,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
431
  {
432
  "epoch": 2.26,
433
  "learning_rate": 0.0002,
434
+ "loss": 1.1799,
435
  "step": 3100
436
  },
437
  {
438
  "epoch": 2.26,
439
+ "eval_loss": 1.1915525197982788,
440
+ "eval_runtime": 341.6989,
441
+ "eval_samples_per_second": 18.361,
442
+ "eval_steps_per_second": 2.297,
443
  "step": 3100
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
444
  }
445
  ],
446
  "logging_steps": 100,
447
  "max_steps": 4116,
448
  "num_train_epochs": 3,
449
  "save_steps": 100,
450
+ "total_flos": 1.0050808386648453e+18,
451
  "trial_name": null,
452
  "trial_params": null
453
  }
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e9ab7fd32b0972b4627931468d93dab0ebd2b5e2b7a37d3907f097b2d2c0cfba
3
  size 4219
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ac18d79547677891adc0e1a81cd6b672e7bb00fb497880f12dac0fd79be710c2
3
  size 4219