ramdhanfirdaus commited on
Commit
db61bf0
·
1 Parent(s): 6cddd10

Training in progress, step 3200, checkpoint

Browse files
last-checkpoint/README.md CHANGED
@@ -201,18 +201,6 @@ Carbon emissions can be estimated using the [Machine Learning Impact calculator]
201
  ## Training procedure
202
 
203
 
204
- The following `bitsandbytes` quantization config was used during training:
205
- - quant_method: bitsandbytes
206
- - load_in_8bit: False
207
- - load_in_4bit: True
208
- - llm_int8_threshold: 6.0
209
- - llm_int8_skip_modules: None
210
- - llm_int8_enable_fp32_cpu_offload: False
211
- - llm_int8_has_fp16_weight: False
212
- - bnb_4bit_quant_type: nf4
213
- - bnb_4bit_use_double_quant: True
214
- - bnb_4bit_compute_dtype: float16
215
-
216
  ### Framework versions
217
 
218
 
 
201
  ## Training procedure
202
 
203
 
 
 
 
 
 
 
 
 
 
 
 
 
204
  ### Framework versions
205
 
206
 
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:648676827f40593439367dacf2ad638522ef14510808c4ba2af6eabf42c8342c
3
  size 75507072
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f7d3db52e4d0054936ff52bf1ad37ac5cbca2e26a2f3c0b3c23351936d9dfd6b
3
  size 75507072
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:98b8c16c0f5f93065962a416a72869294e42de29542f91565e010fff2a0b375f
3
- size 151034501
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bbee7f9a425faba7846fad117ec9afb00a6786a4b4af07085358abd08bc4bb53
3
+ size 151032837
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:63998d98da08278c470436d7f3090e0552bc25f2e2fd93ff495fe8ccda5df6f6
3
  size 14575
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eefe083b6454775aee01bb69e64bad53187f7b97719dea614c013fe397ac511b
3
  size 14575
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:43e3cf8d56a3f083d00cc85544d76ada2f884a1018c8752332d96f2799911117
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:50b57a34df83b700e2c13775ff734b4569b74ce7e20da3479db76577bb4e906e
3
  size 627
last-checkpoint/special_tokens_map.json CHANGED
@@ -12,6 +12,12 @@
12
  ">>SUFFIX<<",
13
  ">>MIDDLE<<"
14
  ],
15
- "eos_token": "<|endoftext|>",
 
 
 
 
 
 
16
  "pad_token": "<|endoftext|>"
17
  }
 
12
  ">>SUFFIX<<",
13
  ">>MIDDLE<<"
14
  ],
15
+ "eos_token": {
16
+ "content": "<|endoftext|>",
17
+ "lstrip": false,
18
+ "normalized": false,
19
+ "rstrip": false,
20
+ "single_word": false
21
+ },
22
  "pad_token": "<|endoftext|>"
23
  }
last-checkpoint/tokenizer_config.json CHANGED
@@ -113,11 +113,15 @@
113
  ],
114
  "clean_up_tokenization_spaces": true,
115
  "eos_token": "<|endoftext|>",
 
116
  "model_input_names": [
117
  "input_ids",
118
  "attention_mask"
119
  ],
120
  "model_max_length": 2048,
121
  "pad_token": "<|endoftext|>",
122
- "tokenizer_class": "PreTrainedTokenizerFast"
 
 
 
123
  }
 
113
  ],
114
  "clean_up_tokenization_spaces": true,
115
  "eos_token": "<|endoftext|>",
116
+ "max_length": 512,
117
  "model_input_names": [
118
  "input_ids",
119
  "attention_mask"
120
  ],
121
  "model_max_length": 2048,
122
  "pad_token": "<|endoftext|>",
123
+ "stride": 0,
124
+ "tokenizer_class": "PreTrainedTokenizerFast",
125
+ "truncation_side": "right",
126
+ "truncation_strategy": "longest_first"
127
  }
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 1.120961308479309,
3
- "best_model_checkpoint": "./outputs/checkpoint-3600",
4
- "epoch": 2.6229508196721314,
5
  "eval_steps": 100,
6
- "global_step": 3600,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -445,79 +445,23 @@
445
  {
446
  "epoch": 2.33,
447
  "learning_rate": 0.0002,
448
- "loss": 1.1435,
449
  "step": 3200
450
  },
451
  {
452
  "epoch": 2.33,
453
- "eval_loss": 1.1588889360427856,
454
- "eval_runtime": 417.5979,
455
- "eval_samples_per_second": 15.024,
456
- "eval_steps_per_second": 1.88,
457
  "step": 3200
458
- },
459
- {
460
- "epoch": 2.4,
461
- "learning_rate": 0.0002,
462
- "loss": 1.127,
463
- "step": 3300
464
- },
465
- {
466
- "epoch": 2.4,
467
- "eval_loss": 1.1483901739120483,
468
- "eval_runtime": 425.2555,
469
- "eval_samples_per_second": 14.753,
470
- "eval_steps_per_second": 1.846,
471
- "step": 3300
472
- },
473
- {
474
- "epoch": 2.48,
475
- "learning_rate": 0.0002,
476
- "loss": 1.1119,
477
- "step": 3400
478
- },
479
- {
480
- "epoch": 2.48,
481
- "eval_loss": 1.139683723449707,
482
- "eval_runtime": 424.1155,
483
- "eval_samples_per_second": 14.793,
484
- "eval_steps_per_second": 1.851,
485
- "step": 3400
486
- },
487
- {
488
- "epoch": 2.55,
489
- "learning_rate": 0.0002,
490
- "loss": 1.1122,
491
- "step": 3500
492
- },
493
- {
494
- "epoch": 2.55,
495
- "eval_loss": 1.130289077758789,
496
- "eval_runtime": 417.8399,
497
- "eval_samples_per_second": 15.015,
498
- "eval_steps_per_second": 1.879,
499
- "step": 3500
500
- },
501
- {
502
- "epoch": 2.62,
503
- "learning_rate": 0.0002,
504
- "loss": 1.1058,
505
- "step": 3600
506
- },
507
- {
508
- "epoch": 2.62,
509
- "eval_loss": 1.120961308479309,
510
- "eval_runtime": 438.2292,
511
- "eval_samples_per_second": 14.317,
512
- "eval_steps_per_second": 1.791,
513
- "step": 3600
514
  }
515
  ],
516
  "logging_steps": 100,
517
  "max_steps": 4116,
518
  "num_train_epochs": 3,
519
  "save_steps": 100,
520
- "total_flos": 1.1670242076660173e+18,
521
  "trial_name": null,
522
  "trial_params": null
523
  }
 
1
  {
2
+ "best_metric": 1.1691069602966309,
3
+ "best_model_checkpoint": "./outputs/checkpoint-3100",
4
+ "epoch": 2.33224043715847,
5
  "eval_steps": 100,
6
+ "global_step": 3200,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
445
  {
446
  "epoch": 2.33,
447
  "learning_rate": 0.0002,
448
+ "loss": 1.1534,
449
  "step": 3200
450
  },
451
  {
452
  "epoch": 2.33,
453
+ "eval_loss": 1.1825001239776611,
454
+ "eval_runtime": 339.9447,
455
+ "eval_samples_per_second": 18.456,
456
+ "eval_steps_per_second": 2.309,
457
  "step": 3200
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
458
  }
459
  ],
460
  "logging_steps": 100,
461
  "max_steps": 4116,
462
  "num_train_epochs": 3,
463
  "save_steps": 100,
464
+ "total_flos": 1.0375834790343045e+18,
465
  "trial_name": null,
466
  "trial_params": null
467
  }
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6ac2a584f6fcab7b83aaf6c33387711020127b74d44cfc40eb76fb8dcafc6325
3
  size 4219
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fe5603840bf23a51f167eb469a32e263b0b26363061da6a1848375d241e4e917
3
  size 4219