Model save

Browse files

Files changed (15) hide show

checkpoint-16932/README.md → README.md +0 -12
adapter_model.safetensors +1 -1
checkpoint-16932/adapter_config.json +0 -21
checkpoint-16932/adapter_model.safetensors +0 -3
checkpoint-16932/optimizer.pt +0 -3
checkpoint-16932/rng_state.pth +0 -3
checkpoint-16932/special_tokens_map.json +0 -24
checkpoint-16932/tokenizer.json +0 -0
checkpoint-16932/tokenizer_config.json +0 -32
checkpoint-16932/trainer_state.json +0 -254
checkpoint-16932/training_args.bin +0 -3
checkpoint-33860/README.md +12 -0
{checkpoint-16932 → checkpoint-33860}/adapter_model.bin +1 -1
runs/Dec01_03-27-22_baa8ebef2c59/events.out.tfevents.1701401247.baa8ebef2c59.118280.1 +2 -2
checkpoint-16932/scheduler.pt → runs/Dec01_03-27-22_baa8ebef2c59/events.out.tfevents.1701457316.baa8ebef2c59.118280.2 +2 -2

checkpoint-16932/README.md → README.md RENAMED Viewed

@@ -4,17 +4,6 @@ library_name: peft
 ## Training procedure
-The following `bitsandbytes` quantization config was used during training:
-- load_in_8bit: False
-- load_in_4bit: True
-- llm_int8_threshold: 6.0
-- llm_int8_skip_modules: None
-- llm_int8_enable_fp32_cpu_offload: False
-- llm_int8_has_fp16_weight: False
-- bnb_4bit_quant_type: nf4
-- bnb_4bit_use_double_quant: True
-- bnb_4bit_compute_dtype: bfloat16
 The following `bitsandbytes` quantization config was used during training:
 - load_in_8bit: False
 - load_in_4bit: True
@@ -27,6 +16,5 @@ The following `bitsandbytes` quantization config was used during training:
 - bnb_4bit_compute_dtype: bfloat16
 ### Framework versions
-- PEFT 0.4.0
 - PEFT 0.4.0

 ## Training procedure
 The following `bitsandbytes` quantization config was used during training:
 - load_in_8bit: False
 - load_in_4bit: True
 - bnb_4bit_compute_dtype: bfloat16
 ### Framework versions
 - PEFT 0.4.0

adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d9d2b3d4cac18b0038832d940fe40145fbe984bb714203040ef3e853ecc77691
 size 26235704

 version https://git-lfs.github.com/spec/v1
+oid sha256:e552727933b84bb09190a6ecb7d8bbc669472f0efc17be36189f9aa688f2eabc
 size 26235704

checkpoint-16932/adapter_config.json DELETED Viewed

@@ -1,21 +0,0 @@
-{
-  "auto_mapping": null,
-  "base_model_name_or_path": "NousResearch/Nous-Hermes-Llama2-13b",
-  "bias": "none",
-  "fan_in_fan_out": false,
-  "inference_mode": true,
-  "init_lora_weights": true,
-  "layers_pattern": null,
-  "layers_to_transform": null,
-  "lora_alpha": 16,
-  "lora_dropout": 0.1,
-  "modules_to_save": null,
-  "peft_type": "LORA",
-  "r": 8,
-  "revision": null,
-  "target_modules": [
-    "q_proj",
-    "v_proj"
-  ],
-  "task_type": "CAUSAL_LM"
-}

checkpoint-16932/adapter_model.safetensors DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:85092bad7d086fd774fcdcecddb04e151a94a4251955f258d495af6a2bc541ee
-size 26235704

checkpoint-16932/optimizer.pt DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:a7654149037db968eb318840063616c20bd3a953462c3ac7a396699aff568e46
-size 52562821

checkpoint-16932/rng_state.pth DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:388af3ba8f29d0cbff6a42906d5e1935d64721067b0665d100d728026c00ff20
-size 14575

checkpoint-16932/special_tokens_map.json DELETED Viewed

@@ -1,24 +0,0 @@
-{
-  "bos_token": {
-    "content": "<s>",
-    "lstrip": false,
-    "normalized": true,
-    "rstrip": false,
-    "single_word": false
-  },
-  "eos_token": {
-    "content": "</s>",
-    "lstrip": false,
-    "normalized": true,
-    "rstrip": false,
-    "single_word": false
-  },
-  "pad_token": "<unk>",
-  "unk_token": {
-    "content": "<unk>",
-    "lstrip": false,
-    "normalized": true,
-    "rstrip": false,
-    "single_word": false
-  }
-}

checkpoint-16932/tokenizer.json DELETED Viewed

The diff for this file is too large to render. See raw diff

checkpoint-16932/tokenizer_config.json DELETED Viewed

@@ -1,32 +0,0 @@
-{
-  "bos_token": {
-    "__type": "AddedToken",
-    "content": "<s>",
-    "lstrip": false,
-    "normalized": true,
-    "rstrip": false,
-    "single_word": false
-  },
-  "clean_up_tokenization_spaces": false,
-  "eos_token": {
-    "__type": "AddedToken",
-    "content": "</s>",
-    "lstrip": false,
-    "normalized": true,
-    "rstrip": false,
-    "single_word": false
-  },
-  "legacy": false,
-  "model_max_length": 1000000000000000019884624838656,
-  "pad_token": null,
-  "sp_model_kwargs": {},
-  "tokenizer_class": "LlamaTokenizer",
-  "unk_token": {
-    "__type": "AddedToken",
-    "content": "<unk>",
-    "lstrip": false,
-    "normalized": true,
-    "rstrip": false,
-    "single_word": false
-  }
-}

checkpoint-16932/trainer_state.json DELETED Viewed

@@ -1,254 +0,0 @@
-{
-  "best_metric": 0.7306132912635803,
-  "best_model_checkpoint": "./Zeroshot/01-12-23-NousResearch-Nous-Hermes-Llama2-13b_multilang-dataset-3.0.3-portuguese-2_epochs-10_batch_2/checkpoints/checkpoint-16932",
-  "epoch": 4.999852354938727,
-  "global_step": 16932,
-  "is_hyper_param_search": false,
-  "is_local_process_zero": true,
-  "is_world_process_zero": true,
-  "log_history": [
-    {
-      "epoch": 0.15,
-      "learning_rate": 5.8948611931482576e-05,
-      "loss": 1.3183,
-      "step": 500
-    },
-    {
-      "epoch": 0.3,
-      "learning_rate": 0.00011801535735380982,
-      "loss": 0.9343,
-      "step": 1000
-    },
-    {
-      "epoch": 0.44,
-      "learning_rate": 0.00017708210277613703,
-      "loss": 0.9005,
-      "step": 1500
-    },
-    {
-      "epoch": 0.59,
-      "learning_rate": 0.00023614884819846425,
-      "loss": 0.8859,
-      "step": 2000
-    },
-    {
-      "epoch": 0.74,
-      "learning_rate": 0.0002952155936207915,
-      "loss": 0.8707,
-      "step": 2500
-    },
-    {
-      "epoch": 0.89,
-      "learning_rate": 0.00035416420555227406,
-      "loss": 0.8523,
-      "step": 3000
-    },
-    {
-      "epoch": 1.0,
-      "eval_loss": 0.8459745645523071,
-      "eval_runtime": 133.0397,
-      "eval_samples_per_second": 11.32,
-      "eval_steps_per_second": 1.421,
-      "step": 3386
-    },
-    {
-      "epoch": 1.03,
-      "learning_rate": 0.00039998666871418663,
-      "loss": 0.8395,
-      "step": 3500
-    },
-    {
-      "epoch": 1.18,
-      "learning_rate": 0.00039960207649127196,
-      "loss": 0.8052,
-      "step": 4000
-    },
-    {
-      "epoch": 1.33,
-      "learning_rate": 0.0003986872720605752,
-      "loss": 0.8036,
-      "step": 4500
-    },
-    {
-      "epoch": 1.48,
-      "learning_rate": 0.00039724468545932536,
-      "loss": 0.7902,
-      "step": 5000
-    },
-    {
-      "epoch": 1.62,
-      "learning_rate": 0.0003952781486968895,
-      "loss": 0.7818,
-      "step": 5500
-    },
-    {
-      "epoch": 1.77,
-      "learning_rate": 0.00039279288557562877,
-      "loss": 0.7834,
-      "step": 6000
-    },
-    {
-      "epoch": 1.92,
-      "learning_rate": 0.0003897954978146717,
-      "loss": 0.7686,
-      "step": 6500
-    },
-    {
-      "epoch": 2.0,
-      "eval_loss": 0.7881951332092285,
-      "eval_runtime": 132.7462,
-      "eval_samples_per_second": 11.345,
-      "eval_steps_per_second": 1.424,
-      "step": 6773
-    },
-    {
-      "epoch": 2.07,
-      "learning_rate": 0.0003862939475134625,
-      "loss": 0.7371,
-      "step": 7000
-    },
-    {
-      "epoch": 2.21,
-      "learning_rate": 0.00038230601585633047,
-      "loss": 0.6893,
-      "step": 7500
-    },
-    {
-      "epoch": 2.36,
-      "learning_rate": 0.0003778263160223725,
-      "loss": 0.6936,
-      "step": 8000
-    },
-    {
-      "epoch": 2.51,
-      "learning_rate": 0.00037287424793815686,
-      "loss": 0.6923,
-      "step": 8500
-    },
-    {
-      "epoch": 2.66,
-      "learning_rate": 0.0003674629660106825,
-      "loss": 0.6862,
-      "step": 9000
-    },
-    {
-      "epoch": 2.81,
-      "learning_rate": 0.00036160684447787385,
-      "loss": 0.6869,
-      "step": 9500
-    },
-    {
-      "epoch": 2.95,
-      "learning_rate": 0.0003553214392256293,
-      "loss": 0.6856,
-      "step": 10000
-    },
-    {
-      "epoch": 3.0,
-      "eval_loss": 0.7562845945358276,
-      "eval_runtime": 132.7178,
-      "eval_samples_per_second": 11.347,
-      "eval_steps_per_second": 1.424,
-      "step": 10159
-    },
-    {
-      "epoch": 3.1,
-      "learning_rate": 0.0003486234464660031,
-      "loss": 0.6199,
-      "step": 10500
-    },
-    {
-      "epoch": 3.25,
-      "learning_rate": 0.0003415452256149365,
-      "loss": 0.595,
-      "step": 11000
-    },
-    {
-      "epoch": 3.4,
-      "learning_rate": 0.0003340772155486902,
-      "loss": 0.5962,
-      "step": 11500
-    },
-    {
-      "epoch": 3.54,
-      "learning_rate": 0.0003262690400990177,
-      "loss": 0.6019,
-      "step": 12000
-    },
-    {
-      "epoch": 3.69,
-      "learning_rate": 0.00031811015213363493,
-      "loss": 0.6043,
-      "step": 12500
-    },
-    {
-      "epoch": 3.84,
-      "learning_rate": 0.0003096375227201415,
-      "loss": 0.5984,
-      "step": 13000
-    },
-    {
-      "epoch": 3.99,
-      "learning_rate": 0.0003008736580950757,
-      "loss": 0.6011,
-      "step": 13500
-    },
-    {
-      "epoch": 4.0,
-      "eval_loss": 0.738968551158905,
-      "eval_runtime": 132.6823,
-      "eval_samples_per_second": 11.35,
-      "eval_steps_per_second": 1.424,
-      "step": 13546
-    },
-    {
-      "epoch": 4.13,
-      "learning_rate": 0.00029184183811650945,
-      "loss": 0.5125,
-      "step": 14000
-    },
-    {
-      "epoch": 4.28,
-      "learning_rate": 0.00028256605442468665,
-      "loss": 0.5143,
-      "step": 14500
-    },
-    {
-      "epoch": 4.43,
-      "learning_rate": 0.00027307094671192217,
-      "loss": 0.5171,
-      "step": 15000
-    },
-    {
-      "epoch": 4.58,
-      "learning_rate": 0.0002633817372710499,
-      "loss": 0.5133,
-      "step": 15500
-    },
-    {
-      "epoch": 4.72,
-      "learning_rate": 0.00025352416399628225,
-      "loss": 0.5155,
-      "step": 16000
-    },
-    {
-      "epoch": 4.87,
-      "learning_rate": 0.0002435244120144539,
-      "loss": 0.5217,
-      "step": 16500
-    },
-    {
-      "epoch": 5.0,
-      "eval_loss": 0.7306132912635803,
-      "eval_runtime": 132.674,
-      "eval_samples_per_second": 11.351,
-      "eval_steps_per_second": 1.425,
-      "step": 16932
-    }
-  ],
-  "max_steps": 33860,
-  "num_train_epochs": 10,
-  "total_flos": 7.54467499294679e+17,
-  "trial_name": null,
-  "trial_params": null
-}

checkpoint-16932/training_args.bin DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:5fb2200feb70e8262514df4fd89c2cc8501a63e46178c2943084ed504f58b222
-size 4283

checkpoint-33860/README.md CHANGED Viewed

@@ -4,6 +4,17 @@ library_name: peft
 ## Training procedure
 The following `bitsandbytes` quantization config was used during training:
 - load_in_8bit: False
 - load_in_4bit: True
@@ -16,5 +27,6 @@ The following `bitsandbytes` quantization config was used during training:
 - bnb_4bit_compute_dtype: bfloat16
 ### Framework versions
 - PEFT 0.4.0

 ## Training procedure
+The following `bitsandbytes` quantization config was used during training:
+- load_in_8bit: False
+- load_in_4bit: True
+- llm_int8_threshold: 6.0
+- llm_int8_skip_modules: None
+- llm_int8_enable_fp32_cpu_offload: False
+- llm_int8_has_fp16_weight: False
+- bnb_4bit_quant_type: nf4
+- bnb_4bit_use_double_quant: True
+- bnb_4bit_compute_dtype: bfloat16
 The following `bitsandbytes` quantization config was used during training:
 - load_in_8bit: False
 - load_in_4bit: True
 - bnb_4bit_compute_dtype: bfloat16
 ### Framework versions
+- PEFT 0.4.0
 - PEFT 0.4.0

{checkpoint-16932 → checkpoint-33860}/adapter_model.bin RENAMED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0bec7c5a936a7abc8d5ef91b3a7aafce166627fd034b75f17eb0797c198f405f
 size 26271757

 version https://git-lfs.github.com/spec/v1
+oid sha256:39be1ac10d9af1f35e45fca3b9fb4661cef7a2b03b73aaa39b0e7f089bfeddd8
 size 26271757

runs/Dec01_03-27-22_baa8ebef2c59/events.out.tfevents.1701401247.baa8ebef2c59.118280.1 CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:752c4fdfaeb486837f616afa0776168c637f78e369a638462e518adf13065ea7
-size 18128

 version https://git-lfs.github.com/spec/v1
+oid sha256:f5f67fd7c48800ff9cc0b07ec211dbfb54eb12757b61ff6c6e0dc625dc34148d
+size 18488

checkpoint-16932/scheduler.pt → runs/Dec01_03-27-22_baa8ebef2c59/events.out.tfevents.1701457316.baa8ebef2c59.118280.2 RENAMED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:40bea906a4eaa39d93a755eec22eae65bead508a251674c24663c32a980490f4
-size 627

 version https://git-lfs.github.com/spec/v1
+oid sha256:74a56b3b3f477606c88acf0cb94d8f58d56c0a4bd336a3a20096dd88aaef66e6
+size 364