End of training

Files changed (6)

README.md CHANGED Viewed

@@ -2,10 +2,8 @@
 license: apache-2.0
 library_name: peft
 tags:
-- trl
-- sft
 - generated_from_trainer
-base_model: mistralai/Mistral-7B-v0.1
 model-index:
 - name: results
   results: []
@@ -16,7 +14,7 @@ should probably proofread and complete it, then remove this comment. -->
 # results
-This model is a fine-tuned version of [mistralai/Mistral-7B-v0.1](https://huggingface.co/mistralai/Mistral-7B-v0.1) on an unknown dataset.
 ## Model description
@@ -35,21 +33,22 @@ More information needed
 ### Training hyperparameters
 The following hyperparameters were used during training:
-- learning_rate: 0.0002
 - train_batch_size: 8
 - eval_batch_size: 8
 - seed: 42
-- gradient_accumulation_steps: 2
-- total_train_batch_size: 16
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
-- lr_scheduler_type: constant
-- lr_scheduler_warmup_ratio: 0.3
 - num_epochs: 2
 ### Framework versions
-- PEFT 0.9.1.dev0
-- Transformers 4.38.2
-- Pytorch 2.2.1+cu121
-- Datasets 2.18.0
-- Tokenizers 0.15.2

 license: apache-2.0
 library_name: peft
 tags:
 - generated_from_trainer
+base_model: google/flan-t5-xxl
 model-index:
 - name: results
   results: []
 # results
+This model is a fine-tuned version of [google/flan-t5-xxl](https://huggingface.co/google/flan-t5-xxl) on an unknown dataset.
 ## Model description
 ### Training hyperparameters
 The following hyperparameters were used during training:
+- learning_rate: 0.001
 - train_batch_size: 8
 - eval_batch_size: 8
 - seed: 42
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
+- lr_scheduler_type: linear
 - num_epochs: 2
+### Training results
 ### Framework versions
+- PEFT 0.11.1
+- Transformers 4.41.2
+- Pytorch 2.3.0+cu121
+- Datasets 2.19.2
+- Tokenizers 0.19.1

adapter_config.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "alpha_pattern": {},
   "auto_mapping": null,
-  "base_model_name_or_path": "mistralai/Mistral-7B-v0.1",
   "bias": "none",
   "fan_in_fan_out": false,
   "inference_mode": true,
@@ -10,7 +10,7 @@
   "layers_pattern": null,
   "layers_to_transform": null,
   "loftq_config": {},
-  "lora_alpha": 16,
   "lora_dropout": 0.05,
   "megatron_config": null,
   "megatron_core": "megatron.core",
@@ -20,13 +20,10 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "k_proj",
-    "q_proj",
-    "gate_proj",
-    "v_proj",
-    "o_proj"
   ],
-  "task_type": "CAUSAL_LM",
   "use_dora": false,
   "use_rslora": false
 }

 {
   "alpha_pattern": {},
   "auto_mapping": null,
+  "base_model_name_or_path": null,
   "bias": "none",
   "fan_in_fan_out": false,
   "inference_mode": true,
   "layers_pattern": null,
   "layers_to_transform": null,
   "loftq_config": {},
+  "lora_alpha": 32,
   "lora_dropout": 0.05,
   "megatron_config": null,
   "megatron_core": "megatron.core",
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
+    "q",
+    "v"
   ],
+  "task_type": "SEQ_2_SEQ_LM",
   "use_dora": false,
   "use_rslora": false
 }

adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f54e9c2e377a4849a8dab393aeee9b21b259cdf0ff6c02f6dc342bf7ab183dcf
-size 92317600

 version https://git-lfs.github.com/spec/v1
+oid sha256:f4b483975017f3c426fb4ffbed26e528d6f05ede84101a9aac9b7079d2925a86
+size 75543576

logs/events.out.tfevents.1717495835.2be89d6b90fb.345.0 ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:a880cc54f47f55110ea4539ccdb3a45dcdeaa369ecbf11d0f35bd1ce50885d03
+size 5536

logs/events.out.tfevents.1717495917.2be89d6b90fb.345.1 ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:baf3e99cd633d43b0b3e9c423434fc22dd28a794e0f94c8ce1586221533fe810
+size 6312

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5e60add3aa13346270f4e7ba18b52336fb3d7b35819cbb3f7b08dd20c2b76bde
-size 4856

 version https://git-lfs.github.com/spec/v1
+oid sha256:5d6c4cb145e626cffa908d9f2d61ef721813158994d2b9cae4e07ffa605ed191
+size 5176