vincenzodeleo committed on
Commit
4537421
·
verified ·
1 Parent(s): 585c7f5

End of training

Browse files
README.md CHANGED
@@ -2,10 +2,8 @@
2
  license: apache-2.0
3
  library_name: peft
4
  tags:
5
- - trl
6
- - sft
7
  - generated_from_trainer
8
- base_model: mistralai/Mistral-7B-v0.1
9
  model-index:
10
  - name: results
11
  results: []
@@ -16,7 +14,7 @@ should probably proofread and complete it, then remove this comment. -->
16
 
17
  # results
18
 
19
- This model is a fine-tuned version of [mistralai/Mistral-7B-v0.1](https://huggingface.co/mistralai/Mistral-7B-v0.1) on an unknown dataset.
20
 
21
  ## Model description
22
 
@@ -35,21 +33,22 @@ More information needed
35
  ### Training hyperparameters
36
 
37
  The following hyperparameters were used during training:
38
- - learning_rate: 0.0002
39
  - train_batch_size: 8
40
  - eval_batch_size: 8
41
  - seed: 42
42
- - gradient_accumulation_steps: 2
43
- - total_train_batch_size: 16
44
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
45
- - lr_scheduler_type: constant
46
- - lr_scheduler_warmup_ratio: 0.3
47
  - num_epochs: 2
48
 
 
 
 
 
49
  ### Framework versions
50
 
51
- - PEFT 0.9.1.dev0
52
- - Transformers 4.38.2
53
- - Pytorch 2.2.1+cu121
54
- - Datasets 2.18.0
55
- - Tokenizers 0.15.2
 
2
  license: apache-2.0
3
  library_name: peft
4
  tags:
 
 
5
  - generated_from_trainer
6
+ base_model: google/flan-t5-xxl
7
  model-index:
8
  - name: results
9
  results: []
 
14
 
15
  # results
16
 
17
+ This model is a fine-tuned version of [google/flan-t5-xxl](https://huggingface.co/google/flan-t5-xxl) on an unknown dataset.
18
 
19
  ## Model description
20
 
 
33
  ### Training hyperparameters
34
 
35
  The following hyperparameters were used during training:
36
+ - learning_rate: 0.001
37
  - train_batch_size: 8
38
  - eval_batch_size: 8
39
  - seed: 42
 
 
40
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
41
+ - lr_scheduler_type: linear
 
42
  - num_epochs: 2
43
 
44
+ ### Training results
45
+
46
+
47
+
48
  ### Framework versions
49
 
50
+ - PEFT 0.11.1
51
+ - Transformers 4.41.2
52
+ - Pytorch 2.3.0+cu121
53
+ - Datasets 2.19.2
54
+ - Tokenizers 0.19.1
adapter_config.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "alpha_pattern": {},
3
  "auto_mapping": null,
4
- "base_model_name_or_path": "mistralai/Mistral-7B-v0.1",
5
  "bias": "none",
6
  "fan_in_fan_out": false,
7
  "inference_mode": true,
@@ -10,7 +10,7 @@
10
  "layers_pattern": null,
11
  "layers_to_transform": null,
12
  "loftq_config": {},
13
- "lora_alpha": 16,
14
  "lora_dropout": 0.05,
15
  "megatron_config": null,
16
  "megatron_core": "megatron.core",
@@ -20,13 +20,10 @@
20
  "rank_pattern": {},
21
  "revision": null,
22
  "target_modules": [
23
- "k_proj",
24
- "q_proj",
25
- "gate_proj",
26
- "v_proj",
27
- "o_proj"
28
  ],
29
- "task_type": "CAUSAL_LM",
30
  "use_dora": false,
31
  "use_rslora": false
32
  }
 
1
  {
2
  "alpha_pattern": {},
3
  "auto_mapping": null,
4
+ "base_model_name_or_path": null,
5
  "bias": "none",
6
  "fan_in_fan_out": false,
7
  "inference_mode": true,
 
10
  "layers_pattern": null,
11
  "layers_to_transform": null,
12
  "loftq_config": {},
13
+ "lora_alpha": 32,
14
  "lora_dropout": 0.05,
15
  "megatron_config": null,
16
  "megatron_core": "megatron.core",
 
20
  "rank_pattern": {},
21
  "revision": null,
22
  "target_modules": [
23
+ "q",
24
+ "v"
 
 
 
25
  ],
26
+ "task_type": "SEQ_2_SEQ_LM",
27
  "use_dora": false,
28
  "use_rslora": false
29
  }
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f54e9c2e377a4849a8dab393aeee9b21b259cdf0ff6c02f6dc342bf7ab183dcf
3
- size 92317600
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f4b483975017f3c426fb4ffbed26e528d6f05ede84101a9aac9b7079d2925a86
3
+ size 75543576
logs/events.out.tfevents.1717495835.2be89d6b90fb.345.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a880cc54f47f55110ea4539ccdb3a45dcdeaa369ecbf11d0f35bd1ce50885d03
3
+ size 5536
logs/events.out.tfevents.1717495917.2be89d6b90fb.345.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:baf3e99cd633d43b0b3e9c423434fc22dd28a794e0f94c8ce1586221533fe810
3
+ size 6312
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5e60add3aa13346270f4e7ba18b52336fb3d7b35819cbb3f7b08dd20c2b76bde
3
- size 4856
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5d6c4cb145e626cffa908d9f2d61ef721813158994d2b9cae4e07ffa605ed191
3
+ size 5176