sujithatz committed on
Commit
f5c4cea
1 Parent(s): fc022ef

sujithatz/finbot-transofrmer-based-phi3.5_adapter

Browse files
README.md CHANGED
@@ -17,6 +17,8 @@ should probably proofread and complete it, then remove this comment. -->
17
  # phi-3-mini-LoRA
18
 
19
  This model is a fine-tuned version of [microsoft/Phi-3.5-mini-instruct](https://huggingface.co/microsoft/Phi-3.5-mini-instruct) on an unknown dataset.
 
 
20
 
21
  ## Model description
22
 
@@ -44,10 +46,36 @@ The following hyperparameters were used during training:
44
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
45
  - lr_scheduler_type: linear
46
  - lr_scheduler_warmup_steps: 5
47
- - training_steps: 5
48
 
49
  ### Training results
50
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
51
 
52
 
53
  ### Framework versions
 
17
  # phi-3-mini-LoRA
18
 
19
  This model is a fine-tuned version of [microsoft/Phi-3.5-mini-instruct](https://huggingface.co/microsoft/Phi-3.5-mini-instruct) on an unknown dataset.
20
+ It achieves the following results on the evaluation set:
21
+ - Loss: 0.7215
22
 
23
  ## Model description
24
 
 
46
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
47
  - lr_scheduler_type: linear
48
  - lr_scheduler_warmup_steps: 5
49
+ - training_steps: 120
50
 
51
  ### Training results
52
 
53
+ | Training Loss | Epoch | Step | Validation Loss |
54
+ |:-------------:|:-------:|:----:|:---------------:|
55
+ | 1.4909 | 1.1765 | 5 | 1.3154 |
56
+ | 0.9704 | 2.3529 | 10 | 0.9087 |
57
+ | 0.6673 | 3.5294 | 15 | 0.6343 |
58
+ | 0.4418 | 4.7059 | 20 | 0.5075 |
59
+ | 0.3375 | 5.8824 | 25 | 0.4491 |
60
+ | 0.3033 | 7.0588 | 30 | 0.4069 |
61
+ | 0.244 | 8.2353 | 35 | 0.3828 |
62
+ | 0.2285 | 9.4118 | 40 | 0.3759 |
63
+ | 0.1519 | 10.5882 | 45 | 0.3896 |
64
+ | 0.1334 | 11.7647 | 50 | 0.4114 |
65
+ | 0.099 | 12.9412 | 55 | 0.4291 |
66
+ | 0.0823 | 14.1176 | 60 | 0.4610 |
67
+ | 0.06 | 15.2941 | 65 | 0.4894 |
68
+ | 0.0548 | 16.4706 | 70 | 0.5345 |
69
+ | 0.0437 | 17.6471 | 75 | 0.5747 |
70
+ | 0.0409 | 18.8235 | 80 | 0.6059 |
71
+ | 0.0386 | 20.0 | 85 | 0.6349 |
72
+ | 0.0272 | 21.1765 | 90 | 0.6590 |
73
+ | 0.0262 | 22.3529 | 95 | 0.6933 |
74
+ | 0.0303 | 23.5294 | 100 | 0.6960 |
75
+ | 0.0249 | 24.7059 | 105 | 0.7021 |
76
+ | 0.0291 | 25.8824 | 110 | 0.7173 |
77
+ | 0.0255 | 27.0588 | 115 | 0.7195 |
78
+ | 0.0208 | 28.2353 | 120 | 0.7215 |
79
 
80
 
81
  ### Framework versions
adapter_config.json CHANGED
@@ -1,9 +1,6 @@
1
  {
2
  "alpha_pattern": {},
3
- "auto_mapping": {
4
- "base_model_class": "Phi3ForCausalLM",
5
- "parent_library": "transformers_modules.microsoft.Phi-3.5-mini-instruct.af0dfb8029e8a74545d0736d30cb6b58d2f0f3f0.modeling_phi3"
6
- },
7
  "base_model_name_or_path": "microsoft/Phi-3.5-mini-instruct",
8
  "bias": "none",
9
  "fan_in_fan_out": false,
@@ -12,9 +9,9 @@
12
  "layer_replication": null,
13
  "layers_pattern": null,
14
  "layers_to_transform": null,
15
- "loftq_config": null,
16
- "lora_alpha": 32,
17
- "lora_dropout": 0.05,
18
  "megatron_config": null,
19
  "megatron_core": "megatron.core",
20
  "modules_to_save": null,
@@ -23,15 +20,12 @@
23
  "rank_pattern": {},
24
  "revision": null,
25
  "target_modules": [
26
- "v_proj",
27
- "down_proj",
28
- "q_proj",
29
- "gate_proj",
30
- "k_proj",
31
  "o_proj",
32
- "up_proj"
 
 
33
  ],
34
- "task_type": null,
35
  "use_dora": false,
36
  "use_rslora": false
37
  }
 
1
  {
2
  "alpha_pattern": {},
3
+ "auto_mapping": null,
 
 
 
4
  "base_model_name_or_path": "microsoft/Phi-3.5-mini-instruct",
5
  "bias": "none",
6
  "fan_in_fan_out": false,
 
9
  "layer_replication": null,
10
  "layers_pattern": null,
11
  "layers_to_transform": null,
12
+ "loftq_config": {},
13
+ "lora_alpha": 16,
14
+ "lora_dropout": 0,
15
  "megatron_config": null,
16
  "megatron_core": "megatron.core",
17
  "modules_to_save": null,
 
20
  "rank_pattern": {},
21
  "revision": null,
22
  "target_modules": [
 
 
 
 
 
23
  "o_proj",
24
+ "down_proj",
25
+ "gate_up_proj",
26
+ "qkv_proj"
27
  ],
28
+ "task_type": "CAUSAL_LM",
29
  "use_dora": false,
30
  "use_rslora": false
31
  }
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:443965e8be292a2bcd1ef4837df3c4f3a69ab40d05701c88346764362f778a84
3
- size 35668592
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0d0d087ec02232d98e64e6f6b528eebfa7ca7a0bf61f2f00fe1c0991fe80fee6
3
+ size 100697728
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e75e7e86a3fc1a236b9d82a1f17d4139e53851f26782b1abb4f2640be7a11dea
3
  size 5432
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ae64cfbc9fafa79992f8f1dbc59d731406c1a3c9322aa24ad9ba448b90f16c6e
3
  size 5432