Tejaswi006 committed on
Commit
ef79740
1 Parent(s): de86475

Model save

Browse files
README.md CHANGED
@@ -15,7 +15,7 @@ should probably proofread and complete it, then remove this comment. -->
15
 
16
  This model is a fine-tuned version of [mistralai/Mistral-7B-v0.1](https://huggingface.co/mistralai/Mistral-7B-v0.1) on an unknown dataset.
17
  It achieves the following results on the evaluation set:
18
- - Loss: 1.2383
19
 
20
  ## Model description
21
 
@@ -49,10 +49,10 @@ The following hyperparameters were used during training:
49
 
50
  | Training Loss | Epoch | Step | Validation Loss |
51
  |:-------------:|:-----:|:----:|:---------------:|
52
- | 1.3801 | 0.35 | 4 | 1.3500 |
53
- | 1.3615 | 1.32 | 8 | 1.3000 |
54
- | 1.3064 | 2.3 | 12 | 1.2652 |
55
- | 1.2626 | 3.37 | 17 | 1.2383 |
56
 
57
 
58
  ### Framework versions
 
15
 
16
  This model is a fine-tuned version of [mistralai/Mistral-7B-v0.1](https://huggingface.co/mistralai/Mistral-7B-v0.1) on an unknown dataset.
17
  It achieves the following results on the evaluation set:
18
+ - Loss: 1.2639
19
 
20
  ## Model description
21
 
 
49
 
50
  | Training Loss | Epoch | Step | Validation Loss |
51
  |:-------------:|:-----:|:----:|:---------------:|
52
+ | 1.3794 | 0.35 | 4 | 1.3587 |
53
+ | 1.3642 | 1.33 | 8 | 1.3150 |
54
+ | 1.3179 | 2.32 | 12 | 1.2844 |
55
+ | 1.2789 | 3.31 | 16 | 1.2639 |
56
 
57
 
58
  ### Framework versions
adapter_config.json CHANGED
@@ -16,10 +16,10 @@
16
  "rank_pattern": {},
17
  "revision": null,
18
  "target_modules": [
19
- "k_proj",
20
  "o_proj",
21
- "q_proj",
22
- "v_proj"
23
  ],
24
  "task_type": "CAUSAL_LM"
25
  }
 
16
  "rank_pattern": {},
17
  "revision": null,
18
  "target_modules": [
19
+ "v_proj",
20
  "o_proj",
21
+ "k_proj",
22
+ "q_proj"
23
  ],
24
  "task_type": "CAUSAL_LM"
25
  }
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:733d047eede5712a63861b5f2821d0f11d45ce7cc9eef9931d98de25e00c1e80
3
  size 218138576
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f1bbddabca9e32610bf4f4421581d89645180024f03a7565555eb2c96833b04e
3
  size 218138576
all_results.json CHANGED
@@ -1,13 +1,13 @@
1
  {
2
- "epoch": 3.37,
3
- "eval_loss": 1.2383112907409668,
4
- "eval_runtime": 22.4776,
5
  "eval_samples": 294,
6
- "eval_samples_per_second": 13.08,
7
- "eval_steps_per_second": 1.646,
8
- "train_loss": 1.30483017949497,
9
- "train_runtime": 5957.4264,
10
  "train_samples": 5895,
11
- "train_samples_per_second": 3.958,
12
- "train_steps_per_second": 0.007
13
  }
 
1
  {
2
+ "epoch": 3.31,
3
+ "eval_loss": 1.2639317512512207,
4
+ "eval_runtime": 21.5816,
5
  "eval_samples": 294,
6
+ "eval_samples_per_second": 13.623,
7
+ "eval_steps_per_second": 1.714,
8
+ "train_loss": 1.3643869757652283,
9
+ "train_runtime": 5803.8585,
10
  "train_samples": 5895,
11
+ "train_samples_per_second": 4.063,
12
+ "train_steps_per_second": 0.008
13
  }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 3.37,
3
- "eval_loss": 1.2383112907409668,
4
- "eval_runtime": 22.4776,
5
  "eval_samples": 294,
6
- "eval_samples_per_second": 13.08,
7
- "eval_steps_per_second": 1.646
8
  }
 
1
  {
2
+ "epoch": 3.31,
3
+ "eval_loss": 1.2639317512512207,
4
+ "eval_runtime": 21.5816,
5
  "eval_samples": 294,
6
+ "eval_samples_per_second": 13.623,
7
+ "eval_steps_per_second": 1.714
8
  }
runs/Dec15_08-46-37_s4311/events.out.tfevents.1702630077.s4311.1830444.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d2869b4e1968c0a63c1bd3d210af787e49856193920f93a8ca1a91840d8f5fac
3
+ size 6281
runs/Dec15_08-46-37_s4311/events.out.tfevents.1702635902.s4311.1830444.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a8a756318d4a2d99d3ee6104c17242bf475dbe702b392c470108f73cc4f626fc
3
+ size 354
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 3.37,
3
- "train_loss": 1.30483017949497,
4
- "train_runtime": 5957.4264,
5
  "train_samples": 5895,
6
- "train_samples_per_second": 3.958,
7
- "train_steps_per_second": 0.007
8
  }
 
1
  {
2
+ "epoch": 3.31,
3
+ "train_loss": 1.3643869757652283,
4
+ "train_runtime": 5803.8585,
5
  "train_samples": 5895,
6
+ "train_samples_per_second": 4.063,
7
+ "train_steps_per_second": 0.008
8
  }
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 3.3670284938941655,
5
  "eval_steps": 500,
6
- "global_step": 17,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -11,74 +11,74 @@
11
  {
12
  "epoch": 0.09,
13
  "learning_rate": 1.9974521146102535e-05,
14
- "loss": 1.3801,
15
  "step": 1
16
  },
17
  {
18
  "epoch": 0.35,
19
- "eval_loss": 1.3500080108642578,
20
- "eval_runtime": 22.5154,
21
- "eval_samples_per_second": 13.058,
22
- "eval_steps_per_second": 1.643,
23
  "step": 4
24
  },
25
  {
26
- "epoch": 1.06,
27
  "learning_rate": 1.936949724999762e-05,
28
- "loss": 1.3615,
29
  "step": 5
30
  },
31
  {
32
- "epoch": 1.32,
33
- "eval_loss": 1.2999709844589233,
34
- "eval_runtime": 22.4738,
35
- "eval_samples_per_second": 13.082,
36
- "eval_steps_per_second": 1.646,
37
  "step": 8
38
  },
39
  {
40
- "epoch": 2.13,
41
  "learning_rate": 1.7557495743542586e-05,
42
- "loss": 1.3064,
43
  "step": 10
44
  },
45
  {
46
- "epoch": 2.3,
47
- "eval_loss": 1.265210747718811,
48
- "eval_runtime": 22.4153,
49
- "eval_samples_per_second": 13.116,
50
- "eval_steps_per_second": 1.651,
51
  "step": 12
52
  },
53
  {
54
- "epoch": 3.19,
55
  "learning_rate": 1.479248986720057e-05,
56
- "loss": 1.2626,
57
  "step": 15
58
  },
59
  {
60
- "epoch": 3.37,
61
- "eval_loss": 1.2383277416229248,
62
- "eval_runtime": 22.5157,
63
- "eval_samples_per_second": 13.058,
64
- "eval_steps_per_second": 1.643,
65
- "step": 17
66
  },
67
  {
68
- "epoch": 3.37,
69
- "step": 17,
70
- "total_flos": 7.670550714342441e+17,
71
- "train_loss": 1.30483017949497,
72
- "train_runtime": 5957.4264,
73
- "train_samples_per_second": 3.958,
74
- "train_steps_per_second": 0.007
75
  }
76
  ],
77
  "logging_steps": 5,
78
  "max_steps": 44,
79
  "num_train_epochs": 4,
80
  "save_steps": 500,
81
- "total_flos": 7.670550714342441e+17,
82
  "trial_name": null,
83
  "trial_params": null
84
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 3.3086838534599727,
5
  "eval_steps": 500,
6
+ "global_step": 16,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
11
  {
12
  "epoch": 0.09,
13
  "learning_rate": 1.9974521146102535e-05,
14
+ "loss": 1.3794,
15
  "step": 1
16
  },
17
  {
18
  "epoch": 0.35,
19
+ "eval_loss": 1.3587424755096436,
20
+ "eval_runtime": 21.5907,
21
+ "eval_samples_per_second": 13.617,
22
+ "eval_steps_per_second": 1.714,
23
  "step": 4
24
  },
25
  {
26
+ "epoch": 1.07,
27
  "learning_rate": 1.936949724999762e-05,
28
+ "loss": 1.3642,
29
  "step": 5
30
  },
31
  {
32
+ "epoch": 1.33,
33
+ "eval_loss": 1.3150031566619873,
34
+ "eval_runtime": 21.6398,
35
+ "eval_samples_per_second": 13.586,
36
+ "eval_steps_per_second": 1.71,
37
  "step": 8
38
  },
39
  {
40
+ "epoch": 2.15,
41
  "learning_rate": 1.7557495743542586e-05,
42
+ "loss": 1.3179,
43
  "step": 10
44
  },
45
  {
46
+ "epoch": 2.32,
47
+ "eval_loss": 1.2843962907791138,
48
+ "eval_runtime": 21.6342,
49
+ "eval_samples_per_second": 13.59,
50
+ "eval_steps_per_second": 1.71,
51
  "step": 12
52
  },
53
  {
54
+ "epoch": 3.22,
55
  "learning_rate": 1.479248986720057e-05,
56
+ "loss": 1.2789,
57
  "step": 15
58
  },
59
  {
60
+ "epoch": 3.31,
61
+ "eval_loss": 1.2639315128326416,
62
+ "eval_runtime": 21.5826,
63
+ "eval_samples_per_second": 13.622,
64
+ "eval_steps_per_second": 1.714,
65
+ "step": 16
66
  },
67
  {
68
+ "epoch": 3.31,
69
+ "step": 16,
70
+ "total_flos": 7.473328135392461e+17,
71
+ "train_loss": 1.3643869757652283,
72
+ "train_runtime": 5803.8585,
73
+ "train_samples_per_second": 4.063,
74
+ "train_steps_per_second": 0.008
75
  }
76
  ],
77
  "logging_steps": 5,
78
  "max_steps": 44,
79
  "num_train_epochs": 4,
80
  "save_steps": 500,
81
+ "total_flos": 7.473328135392461e+17,
82
  "trial_name": null,
83
  "trial_params": null
84
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6f1941f27569a238b088377cad71a086145d5d62cbf0cd926c9aae68936ed966
3
  size 4664
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:47ec4ee1c57eaea2d727d4a772f2f4aa6d66464ed94ca07f89cb5d80aa8f6b36
3
  size 4664