Tejaswi006 committed on
Commit
fcb4856
1 Parent(s): c989dcb

Model save

Browse files
README.md CHANGED
@@ -15,7 +15,7 @@ should probably proofread and complete it, then remove this comment. -->
15
 
16
  This model is a fine-tuned version of [mistralai/Mistral-7B-v0.1](https://huggingface.co/mistralai/Mistral-7B-v0.1) on an unknown dataset.
17
  It achieves the following results on the evaluation set:
18
- - Loss: 1.6381
19
 
20
  ## Model description
21
 
@@ -49,10 +49,10 @@ The following hyperparameters were used during training:
49
 
50
  | Training Loss | Epoch | Step | Validation Loss |
51
  |:-------------:|:-----:|:----:|:---------------:|
52
- | 1.7095 | 0.09 | 1 | 1.7014 |
53
- | 1.7095 | 1.07 | 2 | 1.6855 |
54
- | 1.7095 | 2.06 | 3 | 1.6637 |
55
- | 1.7095 | 3.04 | 4 | 1.6382 |
56
 
57
 
58
  ### Framework versions
 
15
 
16
  This model is a fine-tuned version of [mistralai/Mistral-7B-v0.1](https://huggingface.co/mistralai/Mistral-7B-v0.1) on an unknown dataset.
17
  It achieves the following results on the evaluation set:
18
+ - Loss: 1.2383
19
 
20
  ## Model description
21
 
 
49
 
50
  | Training Loss | Epoch | Step | Validation Loss |
51
  |:-------------:|:-----:|:----:|:---------------:|
52
+ | 1.3801 | 0.35 | 4 | 1.3500 |
53
+ | 1.3615 | 1.32 | 8 | 1.3000 |
54
+ | 1.3064 | 2.3 | 12 | 1.2652 |
55
+ | 1.2626 | 3.37 | 17 | 1.2383 |
56
 
57
 
58
  ### Framework versions
adapter_config.json CHANGED
@@ -16,10 +16,10 @@
16
  "rank_pattern": {},
17
  "revision": null,
18
  "target_modules": [
19
- "v_proj",
20
- "q_proj",
21
  "k_proj",
22
- "o_proj"
 
 
23
  ],
24
  "task_type": "CAUSAL_LM"
25
  }
 
16
  "rank_pattern": {},
17
  "revision": null,
18
  "target_modules": [
 
 
19
  "k_proj",
20
+ "o_proj",
21
+ "q_proj",
22
+ "v_proj"
23
  ],
24
  "task_type": "CAUSAL_LM"
25
  }
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0cbcd1726281528770049545897e4929fdbcd2e3c520f2cd8517683dc9413b7b
3
  size 218138576
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:733d047eede5712a63861b5f2821d0f11d45ce7cc9eef9931d98de25e00c1e80
3
  size 218138576
all_results.json CHANGED
@@ -1,13 +1,13 @@
1
  {
2
- "epoch": 3.04,
3
- "eval_loss": 1.6381348371505737,
4
- "eval_runtime": 6.3457,
5
  "eval_samples": 294,
6
- "eval_samples_per_second": 46.331,
7
- "eval_steps_per_second": 5.831,
8
- "train_loss": 1.9840126037597656,
9
- "train_runtime": 1653.0192,
10
  "train_samples": 5895,
11
- "train_samples_per_second": 14.265,
12
- "train_steps_per_second": 0.027
13
  }
 
1
  {
2
+ "epoch": 3.37,
3
+ "eval_loss": 1.2383112907409668,
4
+ "eval_runtime": 22.4776,
5
  "eval_samples": 294,
6
+ "eval_samples_per_second": 13.08,
7
+ "eval_steps_per_second": 1.646,
8
+ "train_loss": 1.30483017949497,
9
+ "train_runtime": 5957.4264,
10
  "train_samples": 5895,
11
+ "train_samples_per_second": 3.958,
12
+ "train_steps_per_second": 0.007
13
  }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 3.04,
3
- "eval_loss": 1.6381348371505737,
4
- "eval_runtime": 6.3457,
5
  "eval_samples": 294,
6
- "eval_samples_per_second": 46.331,
7
- "eval_steps_per_second": 5.831
8
  }
 
1
  {
2
+ "epoch": 3.37,
3
+ "eval_loss": 1.2383112907409668,
4
+ "eval_runtime": 22.4776,
5
  "eval_samples": 294,
6
+ "eval_samples_per_second": 13.08,
7
+ "eval_steps_per_second": 1.646
8
  }
runs/Dec15_05-26-11_s4311/events.out.tfevents.1702618051.s4311.1026368.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:17f33fcbcaf4a4ec5a6538ae3e5c63fe8a143b8ed099bf9b8607c390736e5aa8
3
+ size 6281
runs/Dec15_05-26-11_s4311/events.out.tfevents.1702624031.s4311.1026368.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7fa0b59f1d02d339bea48d497f4ff5acc85dc6a989e62ed82b0b704ead91ab41
3
+ size 354
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 3.04,
3
- "train_loss": 1.9840126037597656,
4
- "train_runtime": 1653.0192,
5
  "train_samples": 5895,
6
- "train_samples_per_second": 14.265,
7
- "train_steps_per_second": 0.027
8
  }
 
1
  {
2
+ "epoch": 3.37,
3
+ "train_loss": 1.30483017949497,
4
+ "train_runtime": 5957.4264,
5
  "train_samples": 5895,
6
+ "train_samples_per_second": 3.958,
7
+ "train_steps_per_second": 0.007
8
  }
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 3.0400271370420624,
5
  "eval_steps": 500,
6
- "global_step": 4,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -11,56 +11,74 @@
11
  {
12
  "epoch": 0.09,
13
  "learning_rate": 1.9974521146102535e-05,
14
- "loss": 1.7095,
15
  "step": 1
16
  },
17
  {
18
- "epoch": 0.09,
19
- "eval_loss": 1.7013664245605469,
20
- "eval_runtime": 6.3791,
21
- "eval_samples_per_second": 46.088,
22
- "eval_steps_per_second": 5.8,
23
- "step": 1
24
  },
25
  {
26
- "epoch": 1.07,
27
- "eval_loss": 1.6854876279830933,
28
- "eval_runtime": 6.3439,
29
- "eval_samples_per_second": 46.344,
30
- "eval_steps_per_second": 5.832,
31
- "step": 2
32
  },
33
  {
34
- "epoch": 2.06,
35
- "eval_loss": 1.663727045059204,
36
- "eval_runtime": 6.3388,
37
- "eval_samples_per_second": 46.381,
38
- "eval_steps_per_second": 5.837,
39
- "step": 3
40
  },
41
  {
42
- "epoch": 3.04,
43
- "eval_loss": 1.6381714344024658,
44
- "eval_runtime": 6.3614,
45
- "eval_samples_per_second": 46.216,
46
- "eval_steps_per_second": 5.816,
47
- "step": 4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48
  },
49
  {
50
- "epoch": 3.04,
51
- "step": 4,
52
- "total_flos": 2.123664876067881e+17,
53
- "train_loss": 1.9840126037597656,
54
- "train_runtime": 1653.0192,
55
- "train_samples_per_second": 14.265,
56
- "train_steps_per_second": 0.027
57
  }
58
  ],
59
  "logging_steps": 5,
60
  "max_steps": 44,
61
  "num_train_epochs": 4,
62
  "save_steps": 500,
63
- "total_flos": 2.123664876067881e+17,
64
  "trial_name": null,
65
  "trial_params": null
66
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 3.3670284938941655,
5
  "eval_steps": 500,
6
+ "global_step": 17,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
11
  {
12
  "epoch": 0.09,
13
  "learning_rate": 1.9974521146102535e-05,
14
+ "loss": 1.3801,
15
  "step": 1
16
  },
17
  {
18
+ "epoch": 0.35,
19
+ "eval_loss": 1.3500080108642578,
20
+ "eval_runtime": 22.5154,
21
+ "eval_samples_per_second": 13.058,
22
+ "eval_steps_per_second": 1.643,
23
+ "step": 4
24
  },
25
  {
26
+ "epoch": 1.06,
27
+ "learning_rate": 1.936949724999762e-05,
28
+ "loss": 1.3615,
29
+ "step": 5
 
 
30
  },
31
  {
32
+ "epoch": 1.32,
33
+ "eval_loss": 1.2999709844589233,
34
+ "eval_runtime": 22.4738,
35
+ "eval_samples_per_second": 13.082,
36
+ "eval_steps_per_second": 1.646,
37
+ "step": 8
38
  },
39
  {
40
+ "epoch": 2.13,
41
+ "learning_rate": 1.7557495743542586e-05,
42
+ "loss": 1.3064,
43
+ "step": 10
44
+ },
45
+ {
46
+ "epoch": 2.3,
47
+ "eval_loss": 1.265210747718811,
48
+ "eval_runtime": 22.4153,
49
+ "eval_samples_per_second": 13.116,
50
+ "eval_steps_per_second": 1.651,
51
+ "step": 12
52
+ },
53
+ {
54
+ "epoch": 3.19,
55
+ "learning_rate": 1.479248986720057e-05,
56
+ "loss": 1.2626,
57
+ "step": 15
58
+ },
59
+ {
60
+ "epoch": 3.37,
61
+ "eval_loss": 1.2383277416229248,
62
+ "eval_runtime": 22.5157,
63
+ "eval_samples_per_second": 13.058,
64
+ "eval_steps_per_second": 1.643,
65
+ "step": 17
66
  },
67
  {
68
+ "epoch": 3.37,
69
+ "step": 17,
70
+ "total_flos": 7.670550714342441e+17,
71
+ "train_loss": 1.30483017949497,
72
+ "train_runtime": 5957.4264,
73
+ "train_samples_per_second": 3.958,
74
+ "train_steps_per_second": 0.007
75
  }
76
  ],
77
  "logging_steps": 5,
78
  "max_steps": 44,
79
  "num_train_epochs": 4,
80
  "save_steps": 500,
81
+ "total_flos": 7.670550714342441e+17,
82
  "trial_name": null,
83
  "trial_params": null
84
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2b24e1f15eab76911260c95314f76d27bf8972d0e9bf4fdbcf3205fc1db47f9a
3
  size 4664
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6f1941f27569a238b088377cad71a086145d5d62cbf0cd926c9aae68936ed966
3
  size 4664