Tejaswi006 commited on
Commit
ed69064
1 Parent(s): 901c131

Model save

Browse files
README.md CHANGED
@@ -49,15 +49,15 @@ The following hyperparameters were used during training:
49
 
50
  | Training Loss | Epoch | Step | Validation Loss |
51
  |:-------------:|:-----:|:----:|:---------------:|
52
- | 1.7095 | 0.09 | 1 | 1.7013 |
53
  | 1.7095 | 1.07 | 2 | 1.6854 |
54
  | 1.7095 | 2.06 | 3 | 1.6635 |
55
- | 1.7095 | 3.04 | 4 | 1.6378 |
56
- | 1.6715 | 4.02 | 5 | 1.6107 |
57
  | 1.6715 | 5.1 | 7 | 1.5584 |
58
- | 1.6715 | 6.08 | 8 | 1.5345 |
59
- | 1.6715 | 7.06 | 9 | 1.5133 |
60
- | 1.5588 | 8.05 | 10 | 1.4934 |
61
  | 1.5588 | 9.03 | 11 | 1.4756 |
62
 
63
 
 
49
 
50
  | Training Loss | Epoch | Step | Validation Loss |
51
  |:-------------:|:-----:|:----:|:---------------:|
52
+ | 1.7095 | 0.09 | 1 | 1.7015 |
53
  | 1.7095 | 1.07 | 2 | 1.6854 |
54
  | 1.7095 | 2.06 | 3 | 1.6635 |
55
+ | 1.7095 | 3.04 | 4 | 1.6377 |
56
+ | 1.6715 | 4.02 | 5 | 1.6106 |
57
  | 1.6715 | 5.1 | 7 | 1.5584 |
58
+ | 1.6715 | 6.08 | 8 | 1.5344 |
59
+ | 1.6715 | 7.06 | 9 | 1.5132 |
60
+ | 1.5588 | 8.05 | 10 | 1.4933 |
61
  | 1.5588 | 9.03 | 11 | 1.4756 |
62
 
63
 
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:77ca5ccdfab3e7b33d87a3f17cf5babcc7eb5f9f741a0ee2885fd1e7aeb00147
3
  size 218138576
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:666cc01e681b40ccae20914c7fad0fdcd73f72ed3f8d6b3d3d5f94c4575a253e
3
  size 218138576
all_results.json CHANGED
@@ -1,13 +1,13 @@
1
  {
2
  "epoch": 9.03,
3
- "eval_loss": 1.475583791732788,
4
- "eval_runtime": 6.3363,
5
  "eval_samples": 294,
6
- "eval_samples_per_second": 46.399,
7
- "eval_steps_per_second": 5.839,
8
- "train_loss": 1.714235695925626,
9
- "train_runtime": 4134.2039,
10
  "train_samples": 5895,
11
- "train_samples_per_second": 14.259,
12
  "train_steps_per_second": 0.027
13
  }
 
1
  {
2
  "epoch": 9.03,
3
+ "eval_loss": 1.4756207466125488,
4
+ "eval_runtime": 6.3444,
5
  "eval_samples": 294,
6
+ "eval_samples_per_second": 46.34,
7
+ "eval_steps_per_second": 5.832,
8
+ "train_loss": 1.7142409628087825,
9
+ "train_runtime": 4135.8024,
10
  "train_samples": 5895,
11
+ "train_samples_per_second": 14.254,
12
  "train_steps_per_second": 0.027
13
  }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 9.03,
3
- "eval_loss": 1.475583791732788,
4
- "eval_runtime": 6.3363,
5
  "eval_samples": 294,
6
- "eval_samples_per_second": 46.399,
7
- "eval_steps_per_second": 5.839
8
  }
 
1
  {
2
  "epoch": 9.03,
3
+ "eval_loss": 1.4756207466125488,
4
+ "eval_runtime": 6.3444,
5
  "eval_samples": 294,
6
+ "eval_samples_per_second": 46.34,
7
+ "eval_steps_per_second": 5.832
8
  }
runs/Dec14_13-56-05_s4311/events.out.tfevents.1702562244.s4311.817937.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e13bceaa2d273b3b12db226f49a3c79e6f99213d1bbb2e09a5be1614520450aa
3
+ size 7724
runs/Dec14_13-56-05_s4311/events.out.tfevents.1702566386.s4311.817937.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6655036fa20dec055f217bb8032aa555a529d2f754efb3e8a781fcff26d559ac
3
+ size 354
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 9.03,
3
- "train_loss": 1.714235695925626,
4
- "train_runtime": 4134.2039,
5
  "train_samples": 5895,
6
- "train_samples_per_second": 14.259,
7
  "train_steps_per_second": 0.027
8
  }
 
1
  {
2
  "epoch": 9.03,
3
+ "train_loss": 1.7142409628087825,
4
+ "train_runtime": 4135.8024,
5
  "train_samples": 5895,
6
+ "train_samples_per_second": 14.254,
7
  "train_steps_per_second": 0.027
8
  }
trainer_state.json CHANGED
@@ -16,34 +16,34 @@
16
  },
17
  {
18
  "epoch": 0.09,
19
- "eval_loss": 1.7013005018234253,
20
- "eval_runtime": 6.3689,
21
- "eval_samples_per_second": 46.162,
22
- "eval_steps_per_second": 5.809,
23
  "step": 1
24
  },
25
  {
26
  "epoch": 1.07,
27
- "eval_loss": 1.6853851079940796,
28
- "eval_runtime": 6.3663,
29
- "eval_samples_per_second": 46.181,
30
- "eval_steps_per_second": 5.812,
31
  "step": 2
32
  },
33
  {
34
  "epoch": 2.06,
35
- "eval_loss": 1.6635044813156128,
36
- "eval_runtime": 6.3573,
37
- "eval_samples_per_second": 46.246,
38
- "eval_steps_per_second": 5.82,
39
  "step": 3
40
  },
41
  {
42
  "epoch": 3.04,
43
- "eval_loss": 1.6378282308578491,
44
- "eval_runtime": 6.3598,
45
- "eval_samples_per_second": 46.228,
46
- "eval_steps_per_second": 5.818,
47
  "step": 4
48
  },
49
  {
@@ -54,34 +54,34 @@
54
  },
55
  {
56
  "epoch": 4.02,
57
- "eval_loss": 1.610694408416748,
58
- "eval_runtime": 6.3454,
59
- "eval_samples_per_second": 46.333,
60
- "eval_steps_per_second": 5.831,
61
  "step": 5
62
  },
63
  {
64
  "epoch": 5.1,
65
- "eval_loss": 1.5584030151367188,
66
- "eval_runtime": 6.3281,
67
- "eval_samples_per_second": 46.459,
68
- "eval_steps_per_second": 5.847,
69
  "step": 7
70
  },
71
  {
72
  "epoch": 6.08,
73
- "eval_loss": 1.5344550609588623,
74
- "eval_runtime": 6.3648,
75
- "eval_samples_per_second": 46.192,
76
- "eval_steps_per_second": 5.813,
77
  "step": 8
78
  },
79
  {
80
  "epoch": 7.06,
81
- "eval_loss": 1.5132712125778198,
82
- "eval_runtime": 6.3444,
83
- "eval_samples_per_second": 46.34,
84
- "eval_steps_per_second": 5.832,
85
  "step": 9
86
  },
87
  {
@@ -92,27 +92,27 @@
92
  },
93
  {
94
  "epoch": 8.05,
95
- "eval_loss": 1.493425965309143,
96
- "eval_runtime": 6.3668,
97
- "eval_samples_per_second": 46.177,
98
- "eval_steps_per_second": 5.811,
99
  "step": 10
100
  },
101
  {
102
  "epoch": 9.03,
103
- "eval_loss": 1.475648283958435,
104
- "eval_runtime": 6.352,
105
- "eval_samples_per_second": 46.285,
106
- "eval_steps_per_second": 5.825,
107
  "step": 11
108
  },
109
  {
110
  "epoch": 9.03,
111
  "step": 11,
112
  "total_flos": 5.3091621472200294e+17,
113
- "train_loss": 1.714235695925626,
114
- "train_runtime": 4134.2039,
115
- "train_samples_per_second": 14.259,
116
  "train_steps_per_second": 0.027
117
  }
118
  ],
 
16
  },
17
  {
18
  "epoch": 0.09,
19
+ "eval_loss": 1.701454997062683,
20
+ "eval_runtime": 6.3744,
21
+ "eval_samples_per_second": 46.122,
22
+ "eval_steps_per_second": 5.804,
23
  "step": 1
24
  },
25
  {
26
  "epoch": 1.07,
27
+ "eval_loss": 1.6854428052902222,
28
+ "eval_runtime": 6.363,
29
+ "eval_samples_per_second": 46.204,
30
+ "eval_steps_per_second": 5.815,
31
  "step": 2
32
  },
33
  {
34
  "epoch": 2.06,
35
+ "eval_loss": 1.6634844541549683,
36
+ "eval_runtime": 6.3663,
37
+ "eval_samples_per_second": 46.181,
38
+ "eval_steps_per_second": 5.812,
39
  "step": 3
40
  },
41
  {
42
  "epoch": 3.04,
43
+ "eval_loss": 1.6377184391021729,
44
+ "eval_runtime": 6.3346,
45
+ "eval_samples_per_second": 46.412,
46
+ "eval_steps_per_second": 5.841,
47
  "step": 4
48
  },
49
  {
 
54
  },
55
  {
56
  "epoch": 4.02,
57
+ "eval_loss": 1.6106265783309937,
58
+ "eval_runtime": 6.3433,
59
+ "eval_samples_per_second": 46.348,
60
+ "eval_steps_per_second": 5.833,
61
  "step": 5
62
  },
63
  {
64
  "epoch": 5.1,
65
+ "eval_loss": 1.5584189891815186,
66
+ "eval_runtime": 6.3627,
67
+ "eval_samples_per_second": 46.206,
68
+ "eval_steps_per_second": 5.815,
69
  "step": 7
70
  },
71
  {
72
  "epoch": 6.08,
73
+ "eval_loss": 1.5343831777572632,
74
+ "eval_runtime": 6.3403,
75
+ "eval_samples_per_second": 46.37,
76
+ "eval_steps_per_second": 5.836,
77
  "step": 8
78
  },
79
  {
80
  "epoch": 7.06,
81
+ "eval_loss": 1.5131820440292358,
82
+ "eval_runtime": 6.3297,
83
+ "eval_samples_per_second": 46.448,
84
+ "eval_steps_per_second": 5.845,
85
  "step": 9
86
  },
87
  {
 
92
  },
93
  {
94
  "epoch": 8.05,
95
+ "eval_loss": 1.4932821989059448,
96
+ "eval_runtime": 6.3196,
97
+ "eval_samples_per_second": 46.522,
98
+ "eval_steps_per_second": 5.855,
99
  "step": 10
100
  },
101
  {
102
  "epoch": 9.03,
103
+ "eval_loss": 1.4755805730819702,
104
+ "eval_runtime": 6.3502,
105
+ "eval_samples_per_second": 46.298,
106
+ "eval_steps_per_second": 5.827,
107
  "step": 11
108
  },
109
  {
110
  "epoch": 9.03,
111
  "step": 11,
112
  "total_flos": 5.3091621472200294e+17,
113
+ "train_loss": 1.7142409628087825,
114
+ "train_runtime": 4135.8024,
115
+ "train_samples_per_second": 14.254,
116
  "train_steps_per_second": 0.027
117
  }
118
  ],
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:06d3d120ec42d827abe951b3cebbab8012a29481eaf1f3e3baf37f66d3ab56b7
3
  size 4664
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:82f9cbd5ae020148e5023e8a94238eb6d948e2533fd584833fd423ac94f00345
3
  size 4664