srikanthsrnvs committed on
Commit
23b3629
1 Parent(s): adea7d7

Upload with huggingface_hub

Browse files
added_tokens.json CHANGED
@@ -1,66 +1,3 @@
1
  {
2
- "<pad>": 32000,
3
- "<pad_0>": 32001,
4
- "<pad_10>": 32011,
5
- "<pad_11>": 32012,
6
- "<pad_12>": 32013,
7
- "<pad_13>": 32014,
8
- "<pad_14>": 32015,
9
- "<pad_15>": 32016,
10
- "<pad_16>": 32017,
11
- "<pad_17>": 32018,
12
- "<pad_18>": 32019,
13
- "<pad_19>": 32020,
14
- "<pad_1>": 32002,
15
- "<pad_20>": 32021,
16
- "<pad_21>": 32022,
17
- "<pad_22>": 32023,
18
- "<pad_23>": 32024,
19
- "<pad_24>": 32025,
20
- "<pad_25>": 32026,
21
- "<pad_26>": 32027,
22
- "<pad_27>": 32028,
23
- "<pad_28>": 32029,
24
- "<pad_29>": 32030,
25
- "<pad_2>": 32003,
26
- "<pad_30>": 32031,
27
- "<pad_31>": 32032,
28
- "<pad_32>": 32033,
29
- "<pad_33>": 32034,
30
- "<pad_34>": 32035,
31
- "<pad_35>": 32036,
32
- "<pad_36>": 32037,
33
- "<pad_37>": 32038,
34
- "<pad_38>": 32039,
35
- "<pad_39>": 32040,
36
- "<pad_3>": 32004,
37
- "<pad_40>": 32041,
38
- "<pad_41>": 32042,
39
- "<pad_42>": 32043,
40
- "<pad_43>": 32044,
41
- "<pad_44>": 32045,
42
- "<pad_45>": 32046,
43
- "<pad_46>": 32047,
44
- "<pad_47>": 32048,
45
- "<pad_48>": 32049,
46
- "<pad_49>": 32050,
47
- "<pad_4>": 32005,
48
- "<pad_50>": 32051,
49
- "<pad_51>": 32052,
50
- "<pad_52>": 32053,
51
- "<pad_53>": 32054,
52
- "<pad_54>": 32055,
53
- "<pad_55>": 32056,
54
- "<pad_56>": 32057,
55
- "<pad_57>": 32058,
56
- "<pad_58>": 32059,
57
- "<pad_59>": 32060,
58
- "<pad_5>": 32006,
59
- "<pad_60>": 32061,
60
- "<pad_61>": 32062,
61
- "<pad_62>": 32063,
62
- "<pad_6>": 32007,
63
- "<pad_7>": 32008,
64
- "<pad_8>": 32009,
65
- "<pad_9>": 32010
66
  }
 
1
  {
2
+ "<pad>": 32000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
  }
config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "swype/deepshard-13B-ft",
3
  "architectures": [
4
  "LlamaForCausalLM"
5
  ],
@@ -9,7 +9,6 @@
9
  "hidden_size": 5120,
10
  "initializer_range": 0.02,
11
  "intermediate_size": 13824,
12
- "max_sequence_length": 2048,
13
  "model_type": "llama",
14
  "num_attention_heads": 40,
15
  "num_hidden_layers": 40,
@@ -19,5 +18,5 @@
19
  "torch_dtype": "float32",
20
  "transformers_version": "4.28.0.dev0",
21
  "use_cache": true,
22
- "vocab_size": 32064
23
  }
 
1
  {
2
+ "_name_or_path": "swype/deepshard-13B-raw",
3
  "architectures": [
4
  "LlamaForCausalLM"
5
  ],
 
9
  "hidden_size": 5120,
10
  "initializer_range": 0.02,
11
  "intermediate_size": 13824,
 
12
  "model_type": "llama",
13
  "num_attention_heads": 40,
14
  "num_hidden_layers": 40,
 
18
  "torch_dtype": "float32",
19
  "transformers_version": "4.28.0.dev0",
20
  "use_cache": true,
21
+ "vocab_size": 32001
22
  }
pytorch_model-00001-of-00006.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b471e38cf7666d29c05d5a2608612cac6025986831016dab57ae0d6894345622
3
- size 9957854603
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c79e83b4b6e934facb51f76de7425701fd53520793fc3a62ee6f6e986d1dfd38
3
+ size 9956564363
pytorch_model-00002-of-00006.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4851b0457ff807013f6d89e0464541d2dc6c2ddcde9b4b0cd4ae60a0ca681095
3
  size 9940856385
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:54376ff86e52e85da8064236d2420e489ddd80ee5852c3c671d7211e0a3d9629
3
  size 9940856385
pytorch_model-00003-of-00006.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:69d6760872e9a7660c51e7ed93f586277e0139d18d3997e6c10a235bd86c04c3
3
  size 9940856943
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a4a3294d4ae14e52260acbd6fba427c21e4729fd6b4948f0748b535924d51209
3
  size 9940856943
pytorch_model-00004-of-00006.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e6a3798c9896eb6f3408dd7747252ef8df7b22fab32a6fd994181af393abc5d1
3
  size 9867415289
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cf196ac8e5dfa0e9b478f71b54bfbd050931242a20f37bcf6f550fc0e5c9ff0b
3
  size 9867415289
pytorch_model-00005-of-00006.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:57748377c4aaa8a8cbd182d7a96593289b698b895c664746919708ab30482ff3
3
  size 9867456961
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0b7113f68e1a8cd3b1ce01926f3e6b219300ac3ed226c25eb7ab4069af8131c1
3
  size 9867456961
pytorch_model-00006-of-00006.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5de63b0309f9c2f2dec60e21f88f0c02c9afc3fcddd4056f480836a6f23ce29e
3
- size 2491786927
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a7860cdc7e83a7380f546a53c6382eca2d41291b0f1ace9bd392c2d5b39d2d9c
3
+ size 2490496687
pytorch_model.bin.index.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "metadata": {
3
- "total_size": 52066088960
4
  },
5
  "weight_map": {
6
  "lm_head.weight": "pytorch_model-00006-of-00006.bin",
 
1
  {
2
  "metadata": {
3
+ "total_size": 52063508480
4
  },
5
  "weight_map": {
6
  "lm_head.weight": "pytorch_model-00006-of-00006.bin",
runs/Mar22_22-47-46_pslw9jwq6/1679525384.1219966/events.out.tfevents.1679525384.pslw9jwq6.121621.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7a97748f7318111b3a504003d814ecb0eb5e9c829ab92ee26fde7e83b53081b0
3
+ size 6071
runs/Mar22_22-47-46_pslw9jwq6/events.out.tfevents.1679525384.pslw9jwq6.121621.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6fcb96b622ee819abfbfb30e3669801b1443190effcd18e3719f5e42affc89ba
3
+ size 4893
special_tokens_map.json CHANGED
@@ -1,68 +1,6 @@
1
  {
2
- "additional_special_tokens": [
3
- "<pad_0>",
4
- "<pad_1>",
5
- "<pad_2>",
6
- "<pad_3>",
7
- "<pad_4>",
8
- "<pad_5>",
9
- "<pad_6>",
10
- "<pad_7>",
11
- "<pad_8>",
12
- "<pad_9>",
13
- "<pad_10>",
14
- "<pad_11>",
15
- "<pad_12>",
16
- "<pad_13>",
17
- "<pad_14>",
18
- "<pad_15>",
19
- "<pad_16>",
20
- "<pad_17>",
21
- "<pad_18>",
22
- "<pad_19>",
23
- "<pad_20>",
24
- "<pad_21>",
25
- "<pad_22>",
26
- "<pad_23>",
27
- "<pad_24>",
28
- "<pad_25>",
29
- "<pad_26>",
30
- "<pad_27>",
31
- "<pad_28>",
32
- "<pad_29>",
33
- "<pad_30>",
34
- "<pad_31>",
35
- "<pad_32>",
36
- "<pad_33>",
37
- "<pad_34>",
38
- "<pad_35>",
39
- "<pad_36>",
40
- "<pad_37>",
41
- "<pad_38>",
42
- "<pad_39>",
43
- "<pad_40>",
44
- "<pad_41>",
45
- "<pad_42>",
46
- "<pad_43>",
47
- "<pad_44>",
48
- "<pad_45>",
49
- "<pad_46>",
50
- "<pad_47>",
51
- "<pad_48>",
52
- "<pad_49>",
53
- "<pad_50>",
54
- "<pad_51>",
55
- "<pad_52>",
56
- "<pad_53>",
57
- "<pad_54>",
58
- "<pad_55>",
59
- "<pad_56>",
60
- "<pad_57>",
61
- "<pad_58>",
62
- "<pad_59>",
63
- "<pad_60>",
64
- "<pad_61>",
65
- "<pad_62>"
66
- ],
67
- "pad_token": "<pad_0>"
68
  }
 
1
  {
2
+ "bos_token": "<s>",
3
+ "eos_token": "</s>",
4
+ "pad_token": "<pad>",
5
+ "unk_token": "<unk>"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
  }
tokenizer_config.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "bos_token": "",
3
- "eos_token": "",
4
  "model_max_length": 512,
5
  "padding_side": "right",
6
- "special_tokens_map_file": "/home/ubuntu/.cache/huggingface/hub/models--swype--deepshard-13B-raw/snapshots/225c31ea4b7db4ec9277178b37bb8fbea50aa258/special_tokens_map.json",
7
  "tokenizer_class": "LlamaTokenizer",
8
- "unk_token": ""
9
  }
 
1
  {
2
+ "bos_token": "<s>",
3
+ "eos_token": "</s>",
4
  "model_max_length": 512,
5
  "padding_side": "right",
6
+ "special_tokens_map_file": "/home/paperspace/.cache/huggingface/hub/models--swype--deepshard-13B-raw/snapshots/956dd7d2c805d149e9c3d4bef1c16c2df6802408/special_tokens_map.json",
7
  "tokenizer_class": "LlamaTokenizer",
8
+ "unk_token": "<unk>"
9
  }
trainer_state.json CHANGED
@@ -2,266 +2,42 @@
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
  "epoch": 3.0,
5
- "global_step": 39,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
- {
11
- "epoch": 0.08,
12
- "learning_rate": 1e-05,
13
- "loss": 0.5431,
14
- "step": 1
15
- },
16
- {
17
- "epoch": 0.15,
18
- "learning_rate": 2e-05,
19
- "loss": 0.5217,
20
- "step": 2
21
- },
22
- {
23
- "epoch": 0.23,
24
- "learning_rate": 1.9963974885425267e-05,
25
- "loss": 0.4643,
26
- "step": 3
27
- },
28
- {
29
- "epoch": 0.31,
30
- "learning_rate": 1.9856159103477085e-05,
31
- "loss": 0.4449,
32
- "step": 4
33
- },
34
- {
35
- "epoch": 0.38,
36
- "learning_rate": 1.967732946933499e-05,
37
- "loss": 0.4399,
38
- "step": 5
39
- },
40
- {
41
- "epoch": 0.46,
42
- "learning_rate": 1.9428774454610845e-05,
43
- "loss": 0.4314,
44
- "step": 6
45
- },
46
- {
47
- "epoch": 0.54,
48
- "learning_rate": 1.911228490388136e-05,
49
- "loss": 0.44,
50
- "step": 7
51
- },
52
- {
53
- "epoch": 0.62,
54
- "learning_rate": 1.8730141131611882e-05,
55
- "loss": 0.422,
56
- "step": 8
57
- },
58
- {
59
- "epoch": 0.69,
60
- "learning_rate": 1.8285096492438424e-05,
61
- "loss": 0.4005,
62
- "step": 9
63
- },
64
- {
65
- "epoch": 0.77,
66
- "learning_rate": 1.7780357543184396e-05,
67
- "loss": 0.4117,
68
- "step": 10
69
- },
70
- {
71
- "epoch": 0.85,
72
- "learning_rate": 1.7219560939545246e-05,
73
- "loss": 0.3954,
74
- "step": 11
75
- },
76
- {
77
- "epoch": 0.92,
78
- "learning_rate": 1.6606747233900816e-05,
79
- "loss": 0.3866,
80
- "step": 12
81
- },
82
  {
83
  "epoch": 1.0,
84
- "learning_rate": 1.594633176304287e-05,
85
- "loss": 0.3906,
86
- "step": 13
87
- },
88
- {
89
- "epoch": 1.08,
90
- "learning_rate": 1.5243072835572319e-05,
91
- "loss": 0.3271,
92
- "step": 14
93
- },
94
- {
95
- "epoch": 1.15,
96
- "learning_rate": 1.4502037448176734e-05,
97
- "loss": 0.3647,
98
- "step": 15
99
- },
100
- {
101
- "epoch": 1.23,
102
- "learning_rate": 1.3728564777803089e-05,
103
- "loss": 0.3168,
104
- "step": 16
105
- },
106
- {
107
- "epoch": 1.31,
108
- "learning_rate": 1.2928227712765504e-05,
109
- "loss": 0.3172,
110
- "step": 17
111
- },
112
- {
113
- "epoch": 1.38,
114
- "learning_rate": 1.2106792699957264e-05,
115
- "loss": 0.3152,
116
- "step": 18
117
- },
118
- {
119
- "epoch": 1.46,
120
- "learning_rate": 1.1270178197468788e-05,
121
- "loss": 0.3392,
122
- "step": 19
123
- },
124
- {
125
- "epoch": 1.54,
126
- "learning_rate": 1.0424412031961485e-05,
127
- "loss": 0.3301,
128
- "step": 20
129
- },
130
- {
131
- "epoch": 1.62,
132
- "learning_rate": 9.57558796803852e-06,
133
- "loss": 0.2919,
134
- "step": 21
135
- },
136
- {
137
- "epoch": 1.69,
138
- "learning_rate": 8.729821802531213e-06,
139
- "loss": 0.2873,
140
- "step": 22
141
- },
142
- {
143
- "epoch": 1.77,
144
- "learning_rate": 7.89320730004274e-06,
145
- "loss": 0.3345,
146
- "step": 23
147
- },
148
- {
149
- "epoch": 1.85,
150
- "learning_rate": 7.071772287234497e-06,
151
- "loss": 0.3276,
152
- "step": 24
153
- },
154
- {
155
- "epoch": 1.92,
156
- "learning_rate": 6.2714352221969155e-06,
157
- "loss": 0.2866,
158
- "step": 25
159
  },
160
  {
161
  "epoch": 2.0,
162
- "learning_rate": 5.497962551823266e-06,
163
- "loss": 0.2947,
164
- "step": 26
165
- },
166
- {
167
- "epoch": 2.08,
168
- "learning_rate": 4.756927164427685e-06,
169
- "loss": 0.2738,
170
- "step": 27
171
- },
172
- {
173
- "epoch": 2.15,
174
- "learning_rate": 4.053668236957135e-06,
175
- "loss": 0.2464,
176
- "step": 28
177
- },
178
- {
179
- "epoch": 2.23,
180
- "learning_rate": 3.3932527660991877e-06,
181
- "loss": 0.2749,
182
- "step": 29
183
- },
184
- {
185
- "epoch": 2.31,
186
- "learning_rate": 2.780439060454756e-06,
187
- "loss": 0.2664,
188
- "step": 30
189
- },
190
- {
191
- "epoch": 2.38,
192
- "learning_rate": 2.2196424568156073e-06,
193
- "loss": 0.2852,
194
- "step": 31
195
- },
196
- {
197
- "epoch": 2.46,
198
- "learning_rate": 1.7149035075615795e-06,
199
- "loss": 0.2876,
200
- "step": 32
201
- },
202
- {
203
- "epoch": 2.54,
204
- "learning_rate": 1.2698588683881185e-06,
205
- "loss": 0.2691,
206
- "step": 33
207
- },
208
- {
209
- "epoch": 2.62,
210
- "learning_rate": 8.87715096118642e-07,
211
- "loss": 0.2448,
212
- "step": 34
213
- },
214
- {
215
- "epoch": 2.69,
216
- "learning_rate": 5.71225545389158e-07,
217
- "loss": 0.2784,
218
- "step": 35
219
- },
220
- {
221
- "epoch": 2.77,
222
- "learning_rate": 3.226705306650113e-07,
223
- "loss": 0.2722,
224
- "step": 36
225
- },
226
- {
227
- "epoch": 2.85,
228
- "learning_rate": 1.4384089652291544e-07,
229
- "loss": 0.2862,
230
- "step": 37
231
- },
232
- {
233
- "epoch": 2.92,
234
- "learning_rate": 3.602511457473479e-08,
235
- "loss": 0.2631,
236
- "step": 38
237
  },
238
  {
239
  "epoch": 3.0,
240
  "learning_rate": 0.0,
241
- "loss": 0.2251,
242
- "step": 39
243
- },
244
- {
245
- "epoch": 3.0,
246
- "step": 39,
247
- "total_flos": 2.4951731691454464e+16,
248
- "train_loss": 0.3409684506746439,
249
- "train_runtime": 1410.2975,
250
- "train_samples_per_second": 3.54,
251
- "train_steps_per_second": 0.028
252
  },
253
  {
254
  "epoch": 3.0,
255
- "eval_loss": 0.2879543900489807,
256
- "eval_runtime": 10.2141,
257
- "eval_samples_per_second": 40.728,
258
- "eval_steps_per_second": 1.273,
259
- "step": 39
 
260
  }
261
  ],
262
- "max_steps": 39,
263
  "num_train_epochs": 3,
264
- "total_flos": 2.4951731691454464e+16,
265
  "trial_name": null,
266
  "trial_params": null
267
  }
 
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
  "epoch": 3.0,
5
+ "global_step": 3,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
  {
11
  "epoch": 1.0,
12
+ "learning_rate": 2e-05,
13
+ "loss": 1.0501,
14
+ "step": 1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
  },
16
  {
17
  "epoch": 2.0,
18
+ "learning_rate": 1e-05,
19
+ "loss": 1.0767,
20
+ "step": 2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
  },
22
  {
23
  "epoch": 3.0,
24
  "learning_rate": 0.0,
25
+ "loss": 0.7536,
26
+ "step": 3
 
 
 
 
 
 
 
 
 
27
  },
28
  {
29
  "epoch": 3.0,
30
+ "step": 3,
31
+ "total_flos": 2327628803473408.0,
32
+ "train_loss": 0.9601547519365946,
33
+ "train_runtime": 1446.9004,
34
+ "train_samples_per_second": 0.531,
35
+ "train_steps_per_second": 0.002
36
  }
37
  ],
38
+ "max_steps": 3,
39
  "num_train_epochs": 3,
40
+ "total_flos": 2327628803473408.0,
41
  "trial_name": null,
42
  "trial_params": null
43
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0307ecb82ecfd276a98da54f39915387c1581a2d03ada66ef5a4af4653463a47
3
- size 3771
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a37c01339dfc23b560f46c214c764dcc232eff864973e5037cb88d8c3da23be3
3
+ size 3835