ChiefTheLord commited on
Commit
a0b7d20
verified
1 Parent(s): 1b1bab5

Upload folder using huggingface_hub

Browse files
checkpoints/checkpoint-64-2/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b6c423c4117a2b8f7a35bdc820d6787dc69b2c65189b7db40fea8b3f8733a74e
3
+ size 14851088
checkpoints/checkpoint-64-2/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:508d6ef57784aab69536b43f2ce5cd90ebde1abaac9a1175a55235773c7f467d
3
+ size 29695994
checkpoints/checkpoint-64-2/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4ed2e591b77a8475236224c0177524db10370f13296177433e7087f6779de2cd
3
+ size 14244
checkpoints/checkpoint-64-2/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fc4bfb939478cd93b258d0e9adcb77783c572b76ca6aa183c4f0e89ec4087307
3
+ size 1064
checkpoints/checkpoint-64-2/trainer_state.json ADDED
@@ -0,0 +1,161 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 25.6,
5
+ "eval_steps": 8,
6
+ "global_step": 64,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 3.2,
13
+ "grad_norm": 4.114430904388428,
14
+ "learning_rate": 5.384615384615385e-06,
15
+ "loss": 1.4122,
16
+ "step": 8
17
+ },
18
+ {
19
+ "epoch": 3.2,
20
+ "eval_accuracy": 0.64,
21
+ "eval_loss": 1.3075119256973267,
22
+ "eval_runtime": 4.2426,
23
+ "eval_samples_per_second": 35.356,
24
+ "eval_steps_per_second": 0.707,
25
+ "step": 8
26
+ },
27
+ {
28
+ "epoch": 6.4,
29
+ "grad_norm": 28.230571746826172,
30
+ "learning_rate": 9.96210254835968e-06,
31
+ "loss": 1.335,
32
+ "step": 16
33
+ },
34
+ {
35
+ "epoch": 6.4,
36
+ "eval_accuracy": 0.6533333333333333,
37
+ "eval_loss": 1.3095086812973022,
38
+ "eval_runtime": 4.2067,
39
+ "eval_samples_per_second": 35.657,
40
+ "eval_steps_per_second": 0.713,
41
+ "step": 16
42
+ },
43
+ {
44
+ "epoch": 9.6,
45
+ "grad_norm": 9.932499885559082,
46
+ "learning_rate": 9.08098456178111e-06,
47
+ "loss": 1.3597,
48
+ "step": 24
49
+ },
50
+ {
51
+ "epoch": 9.6,
52
+ "eval_accuracy": 0.6333333333333333,
53
+ "eval_loss": 1.3208931684494019,
54
+ "eval_runtime": 4.5019,
55
+ "eval_samples_per_second": 33.319,
56
+ "eval_steps_per_second": 0.666,
57
+ "step": 24
58
+ },
59
+ {
60
+ "epoch": 12.8,
61
+ "grad_norm": 24.279809951782227,
62
+ "learning_rate": 7.2286917788826926e-06,
63
+ "loss": 1.3312,
64
+ "step": 32
65
+ },
66
+ {
67
+ "epoch": 12.8,
68
+ "eval_accuracy": 0.6466666666666666,
69
+ "eval_loss": 1.301181674003601,
70
+ "eval_runtime": 5.083,
71
+ "eval_samples_per_second": 29.51,
72
+ "eval_steps_per_second": 0.59,
73
+ "step": 32
74
+ },
75
+ {
76
+ "epoch": 16.0,
77
+ "grad_norm": 2.4543464183807373,
78
+ "learning_rate": 4.846024707219149e-06,
79
+ "loss": 1.334,
80
+ "step": 40
81
+ },
82
+ {
83
+ "epoch": 16.0,
84
+ "eval_accuracy": 0.6466666666666666,
85
+ "eval_loss": 1.3118642568588257,
86
+ "eval_runtime": 4.3277,
87
+ "eval_samples_per_second": 34.66,
88
+ "eval_steps_per_second": 0.693,
89
+ "step": 40
90
+ },
91
+ {
92
+ "epoch": 19.2,
93
+ "grad_norm": 14.416548728942871,
94
+ "learning_rate": 2.5000000000000015e-06,
95
+ "loss": 1.3728,
96
+ "step": 48
97
+ },
98
+ {
99
+ "epoch": 19.2,
100
+ "eval_accuracy": 0.6333333333333333,
101
+ "eval_loss": 1.3421196937561035,
102
+ "eval_runtime": 10.4599,
103
+ "eval_samples_per_second": 14.34,
104
+ "eval_steps_per_second": 0.287,
105
+ "step": 48
106
+ },
107
+ {
108
+ "epoch": 22.4,
109
+ "grad_norm": 8.258885383605957,
110
+ "learning_rate": 7.489143213519301e-07,
111
+ "loss": 1.3078,
112
+ "step": 56
113
+ },
114
+ {
115
+ "epoch": 22.4,
116
+ "eval_accuracy": 0.6333333333333333,
117
+ "eval_loss": 1.3307832479476929,
118
+ "eval_runtime": 4.2226,
119
+ "eval_samples_per_second": 35.523,
120
+ "eval_steps_per_second": 0.71,
121
+ "step": 56
122
+ },
123
+ {
124
+ "epoch": 25.6,
125
+ "grad_norm": 6.2354302406311035,
126
+ "learning_rate": 9.48335631477948e-09,
127
+ "loss": 1.314,
128
+ "step": 64
129
+ },
130
+ {
131
+ "epoch": 25.6,
132
+ "eval_accuracy": 0.6666666666666666,
133
+ "eval_loss": 1.2962980270385742,
134
+ "eval_runtime": 4.267,
135
+ "eval_samples_per_second": 35.153,
136
+ "eval_steps_per_second": 0.703,
137
+ "step": 64
138
+ }
139
+ ],
140
+ "logging_steps": 8,
141
+ "max_steps": 64,
142
+ "num_input_tokens_seen": 0,
143
+ "num_train_epochs": 32,
144
+ "save_steps": 8,
145
+ "stateful_callbacks": {
146
+ "TrainerControl": {
147
+ "args": {
148
+ "should_epoch_stop": false,
149
+ "should_evaluate": false,
150
+ "should_log": false,
151
+ "should_save": true,
152
+ "should_training_stop": true
153
+ },
154
+ "attributes": {}
155
+ }
156
+ },
157
+ "total_flos": 0.0,
158
+ "train_batch_size": 64,
159
+ "trial_name": null,
160
+ "trial_params": null
161
+ }
checkpoints/checkpoint-64-2/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ef62b387f1eaf68abc4cae37aee05599ba89174ec638501c69f44b25563cc88d
3
+ size 5112