Harshkmr committed on
Commit ac4d284
1 Parent(s): 46c4dd8

Model save

Files changed (5)
  1. README.md +82 -0
  2. all_results.json +37 -0
  3. eval_results.json +31 -0
  4. train_results.json +9 -0
  5. trainer_state.json +550 -0
README.md ADDED
@@ -0,0 +1,82 @@
+ ---
+ license: mit
+ base_model: microsoft/deberta-v3-base
+ tags:
+ - generated_from_trainer
+ model-index:
+ - name: deberta-v3-base_finetuned_bluegennx_run2.19_2e
+   results: []
+ ---
+
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
+ should probably proofread and complete it, then remove this comment. -->
+
+ # deberta-v3-base_finetuned_bluegennx_run2.19_2e
+
+ This model is a fine-tuned version of [microsoft/deberta-v3-base](https://huggingface.co/microsoft/deberta-v3-base) on an unknown dataset.
+ It achieves the following results on the evaluation set:
+ - Loss: 0.0201
+ - Overall Precision: 0.9745
+ - Overall Recall: 0.9862
+ - Overall F1: 0.9803
+ - Overall Accuracy: 0.9952
+ - Aadhar Card F1: 0.9837
+ - Age F1: 0.9633
+ - City F1: 0.9842
+ - Country F1: 0.9843
+ - Creditcardcvv F1: 0.9879
+ - Creditcardnumber F1: 0.9416
+ - Date F1: 0.9600
+ - Dateofbirth F1: 0.9023
+ - Email F1: 0.9900
+ - Expirydate F1: 0.9912
+ - Organization F1: 0.9910
+ - Pan Card F1: 0.9867
+ - Person F1: 0.9878
+ - Phonenumber F1: 0.9858
+ - Pincode F1: 0.9907
+ - Secondaryaddress F1: 0.9878
+ - State F1: 0.9909
+ - Time F1: 0.9820
+ - Url F1: 0.9949
+
+ ## Model description
+
+ More information needed
+
+ ## Intended uses & limitations
+
+ More information needed
+
+ ## Training and evaluation data
+
+ More information needed
+
+ ## Training procedure
+
+ ### Training hyperparameters
+
+ The following hyperparameters were used during training:
+ - learning_rate: 5e-05
+ - train_batch_size: 4
+ - eval_batch_size: 4
+ - seed: 42
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
+ - lr_scheduler_type: cosine_with_restarts
+ - lr_scheduler_warmup_ratio: 0.2
+ - num_epochs: 2
+
+ ### Training results
+
+ | Training Loss | Epoch | Step | Validation Loss | Overall Precision | Overall Recall | Overall F1 | Overall Accuracy | Aadhar Card F1 | Age F1 | City F1 | Country F1 | Creditcardcvv F1 | Creditcardnumber F1 | Date F1 | Dateofbirth F1 | Email F1 | Expirydate F1 | Organization F1 | Pan Card F1 | Person F1 | Phonenumber F1 | Pincode F1 | Secondaryaddress F1 | State F1 | Time F1 | Url F1 |
+ |:-------------:|:-----:|:-----:|:---------------:|:-----------------:|:--------------:|:----------:|:----------------:|:--------------:|:------:|:-------:|:----------:|:----------------:|:-------------------:|:-------:|:--------------:|:--------:|:-------------:|:---------------:|:-----------:|:---------:|:--------------:|:----------:|:-------------------:|:--------:|:-------:|:------:|
+ | 0.0261 | 1.0 | 15321 | 0.0287 | 0.9619 | 0.9781 | 0.9700 | 0.9934 | 0.9613 | 0.9463 | 0.9541 | 0.9832 | 0.9793 | 0.9270 | 0.9481 | 0.8767 | 0.9793 | 0.9809 | 0.9882 | 0.9751 | 0.9840 | 0.9747 | 0.9835 | 0.9831 | 0.9620 | 0.9780 | 0.9873 |
+ | 0.0152 | 2.0 | 30642 | 0.0201 | 0.9745 | 0.9862 | 0.9803 | 0.9952 | 0.9837 | 0.9633 | 0.9842 | 0.9843 | 0.9879 | 0.9416 | 0.9600 | 0.9023 | 0.9900 | 0.9912 | 0.9910 | 0.9867 | 0.9878 | 0.9858 | 0.9907 | 0.9878 | 0.9909 | 0.9820 | 0.9949 |
+
+
+ ### Framework versions
+
+ - Transformers 4.39.3
+ - Pytorch 2.1.2
+ - Datasets 2.18.0
+ - Tokenizers 0.15.2
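
The auto-generated card above does not include a usage snippet. A minimal sketch of loading this checkpoint for token-classification inference with 🤗 Transformers is shown below; the hub id is an assumption inferred from the committer and model name, so substitute the actual repository id or a local checkpoint path.

```python
# Minimal usage sketch (assumptions: the checkpoint is a token-classification
# model for PII-style entities, and the hub id below is hypothetical — replace
# it with the real repository id or a local path such as
# "./deberta-v3-base_finetuned_bluegennx_run2.19_2e/checkpoint-30642").
from transformers import AutoModelForTokenClassification, AutoTokenizer, pipeline

model_id = "Harshkmr/deberta-v3-base_finetuned_bluegennx_run2.19_2e"  # hypothetical id

tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForTokenClassification.from_pretrained(model_id)

# aggregation_strategy="simple" merges sub-word pieces into whole entity spans
ner = pipeline("token-classification", model=model, tokenizer=tokenizer,
               aggregation_strategy="simple")

print(ner("Contact Jane Doe at jane.doe@example.com or +91 98765 43210."))
```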
all_results.json ADDED
@@ -0,0 +1,37 @@
+ {
+ "epoch": 2.0,
+ "eval_AADHAR_CARD_f1": 0.9837270341207349,
+ "eval_AGE_f1": 0.9633416458852868,
+ "eval_CITY_f1": 0.9842361227570016,
+ "eval_COUNTRY_f1": 0.9843467790487658,
+ "eval_CREDITCARDCVV_f1": 0.9878760664571171,
+ "eval_CREDITCARDNUMBER_f1": 0.9416398138202648,
+ "eval_DATEOFBIRTH_f1": 0.9023332645054718,
+ "eval_DATE_f1": 0.9600118046333186,
+ "eval_EMAIL_f1": 0.990012854741422,
+ "eval_EXPIRYDATE_f1": 0.9912280701754386,
+ "eval_ORGANIZATION_f1": 0.991032304086416,
+ "eval_PAN_CARD_f1": 0.9867424242424242,
+ "eval_PERSON_f1": 0.9877905928996216,
+ "eval_PHONENUMBER_f1": 0.9857583321098224,
+ "eval_PINCODE_f1": 0.9907161803713527,
+ "eval_SECONDARYADDRESS_f1": 0.9877938061131848,
+ "eval_STATE_f1": 0.9909125815947779,
+ "eval_TIME_f1": 0.9819761530640541,
+ "eval_URL_f1": 0.9948626312262676,
+ "eval_loss": 0.020110823214054108,
+ "eval_overall_accuracy": 0.9951943362620375,
+ "eval_overall_f1": 0.9803088380243128,
+ "eval_overall_precision": 0.9744924065102607,
+ "eval_overall_recall": 0.9861951192640335,
+ "eval_runtime": 249.9695,
+ "eval_samples": 15321,
+ "eval_samples_per_second": 61.291,
+ "eval_steps_per_second": 15.326,
+ "total_flos": 1.1909329832222172e+16,
+ "train_loss": 0.06240972641763811,
+ "train_runtime": 5473.6718,
+ "train_samples": 61281,
+ "train_samples_per_second": 22.391,
+ "train_steps_per_second": 5.598
+ }
eval_results.json ADDED
@@ -0,0 +1,31 @@
+ {
+ "epoch": 2.0,
+ "eval_AADHAR_CARD_f1": 0.9837270341207349,
+ "eval_AGE_f1": 0.9633416458852868,
+ "eval_CITY_f1": 0.9842361227570016,
+ "eval_COUNTRY_f1": 0.9843467790487658,
+ "eval_CREDITCARDCVV_f1": 0.9878760664571171,
+ "eval_CREDITCARDNUMBER_f1": 0.9416398138202648,
+ "eval_DATEOFBIRTH_f1": 0.9023332645054718,
+ "eval_DATE_f1": 0.9600118046333186,
+ "eval_EMAIL_f1": 0.990012854741422,
+ "eval_EXPIRYDATE_f1": 0.9912280701754386,
+ "eval_ORGANIZATION_f1": 0.991032304086416,
+ "eval_PAN_CARD_f1": 0.9867424242424242,
+ "eval_PERSON_f1": 0.9877905928996216,
+ "eval_PHONENUMBER_f1": 0.9857583321098224,
+ "eval_PINCODE_f1": 0.9907161803713527,
+ "eval_SECONDARYADDRESS_f1": 0.9877938061131848,
+ "eval_STATE_f1": 0.9909125815947779,
+ "eval_TIME_f1": 0.9819761530640541,
+ "eval_URL_f1": 0.9948626312262676,
+ "eval_loss": 0.020110823214054108,
+ "eval_overall_accuracy": 0.9951943362620375,
+ "eval_overall_f1": 0.9803088380243128,
+ "eval_overall_precision": 0.9744924065102607,
+ "eval_overall_recall": 0.9861951192640335,
+ "eval_runtime": 249.9695,
+ "eval_samples": 15321,
+ "eval_samples_per_second": 61.291,
+ "eval_steps_per_second": 15.326
+ }
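
As a quick sanity check (not part of the original commit), the overall F1 in eval_results.json should be the harmonic mean of the overall precision and recall reported above; a few lines of Python reproduce it.

```python
# Sanity check: overall F1 as the harmonic mean of the reported precision/recall
# (values copied from eval_results.json above).
precision = 0.9744924065102607
recall = 0.9861951192640335

f1 = 2 * precision * recall / (precision + recall)
print(f1)  # ≈ 0.98031, matching eval_overall_f1
```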
train_results.json ADDED
@@ -0,0 +1,9 @@
+ {
+ "epoch": 2.0,
+ "total_flos": 1.1909329832222172e+16,
+ "train_loss": 0.06240972641763811,
+ "train_runtime": 5473.6718,
+ "train_samples": 61281,
+ "train_samples_per_second": 22.391,
+ "train_steps_per_second": 5.598
+ }
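
The throughput figures in train_results.json are mutually consistent; a short, illustrative check follows, assuming 2 epochs over the 61,281 training samples and the 30,642 optimizer steps recorded in trainer_state.json below.

```python
# Illustrative consistency check on the reported training throughput
# (inputs copied from train_results.json and trainer_state.json in this commit).
train_samples = 61281
num_epochs = 2
train_runtime_s = 5473.6718
global_steps = 30642

print(train_samples * num_epochs / train_runtime_s)  # ≈ 22.39 train_samples_per_second
print(global_steps / train_runtime_s)                # ≈ 5.60 train_steps_per_second
```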
trainer_state.json ADDED
@@ -0,0 +1,550 @@
+ {
+ "best_metric": 0.020110823214054108,
+ "best_model_checkpoint": "./deberta-v3-base_finetuned_bluegennx_run2.19_2e/checkpoint-30642",
+ "epoch": 2.0,
+ "eval_steps": 500,
+ "global_step": 30642,
+ "is_hyper_param_search": false,
+ "is_local_process_zero": true,
+ "is_world_process_zero": true,
+ "log_history": [
+ {
+ "epoch": 0.03,
+ "grad_norm": 2.541147470474243,
+ "learning_rate": 4.078968836678088e-06,
+ "loss": 1.5263,
+ "step": 500
+ },
+ {
+ "epoch": 0.07,
+ "grad_norm": 1.968030571937561,
+ "learning_rate": 8.157937673356177e-06,
+ "loss": 0.2859,
+ "step": 1000
+ },
+ {
+ "epoch": 0.1,
+ "grad_norm": 0.2945529818534851,
+ "learning_rate": 1.2236906510034265e-05,
+ "loss": 0.1322,
+ "step": 1500
+ },
+ {
+ "epoch": 0.13,
+ "grad_norm": 1.4127711057662964,
+ "learning_rate": 1.6315875346712353e-05,
+ "loss": 0.0928,
+ "step": 2000
+ },
+ {
+ "epoch": 0.16,
+ "grad_norm": 1.8087753057479858,
+ "learning_rate": 2.039484418339044e-05,
+ "loss": 0.0827,
+ "step": 2500
+ },
+ {
+ "epoch": 0.2,
+ "grad_norm": 0.4666314125061035,
+ "learning_rate": 2.447381302006853e-05,
+ "loss": 0.0665,
+ "step": 3000
+ },
+ {
+ "epoch": 0.23,
+ "grad_norm": 2.213435411453247,
+ "learning_rate": 2.8552781856746613e-05,
+ "loss": 0.0621,
+ "step": 3500
+ },
+ {
+ "epoch": 0.26,
+ "grad_norm": 0.5923435091972351,
+ "learning_rate": 3.2631750693424706e-05,
+ "loss": 0.0622,
+ "step": 4000
+ },
+ {
+ "epoch": 0.29,
+ "grad_norm": 0.3192698657512665,
+ "learning_rate": 3.6710719530102796e-05,
+ "loss": 0.0545,
+ "step": 4500
+ },
+ {
+ "epoch": 0.33,
+ "grad_norm": 0.21803532540798187,
+ "learning_rate": 4.078968836678088e-05,
+ "loss": 0.0557,
+ "step": 5000
+ },
+ {
+ "epoch": 0.36,
+ "grad_norm": 0.024202294647693634,
+ "learning_rate": 4.486865720345897e-05,
+ "loss": 0.0468,
+ "step": 5500
+ },
+ {
+ "epoch": 0.39,
+ "grad_norm": 0.12951496243476868,
+ "learning_rate": 4.894762604013706e-05,
+ "loss": 0.0501,
+ "step": 6000
+ },
+ {
+ "epoch": 0.42,
+ "grad_norm": 0.19036373496055603,
+ "learning_rate": 4.9971745812299085e-05,
+ "loss": 0.0521,
+ "step": 6500
+ },
+ {
+ "epoch": 0.46,
+ "grad_norm": 1.3507754802703857,
+ "learning_rate": 4.984440267634476e-05,
+ "loss": 0.048,
+ "step": 7000
+ },
+ {
+ "epoch": 0.49,
+ "grad_norm": 0.057900320738554,
+ "learning_rate": 4.9615076778917275e-05,
+ "loss": 0.0448,
+ "step": 7500
+ },
+ {
+ "epoch": 0.52,
+ "grad_norm": 0.3218132257461548,
+ "learning_rate": 4.928470947041259e-05,
+ "loss": 0.0466,
+ "step": 8000
+ },
+ {
+ "epoch": 0.55,
+ "grad_norm": 0.9866151213645935,
+ "learning_rate": 4.885465686193794e-05,
+ "loss": 0.0441,
+ "step": 8500
+ },
+ {
+ "epoch": 0.59,
+ "grad_norm": 1.0197815895080566,
+ "learning_rate": 4.832668425866738e-05,
+ "loss": 0.0441,
+ "step": 9000
+ },
+ {
+ "epoch": 0.62,
+ "grad_norm": 0.9317641854286194,
+ "learning_rate": 4.7702958913513574e-05,
+ "loss": 0.0445,
+ "step": 9500
+ },
+ {
+ "epoch": 0.65,
+ "grad_norm": 0.06256090104579926,
+ "learning_rate": 4.69860411308609e-05,
+ "loss": 0.0407,
+ "step": 10000
+ },
+ {
+ "epoch": 0.69,
+ "grad_norm": 0.36358851194381714,
+ "learning_rate": 4.6178873756877836e-05,
+ "loss": 0.0421,
+ "step": 10500
+ },
+ {
+ "epoch": 0.72,
+ "grad_norm": 0.4968320429325104,
+ "learning_rate": 4.528477009954938e-05,
+ "loss": 0.0304,
+ "step": 11000
+ },
+ {
+ "epoch": 0.75,
+ "grad_norm": 1.2788376808166504,
+ "learning_rate": 4.4307400328016045e-05,
+ "loss": 0.0411,
+ "step": 11500
+ },
+ {
+ "epoch": 0.78,
+ "grad_norm": 0.6715700626373291,
+ "learning_rate": 4.325077640704834e-05,
+ "loss": 0.0336,
+ "step": 12000
+ },
+ {
+ "epoch": 0.82,
+ "grad_norm": 1.854251503944397,
+ "learning_rate": 4.211923562849836e-05,
+ "loss": 0.0353,
+ "step": 12500
+ },
+ {
+ "epoch": 0.85,
+ "grad_norm": 0.044830091297626495,
+ "learning_rate": 4.0917422807329866e-05,
+ "loss": 0.0361,
+ "step": 13000
+ },
+ {
+ "epoch": 0.88,
+ "grad_norm": 0.010744288563728333,
+ "learning_rate": 3.9650271215309246e-05,
+ "loss": 0.0347,
+ "step": 13500
+ },
+ {
+ "epoch": 0.91,
+ "grad_norm": 2.754666566848755,
+ "learning_rate": 3.832298233062228e-05,
+ "loss": 0.0316,
+ "step": 14000
+ },
+ {
+ "epoch": 0.95,
+ "grad_norm": 1.647675633430481,
+ "learning_rate": 3.6941004486541304e-05,
+ "loss": 0.0273,
+ "step": 14500
+ },
+ {
+ "epoch": 0.98,
+ "grad_norm": 0.013370398432016373,
+ "learning_rate": 3.5510010506787223e-05,
+ "loss": 0.0261,
+ "step": 15000
+ },
+ {
+ "epoch": 1.0,
+ "eval_AADHAR_CARD_f1": 0.9613259668508287,
+ "eval_AGE_f1": 0.9462576687116565,
+ "eval_CITY_f1": 0.9541174456428695,
+ "eval_COUNTRY_f1": 0.983173076923077,
+ "eval_CREDITCARDCVV_f1": 0.9793072424651371,
+ "eval_CREDITCARDNUMBER_f1": 0.926996918701114,
+ "eval_DATEOFBIRTH_f1": 0.8766835016835016,
+ "eval_DATE_f1": 0.9481049562682217,
+ "eval_EMAIL_f1": 0.9793307086614174,
+ "eval_EXPIRYDATE_f1": 0.9809104258443465,
+ "eval_ORGANIZATION_f1": 0.9881575603557815,
+ "eval_PAN_CARD_f1": 0.9750609413088318,
+ "eval_PERSON_f1": 0.984000540066158,
+ "eval_PHONENUMBER_f1": 0.9747039040795439,
+ "eval_PINCODE_f1": 0.9834765366820886,
+ "eval_SECONDARYADDRESS_f1": 0.9830883833299778,
+ "eval_STATE_f1": 0.9619748161077173,
+ "eval_TIME_f1": 0.9779588812743101,
+ "eval_URL_f1": 0.9872625698324023,
+ "eval_loss": 0.028734495863318443,
+ "eval_overall_accuracy": 0.9933771207589489,
+ "eval_overall_f1": 0.9699552059434065,
+ "eval_overall_precision": 0.9619155967278834,
+ "eval_overall_recall": 0.9781303365834848,
+ "eval_runtime": 285.0273,
+ "eval_samples_per_second": 53.753,
+ "eval_steps_per_second": 13.441,
+ "step": 15321
+ },
+ {
+ "epoch": 1.01,
+ "grad_norm": 0.2590649425983429,
+ "learning_rate": 3.403587441938993e-05,
+ "loss": 0.0302,
+ "step": 15500
+ },
+ {
+ "epoch": 1.04,
+ "grad_norm": 0.24032843112945557,
+ "learning_rate": 3.2524647344633493e-05,
+ "loss": 0.0267,
+ "step": 16000
+ },
+ {
+ "epoch": 1.08,
+ "grad_norm": 0.031069770455360413,
+ "learning_rate": 3.0982532656062604e-05,
+ "loss": 0.0244,
+ "step": 16500
+ },
+ {
+ "epoch": 1.11,
+ "grad_norm": 0.65534907579422,
+ "learning_rate": 2.9415860516511033e-05,
+ "loss": 0.024,
+ "step": 17000
+ },
+ {
+ "epoch": 1.14,
+ "grad_norm": 1.3654996156692505,
+ "learning_rate": 2.783106189367786e-05,
+ "loss": 0.0227,
+ "step": 17500
+ },
+ {
+ "epoch": 1.17,
+ "grad_norm": 0.11274850368499756,
+ "learning_rate": 2.623464216191404e-05,
+ "loss": 0.0225,
+ "step": 18000
+ },
+ {
+ "epoch": 1.21,
+ "grad_norm": 3.295466661453247,
+ "learning_rate": 2.463315439858015e-05,
+ "loss": 0.0208,
+ "step": 18500
+ },
+ {
+ "epoch": 1.24,
+ "grad_norm": 1.3590203523635864,
+ "learning_rate": 2.303317248459006e-05,
+ "loss": 0.0246,
+ "step": 19000
+ },
+ {
+ "epoch": 1.27,
+ "grad_norm": 0.02803308703005314,
+ "learning_rate": 2.1441264119558903e-05,
+ "loss": 0.022,
+ "step": 19500
+ },
+ {
+ "epoch": 1.31,
+ "grad_norm": 0.3329419493675232,
+ "learning_rate": 1.9863963862324454e-05,
+ "loss": 0.0215,
+ "step": 20000
+ },
+ {
+ "epoch": 1.34,
+ "grad_norm": 1.7840496301651,
+ "learning_rate": 1.830774630750671e-05,
+ "loss": 0.021,
+ "step": 20500
+ },
+ {
+ "epoch": 1.37,
+ "grad_norm": 0.012373638339340687,
+ "learning_rate": 1.6778999508212068e-05,
+ "loss": 0.0192,
+ "step": 21000
+ },
+ {
+ "epoch": 1.4,
+ "grad_norm": 0.7150644063949585,
+ "learning_rate": 1.528399875397824e-05,
+ "loss": 0.0193,
+ "step": 21500
+ },
+ {
+ "epoch": 1.44,
+ "grad_norm": 1.5099574327468872,
+ "learning_rate": 1.3828880811597606e-05,
+ "loss": 0.0181,
+ "step": 22000
+ },
+ {
+ "epoch": 1.47,
+ "grad_norm": 0.07326490432024002,
+ "learning_rate": 1.2419618734556984e-05,
+ "loss": 0.0184,
+ "step": 22500
+ },
+ {
+ "epoch": 1.5,
+ "grad_norm": 0.23956900835037231,
+ "learning_rate": 1.1061997344497197e-05,
+ "loss": 0.0174,
+ "step": 23000
+ },
+ {
+ "epoch": 1.53,
+ "grad_norm": 0.00975050963461399,
+ "learning_rate": 9.761589485337946e-06,
+ "loss": 0.0181,
+ "step": 23500
+ },
+ {
+ "epoch": 1.57,
+ "grad_norm": 0.04456551373004913,
+ "learning_rate": 8.523733147541155e-06,
+ "loss": 0.0155,
+ "step": 24000
+ },
+ {
+ "epoch": 1.6,
+ "grad_norm": 0.02919822372496128,
+ "learning_rate": 7.3535095564145976e-06,
+ "loss": 0.0181,
+ "step": 24500
+ },
+ {
+ "epoch": 1.63,
+ "grad_norm": 0.07429083436727524,
+ "learning_rate": 6.2557223144002006e-06,
+ "loss": 0.0211,
+ "step": 25000
+ },
+ {
+ "epoch": 1.66,
+ "grad_norm": 0.7385916113853455,
+ "learning_rate": 5.234877682964934e-06,
+ "loss": 0.0158,
+ "step": 25500
+ },
+ {
+ "epoch": 1.7,
+ "grad_norm": 0.06193115562200546,
+ "learning_rate": 4.295166085034444e-06,
+ "loss": 0.0152,
+ "step": 26000
+ },
+ {
+ "epoch": 1.73,
+ "grad_norm": 0.24567507207393646,
+ "learning_rate": 3.440444903899198e-06,
+ "loss": 0.0191,
+ "step": 26500
+ },
+ {
+ "epoch": 1.76,
+ "grad_norm": 0.2641615867614746,
+ "learning_rate": 2.6742226492012856e-06,
+ "loss": 0.0145,
+ "step": 27000
+ },
+ {
+ "epoch": 1.79,
+ "grad_norm": 0.020536798983812332,
+ "learning_rate": 1.999644554998209e-06,
+ "loss": 0.0151,
+ "step": 27500
+ },
+ {
+ "epoch": 1.83,
+ "grad_norm": 0.5791102051734924,
+ "learning_rate": 1.4194796690216673e-06,
+ "loss": 0.0162,
+ "step": 28000
+ },
+ {
+ "epoch": 1.86,
+ "grad_norm": 0.14257295429706573,
+ "learning_rate": 9.361094861279646e-07,
+ "loss": 0.0142,
+ "step": 28500
+ },
+ {
+ "epoch": 1.89,
+ "grad_norm": 0.7881425619125366,
+ "learning_rate": 5.515181725982282e-07,
+ "loss": 0.0151,
+ "step": 29000
+ },
+ {
+ "epoch": 1.93,
+ "grad_norm": 0.030007589608430862,
+ "learning_rate": 2.672844214162862e-07,
+ "loss": 0.0153,
+ "step": 29500
+ },
+ {
+ "epoch": 1.96,
+ "grad_norm": 0.5309509634971619,
+ "learning_rate": 8.457497195707431e-08,
+ "loss": 0.0177,
+ "step": 30000
+ },
+ {
+ "epoch": 1.99,
+ "grad_norm": 1.1870931386947632,
+ "learning_rate": 4.139820686432927e-09,
+ "loss": 0.0152,
+ "step": 30500
+ },
+ {
+ "epoch": 2.0,
+ "eval_AADHAR_CARD_f1": 0.9837270341207349,
+ "eval_AGE_f1": 0.9633416458852868,
+ "eval_CITY_f1": 0.9842361227570016,
+ "eval_COUNTRY_f1": 0.9843467790487658,
+ "eval_CREDITCARDCVV_f1": 0.9878760664571171,
+ "eval_CREDITCARDNUMBER_f1": 0.9416398138202648,
+ "eval_DATEOFBIRTH_f1": 0.9023332645054718,
+ "eval_DATE_f1": 0.9600118046333186,
+ "eval_EMAIL_f1": 0.990012854741422,
+ "eval_EXPIRYDATE_f1": 0.9912280701754386,
+ "eval_ORGANIZATION_f1": 0.991032304086416,
+ "eval_PAN_CARD_f1": 0.9867424242424242,
+ "eval_PERSON_f1": 0.9877905928996216,
+ "eval_PHONENUMBER_f1": 0.9857583321098224,
+ "eval_PINCODE_f1": 0.9907161803713527,
+ "eval_SECONDARYADDRESS_f1": 0.9877938061131848,
+ "eval_STATE_f1": 0.9909125815947779,
+ "eval_TIME_f1": 0.9819761530640541,
+ "eval_URL_f1": 0.9948626312262676,
+ "eval_loss": 0.020110823214054108,
+ "eval_overall_accuracy": 0.9951943362620375,
+ "eval_overall_f1": 0.9803088380243128,
+ "eval_overall_precision": 0.9744924065102607,
+ "eval_overall_recall": 0.9861951192640335,
+ "eval_runtime": 251.2008,
+ "eval_samples_per_second": 60.991,
+ "eval_steps_per_second": 15.251,
+ "step": 30642
+ },
+ {
+ "epoch": 2.0,
+ "step": 30642,
+ "total_flos": 1.1909329832222172e+16,
+ "train_loss": 0.06240972641763811,
+ "train_runtime": 5473.6718,
+ "train_samples_per_second": 22.391,
+ "train_steps_per_second": 5.598
+ },
+ {
+ "epoch": 2.0,
+ "eval_AADHAR_CARD_f1": 0.9837270341207349,
+ "eval_AGE_f1": 0.9633416458852868,
+ "eval_CITY_f1": 0.9842361227570016,
+ "eval_COUNTRY_f1": 0.9843467790487658,
+ "eval_CREDITCARDCVV_f1": 0.9878760664571171,
+ "eval_CREDITCARDNUMBER_f1": 0.9416398138202648,
+ "eval_DATEOFBIRTH_f1": 0.9023332645054718,
+ "eval_DATE_f1": 0.9600118046333186,
+ "eval_EMAIL_f1": 0.990012854741422,
+ "eval_EXPIRYDATE_f1": 0.9912280701754386,
+ "eval_ORGANIZATION_f1": 0.991032304086416,
+ "eval_PAN_CARD_f1": 0.9867424242424242,
+ "eval_PERSON_f1": 0.9877905928996216,
+ "eval_PHONENUMBER_f1": 0.9857583321098224,
+ "eval_PINCODE_f1": 0.9907161803713527,
+ "eval_SECONDARYADDRESS_f1": 0.9877938061131848,
+ "eval_STATE_f1": 0.9909125815947779,
+ "eval_TIME_f1": 0.9819761530640541,
+ "eval_URL_f1": 0.9948626312262676,
+ "eval_loss": 0.020110823214054108,
+ "eval_overall_accuracy": 0.9951943362620375,
+ "eval_overall_f1": 0.9803088380243128,
+ "eval_overall_precision": 0.9744924065102607,
+ "eval_overall_recall": 0.9861951192640335,
+ "eval_runtime": 249.9695,
+ "eval_samples_per_second": 61.291,
+ "eval_steps_per_second": 15.326,
+ "step": 30642
+ }
+ ],
+ "logging_steps": 500,
+ "max_steps": 30642,
+ "num_input_tokens_seen": 0,
+ "num_train_epochs": 2,
+ "save_steps": 500,
+ "total_flos": 1.1909329832222172e+16,
+ "train_batch_size": 4,
+ "trial_name": null,
+ "trial_params": null
+ }
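
Because trainer_state.json is plain JSON, the loss curve logged above can be re-plotted directly from the file; a minimal sketch follows, assuming the file sits in the current directory and matplotlib is installed.

```python
# Minimal sketch: plot the training-loss curve recorded in trainer_state.json
# (the path is an assumption — point it at the saved checkpoint directory).
import json

import matplotlib.pyplot as plt

with open("trainer_state.json") as f:
    state = json.load(f)

# Keep only the periodic training logs (entries carrying a "loss" key);
# evaluation entries use "eval_loss" and are skipped here.
train_logs = [entry for entry in state["log_history"] if "loss" in entry]
steps = [entry["step"] for entry in train_logs]
losses = [entry["loss"] for entry in train_logs]

plt.plot(steps, losses)
plt.xlabel("step")
plt.ylabel("training loss")
plt.title("deberta-v3-base_finetuned_bluegennx_run2.19_2e")
plt.show()
```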