nicpopovic commited on
Commit
a65e909
·
verified ·
1 Parent(s): f19c5a7

Upload 11 files

Browse files
data/meta-llama/Llama-3.2-1B/STOKE_100/checkpoints/span_classifier/Rxi8b70XJA/checkpoint.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:25951d9b73437a7aa344f4c207cbda2f88d9bf5fa94d1a779617948b18a1c4ed
3
+ size 8439912
data/meta-llama/Llama-3.2-1B/STOKE_100/checkpoints/span_classifier/Rxi8b70XJA/config.json ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model": "meta-llama/Llama-3.2-1B",
3
+ "type": "span_classifier",
4
+ "label_map": [
5
+ "no_span",
6
+ "span"
7
+ ],
8
+ "learning_rate": 0.0003,
9
+ "classifier_dim": 4096,
10
+ "loss_weights": [
11
+ 1.0,
12
+ 1.0
13
+ ],
14
+ "identifier": "Rxi8b70XJA",
15
+ "best_f1_validation": 0.8677362203598022,
16
+ "best_f1_validation_classwise": {
17
+ "span": {
18
+ "p": 0.896858811378479,
19
+ "r": 0.8404456377029419,
20
+ "f": 0.867736279964447,
21
+ "s": 24324.0
22
+ },
23
+ "macro": {
24
+ "p": 0.896858811378479,
25
+ "r": 0.8404456377029419,
26
+ "f": 0.867736279964447
27
+ }
28
+ }
29
+ }
data/meta-llama/Llama-3.2-1B/STOKE_100/checkpoints/span_classifier/Rxi8b70XJA/config_train.json ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "path": "data/meta-llama/Llama-3.2-1B/STOKE_100",
3
+ "splits": [
4
+ "train",
5
+ "validation"
6
+ ],
7
+ "layers": [
8
+ 8,
9
+ 9,
10
+ 10,
11
+ 11,
12
+ 12
13
+ ],
14
+ "hfcache": "",
15
+ "classifier_dims": [
16
+ 4096
17
+ ],
18
+ "learning_rates": [
19
+ 0.0001,
20
+ 5e-05,
21
+ 0.0003
22
+ ],
23
+ "cuda": true,
24
+ "n_steps_per_epoch": 10000,
25
+ "n_epochs": 30,
26
+ "batch_size": 8,
27
+ "balance_loss": false,
28
+ "loss_weights_span": [
29
+ [
30
+ 1.0,
31
+ 1.0
32
+ ],
33
+ [
34
+ 1.0,
35
+ 50.0
36
+ ],
37
+ [
38
+ 1.0,
39
+ 100.0
40
+ ]
41
+ ],
42
+ "time": 1727765390.5829365,
43
+ "config_dataset": {
44
+ "generation_kwargs": {
45
+ "max_new_tokens": 100,
46
+ "repetition_penalty": 1.2
47
+ },
48
+ "model_id": "meta-llama/Llama-3.2-1B",
49
+ "flair_model_name": "flair/ner-english-ontonotes-large"
50
+ }
51
+ }
data/meta-llama/Llama-3.2-1B/STOKE_100/checkpoints/token_classifier/dR8xQB4ODU/checkpoint.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dce5b3038d8767430a8bba16af61ec6af67c9d1aedc75a9f34c01feebac09b6e
3
+ size 33884328
data/meta-llama/Llama-3.2-1B/STOKE_100/checkpoints/token_classifier/dR8xQB4ODU/config.json ADDED
@@ -0,0 +1,166 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "layer": 10,
3
+ "model": "meta-llama/Llama-3.2-1B",
4
+ "type": "token_classifier",
5
+ "label_map": [
6
+ "O",
7
+ "CARDINAL",
8
+ "DATE",
9
+ "EVENT",
10
+ "FAC",
11
+ "GPE",
12
+ "LANGUAGE",
13
+ "LAW",
14
+ "LOC",
15
+ "MONEY",
16
+ "NORP",
17
+ "ORDINAL",
18
+ "ORG",
19
+ "PERCENT",
20
+ "PERSON",
21
+ "PRODUCT",
22
+ "QUANTITY",
23
+ "TIME",
24
+ "WORK_OF_ART"
25
+ ],
26
+ "learning_rate": 5e-05,
27
+ "classifier_dim": 4096,
28
+ "loss_weights": [
29
+ 1.0,
30
+ 1.0,
31
+ 1.0,
32
+ 1.0,
33
+ 1.0,
34
+ 1.0,
35
+ 1.0,
36
+ 1.0,
37
+ 1.0,
38
+ 1.0,
39
+ 1.0,
40
+ 1.0,
41
+ 1.0,
42
+ 1.0,
43
+ 1.0,
44
+ 1.0,
45
+ 1.0,
46
+ 1.0,
47
+ 1.0
48
+ ],
49
+ "identifier": "dR8xQB4ODU",
50
+ "best_f1_validation": 0.9056437015533447,
51
+ "best_f1_validation_classwise": {
52
+ "CARDINAL": {
53
+ "p": 0.8679801225662231,
54
+ "r": 0.8777581453323364,
55
+ "f": 0.8728417754173279,
56
+ "s": 10741.0
57
+ },
58
+ "DATE": {
59
+ "p": 0.9519810676574707,
60
+ "r": 0.9389873743057251,
61
+ "f": 0.9454395771026611,
62
+ "s": 8572.0
63
+ },
64
+ "EVENT": {
65
+ "p": 0.8587140440940857,
66
+ "r": 0.8319672346115112,
67
+ "f": 0.8451290726661682,
68
+ "s": 1220.0
69
+ },
70
+ "FAC": {
71
+ "p": 0.8515185713768005,
72
+ "r": 0.8122317790985107,
73
+ "f": 0.8314113020896912,
74
+ "s": 932.0
75
+ },
76
+ "GPE": {
77
+ "p": 0.9000998735427856,
78
+ "r": 0.9094448685646057,
79
+ "f": 0.904748260974884,
80
+ "s": 6935.0
81
+ },
82
+ "LANGUAGE": {
83
+ "p": 0.75,
84
+ "r": 0.7200000286102295,
85
+ "f": 0.7346938848495483,
86
+ "s": 25.0
87
+ },
88
+ "LAW": {
89
+ "p": 0.8709677457809448,
90
+ "r": 0.73828125,
91
+ "f": 0.7991543412208557,
92
+ "s": 256.0
93
+ },
94
+ "LOC": {
95
+ "p": 0.8258426785469055,
96
+ "r": 0.7101449370384216,
97
+ "f": 0.7636363506317139,
98
+ "s": 414.0
99
+ },
100
+ "MONEY": {
101
+ "p": 0.876042902469635,
102
+ "r": 0.8626760840415955,
103
+ "f": 0.8693081140518188,
104
+ "s": 1704.0
105
+ },
106
+ "NORP": {
107
+ "p": 0.9160357713699341,
108
+ "r": 0.887333333492279,
109
+ "f": 0.9014561772346497,
110
+ "s": 1500.0
111
+ },
112
+ "ORDINAL": {
113
+ "p": 0.9303238391876221,
114
+ "r": 0.9498997926712036,
115
+ "f": 0.9400099515914917,
116
+ "s": 998.0
117
+ },
118
+ "ORG": {
119
+ "p": 0.8974575400352478,
120
+ "r": 0.8792765140533447,
121
+ "f": 0.8882739543914795,
122
+ "s": 9675.0
123
+ },
124
+ "PERCENT": {
125
+ "p": 0.8629592657089233,
126
+ "r": 0.8083720803260803,
127
+ "f": 0.8347742557525635,
128
+ "s": 1075.0
129
+ },
130
+ "PERSON": {
131
+ "p": 0.9707135558128357,
132
+ "r": 0.9713156223297119,
133
+ "f": 0.9710144996643066,
134
+ "s": 12899.0
135
+ },
136
+ "PRODUCT": {
137
+ "p": 0.7828418016433716,
138
+ "r": 0.7564767003059387,
139
+ "f": 0.7694334387779236,
140
+ "s": 386.0
141
+ },
142
+ "QUANTITY": {
143
+ "p": 0.8409090638160706,
144
+ "r": 0.7758846879005432,
145
+ "f": 0.8070893287658691,
146
+ "s": 763.0
147
+ },
148
+ "TIME": {
149
+ "p": 0.8710959553718567,
150
+ "r": 0.8373362421989441,
151
+ "f": 0.8538825511932373,
152
+ "s": 1832.0
153
+ },
154
+ "WORK_OF_ART": {
155
+ "p": 0.7803030014038086,
156
+ "r": 0.7152777910232544,
157
+ "f": 0.7463768124580383,
158
+ "s": 576.0
159
+ },
160
+ "macro": {
161
+ "p": 0.8669881820678711,
162
+ "r": 0.8323702216148376,
163
+ "f": 0.8488152027130127
164
+ }
165
+ }
166
+ }
data/meta-llama/Llama-3.2-1B/STOKE_100/checkpoints/token_classifier/dR8xQB4ODU/config_train.json ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "path": "data/meta-llama/Llama-3.2-1B/STOKE_100",
3
+ "splits": [
4
+ "train",
5
+ "validation"
6
+ ],
7
+ "layers": [
8
+ 8,
9
+ 9,
10
+ 10,
11
+ 11,
12
+ 12
13
+ ],
14
+ "hfcache": "",
15
+ "classifier_dims": [
16
+ 4096
17
+ ],
18
+ "learning_rates": [
19
+ 0.0001,
20
+ 5e-05,
21
+ 0.0003
22
+ ],
23
+ "cuda": true,
24
+ "n_steps_per_epoch": 10000,
25
+ "n_epochs": 30,
26
+ "batch_size": 8,
27
+ "balance_loss": false,
28
+ "loss_weights_span": [
29
+ [
30
+ 1.0,
31
+ 1.0
32
+ ],
33
+ [
34
+ 1.0,
35
+ 50.0
36
+ ],
37
+ [
38
+ 1.0,
39
+ 100.0
40
+ ]
41
+ ],
42
+ "time": 1727765390.5829365,
43
+ "config_dataset": {
44
+ "generation_kwargs": {
45
+ "max_new_tokens": 100,
46
+ "repetition_penalty": 1.2
47
+ },
48
+ "model_id": "meta-llama/Llama-3.2-1B",
49
+ "flair_model_name": "flair/ner-english-ontonotes-large"
50
+ }
51
+ }
data/meta-llama/Llama-3.2-1B/STOKE_100/checkpoints/token_classifier/pbK46jjAVx/checkpoint.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f32816959f5fd27967c754a61b07d8ae6c92b7881e2fbb6a68b54b8c0c575122
3
+ size 33884328
data/meta-llama/Llama-3.2-1B/STOKE_100/checkpoints/token_classifier/pbK46jjAVx/config.json ADDED
@@ -0,0 +1,166 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "layer": 10,
3
+ "model": "meta-llama/Llama-3.2-1B",
4
+ "type": "token_classifier",
5
+ "label_map": [
6
+ "O",
7
+ "CARDINAL",
8
+ "DATE",
9
+ "EVENT",
10
+ "FAC",
11
+ "GPE",
12
+ "LANGUAGE",
13
+ "LAW",
14
+ "LOC",
15
+ "MONEY",
16
+ "NORP",
17
+ "ORDINAL",
18
+ "ORG",
19
+ "PERCENT",
20
+ "PERSON",
21
+ "PRODUCT",
22
+ "QUANTITY",
23
+ "TIME",
24
+ "WORK_OF_ART"
25
+ ],
26
+ "learning_rate": 0.0003,
27
+ "classifier_dim": 4096,
28
+ "loss_weights": [
29
+ 1.0,
30
+ 1.0,
31
+ 1.0,
32
+ 1.0,
33
+ 1.0,
34
+ 1.0,
35
+ 1.0,
36
+ 1.0,
37
+ 1.0,
38
+ 1.0,
39
+ 1.0,
40
+ 1.0,
41
+ 1.0,
42
+ 1.0,
43
+ 1.0,
44
+ 1.0,
45
+ 1.0,
46
+ 1.0,
47
+ 1.0
48
+ ],
49
+ "identifier": "pbK46jjAVx",
50
+ "best_f1_validation": 0.9048610329627991,
51
+ "best_f1_validation_classwise": {
52
+ "CARDINAL": {
53
+ "p": 0.8730558156967163,
54
+ "r": 0.8727306723594666,
55
+ "f": 0.872893214225769,
56
+ "s": 10741.0
57
+ },
58
+ "DATE": {
59
+ "p": 0.9534441828727722,
60
+ "r": 0.9365375638008118,
61
+ "f": 0.944915235042572,
62
+ "s": 8572.0
63
+ },
64
+ "EVENT": {
65
+ "p": 0.8540268540382385,
66
+ "r": 0.83442622423172,
67
+ "f": 0.844112753868103,
68
+ "s": 1220.0
69
+ },
70
+ "FAC": {
71
+ "p": 0.8227027058601379,
72
+ "r": 0.8165236115455627,
73
+ "f": 0.8196015357971191,
74
+ "s": 932.0
75
+ },
76
+ "GPE": {
77
+ "p": 0.9014912247657776,
78
+ "r": 0.9065608978271484,
79
+ "f": 0.9040189981460571,
80
+ "s": 6935.0
81
+ },
82
+ "LANGUAGE": {
83
+ "p": 0.7272727489471436,
84
+ "r": 0.6399999856948853,
85
+ "f": 0.6808510422706604,
86
+ "s": 25.0
87
+ },
88
+ "LAW": {
89
+ "p": 0.8500000238418579,
90
+ "r": 0.73046875,
91
+ "f": 0.7857142686843872,
92
+ "s": 256.0
93
+ },
94
+ "LOC": {
95
+ "p": 0.8867924809455872,
96
+ "r": 0.6811594367027283,
97
+ "f": 0.7704918384552002,
98
+ "s": 414.0
99
+ },
100
+ "MONEY": {
101
+ "p": 0.873665452003479,
102
+ "r": 0.8644366264343262,
103
+ "f": 0.8690265417098999,
104
+ "s": 1704.0
105
+ },
106
+ "NORP": {
107
+ "p": 0.9220505356788635,
108
+ "r": 0.875333309173584,
109
+ "f": 0.898084819316864,
110
+ "s": 1500.0
111
+ },
112
+ "ORDINAL": {
113
+ "p": 0.9244186282157898,
114
+ "r": 0.9559118151664734,
115
+ "f": 0.9399014711380005,
116
+ "s": 998.0
117
+ },
118
+ "ORG": {
119
+ "p": 0.8920637965202332,
120
+ "r": 0.8841343522071838,
121
+ "f": 0.888081431388855,
122
+ "s": 9675.0
123
+ },
124
+ "PERCENT": {
125
+ "p": 0.8530852198600769,
126
+ "r": 0.8102325797080994,
127
+ "f": 0.8311069011688232,
128
+ "s": 1075.0
129
+ },
130
+ "PERSON": {
131
+ "p": 0.9692212343215942,
132
+ "r": 0.9716256856918335,
133
+ "f": 0.9704219698905945,
134
+ "s": 12899.0
135
+ },
136
+ "PRODUCT": {
137
+ "p": 0.7886179089546204,
138
+ "r": 0.7538859844207764,
139
+ "f": 0.7708609104156494,
140
+ "s": 386.0
141
+ },
142
+ "QUANTITY": {
143
+ "p": 0.8215258717536926,
144
+ "r": 0.7903014421463013,
145
+ "f": 0.8056111931800842,
146
+ "s": 763.0
147
+ },
148
+ "TIME": {
149
+ "p": 0.8752886652946472,
150
+ "r": 0.8275108933448792,
151
+ "f": 0.8507295250892639,
152
+ "s": 1832.0
153
+ },
154
+ "WORK_OF_ART": {
155
+ "p": 0.7937743067741394,
156
+ "r": 0.7083333134651184,
157
+ "f": 0.7486238479614258,
158
+ "s": 576.0
159
+ },
160
+ "macro": {
161
+ "p": 0.8656943440437317,
162
+ "r": 0.8255618214607239,
163
+ "f": 0.8441693186759949
164
+ }
165
+ }
166
+ }
data/meta-llama/Llama-3.2-1B/STOKE_100/checkpoints/token_classifier/pbK46jjAVx/config_train.json ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "path": "data/meta-llama/Llama-3.2-1B/STOKE_100",
3
+ "splits": [
4
+ "train",
5
+ "validation"
6
+ ],
7
+ "layers": [
8
+ 8,
9
+ 9,
10
+ 10,
11
+ 11,
12
+ 12
13
+ ],
14
+ "hfcache": "",
15
+ "classifier_dims": [
16
+ 4096
17
+ ],
18
+ "learning_rates": [
19
+ 0.0001,
20
+ 5e-05,
21
+ 0.0003
22
+ ],
23
+ "cuda": true,
24
+ "n_steps_per_epoch": 10000,
25
+ "n_epochs": 30,
26
+ "batch_size": 8,
27
+ "balance_loss": false,
28
+ "loss_weights_span": [
29
+ [
30
+ 1.0,
31
+ 1.0
32
+ ],
33
+ [
34
+ 1.0,
35
+ 50.0
36
+ ],
37
+ [
38
+ 1.0,
39
+ 100.0
40
+ ]
41
+ ],
42
+ "time": 1727765390.5829365,
43
+ "config_dataset": {
44
+ "generation_kwargs": {
45
+ "max_new_tokens": 100,
46
+ "repetition_penalty": 1.2
47
+ },
48
+ "model_id": "meta-llama/Llama-3.2-1B",
49
+ "flair_model_name": "flair/ner-english-ontonotes-large"
50
+ }
51
+ }
data/meta-llama/Llama-3.2-1B/STOKE_100/config.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "generation_kwargs": {
3
+ "max_new_tokens": 100,
4
+ "repetition_penalty": 1.2
5
+ },
6
+ "model_id": "meta-llama/Llama-3.2-1B",
7
+ "flair_model_name": "flair/ner-english-ontonotes-large"
8
+ }
data/meta-llama/Llama-3.2-1B/STOKE_100/stoke_config.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "default": {
3
+ "classifier_token": "data/meta-llama/Llama-3.2-1B/STOKE_100/checkpoints/token_classifier/pbK46jjAVx",
4
+ "classifier_span": "data/meta-llama/Llama-3.2-1B/STOKE_100/checkpoints/span_classifier/Rxi8b70XJA"
5
+ },
6
+ "basic": {
7
+ "classifier_token": "data/meta-llama/Llama-3.2-1B/STOKE_100/checkpoints/token_classifier/dR8xQB4ODU",
8
+ "classifier_span": "data/meta-llama/Llama-3.2-1B/STOKE_100/checkpoints/span_classifier/Rxi8b70XJA"
9
+ }
10
+ }