nicpopovic committed on
Commit
b244637
·
verified ·
1 Parent(s): 9979f92

Upload 8 files

Browse files
data/meta-llama/Llama-3.2-1B-Instruct/STOKE_500_wikiqa/checkpoints/span_classifier/B7Ogkftne6/checkpoint.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f8ed0c5f360111a0cf03c113e844eb99d5a25966859f2b746cdfecb0641c1b34
3
+ size 8439912
data/meta-llama/Llama-3.2-1B-Instruct/STOKE_500_wikiqa/checkpoints/span_classifier/B7Ogkftne6/config.json ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model": "meta-llama/Llama-3.2-1B-Instruct",
3
+ "type": "span_classifier",
4
+ "label_map": [
5
+ "no_span",
6
+ "span"
7
+ ],
8
+ "learning_rate": 0.001,
9
+ "classifier_dim": 4096,
10
+ "loss_weights": [
11
+ 1.0,
12
+ 50.0
13
+ ],
14
+ "identifier": "B7Ogkftne6",
15
+ "best_f1_validation": 0.5553668737411499,
16
+ "best_f1_validation_classwise": {
17
+ "span": {
18
+ "p": 0.3978736698627472,
19
+ "r": 0.9192339777946472,
20
+ "f": 0.5553668737411499,
21
+ "s": 4804.0
22
+ },
23
+ "macro": {
24
+ "p": 0.3978736698627472,
25
+ "r": 0.9192339777946472,
26
+ "f": 0.5553668737411499
27
+ }
28
+ }
29
+ }
data/meta-llama/Llama-3.2-1B-Instruct/STOKE_500_wikiqa/checkpoints/span_classifier/B7Ogkftne6/config_train.json ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "path": "data/meta-llama/Llama-3.2-1B-Instruct/STOKE_500_wikiqa",
3
+ "splits": [
4
+ "train",
5
+ "validation"
6
+ ],
7
+ "layers": [
8
+ 8,
9
+ 9,
10
+ 10,
11
+ 11,
12
+ 12
13
+ ],
14
+ "hfcache": "",
15
+ "classifier_dims": [
16
+ 4096
17
+ ],
18
+ "learning_rates": [
19
+ 0.0001,
20
+ 5e-05,
21
+ 0.0003,
22
+ 0.0005,
23
+ 0.001
24
+ ],
25
+ "cuda": true,
26
+ "n_steps_per_epoch": 500,
27
+ "n_epochs": 30,
28
+ "batch_size": 4,
29
+ "balance_loss": false,
30
+ "loss_weights_span": [
31
+ [
32
+ 1.0,
33
+ 1.0
34
+ ],
35
+ [
36
+ 1.0,
37
+ 50.0
38
+ ],
39
+ [
40
+ 1.0,
41
+ 100.0
42
+ ]
43
+ ],
44
+ "time": 1728553144.1331656,
45
+ "config_dataset": {
46
+ "generation_kwargs": {
47
+ "max_new_tokens": 500,
48
+ "repetition_penalty": 1.2
49
+ },
50
+ "model_id": "meta-llama/Llama-3.2-1B-Instruct",
51
+ "flair_model_name": "flair/ner-english-ontonotes-large"
52
+ }
53
+ }
data/meta-llama/Llama-3.2-1B-Instruct/STOKE_500_wikiqa/checkpoints/token_classifier/ShBuzLjzh9/checkpoint.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:95f1f6318f661446e7ff38af3e31293ba1f0fc88ed94aee0a8c47ceaf6ec2b1c
3
+ size 33884328
data/meta-llama/Llama-3.2-1B-Instruct/STOKE_500_wikiqa/checkpoints/token_classifier/ShBuzLjzh9/config.json ADDED
@@ -0,0 +1,166 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "layer": 10,
3
+ "model": "meta-llama/Llama-3.2-1B-Instruct",
4
+ "type": "token_classifier",
5
+ "label_map": [
6
+ "O",
7
+ "CARDINAL",
8
+ "DATE",
9
+ "EVENT",
10
+ "FAC",
11
+ "GPE",
12
+ "LANGUAGE",
13
+ "LAW",
14
+ "LOC",
15
+ "MONEY",
16
+ "NORP",
17
+ "ORDINAL",
18
+ "ORG",
19
+ "PERCENT",
20
+ "PERSON",
21
+ "PRODUCT",
22
+ "QUANTITY",
23
+ "TIME",
24
+ "WORK_OF_ART"
25
+ ],
26
+ "learning_rate": 0.001,
27
+ "classifier_dim": 4096,
28
+ "loss_weights": [
29
+ 1.0,
30
+ 1.0,
31
+ 1.0,
32
+ 1.0,
33
+ 1.0,
34
+ 1.0,
35
+ 1.0,
36
+ 1.0,
37
+ 1.0,
38
+ 1.0,
39
+ 1.0,
40
+ 1.0,
41
+ 1.0,
42
+ 1.0,
43
+ 1.0,
44
+ 1.0,
45
+ 1.0,
46
+ 1.0,
47
+ 1.0
48
+ ],
49
+ "identifier": "ShBuzLjzh9",
50
+ "best_f1_validation": 0.8960928916931152,
51
+ "best_f1_validation_classwise": {
52
+ "CARDINAL": {
53
+ "p": 0.8828781247138977,
54
+ "r": 0.9042496085166931,
55
+ "f": 0.8934361338615417,
56
+ "s": 1859.0
57
+ },
58
+ "DATE": {
59
+ "p": 0.9753617644309998,
60
+ "r": 0.9696733951568604,
61
+ "f": 0.9725092649459839,
62
+ "s": 5144.0
63
+ },
64
+ "EVENT": {
65
+ "p": 0.8030303120613098,
66
+ "r": 0.7194570302963257,
67
+ "f": 0.758949875831604,
68
+ "s": 221.0
69
+ },
70
+ "FAC": {
71
+ "p": 0.7110481858253479,
72
+ "r": 0.7011173367500305,
73
+ "f": 0.7060478329658508,
74
+ "s": 358.0
75
+ },
76
+ "GPE": {
77
+ "p": 0.8975331783294678,
78
+ "r": 0.9113680124282837,
79
+ "f": 0.9043976664543152,
80
+ "s": 1038.0
81
+ },
82
+ "LANGUAGE": {
83
+ "p": 0.9354838728904724,
84
+ "r": 0.8787878751754761,
85
+ "f": 0.90625,
86
+ "s": 66.0
87
+ },
88
+ "LAW": {
89
+ "p": 0.8544303774833679,
90
+ "r": 0.7714285850524902,
91
+ "f": 0.8108107447624207,
92
+ "s": 175.0
93
+ },
94
+ "LOC": {
95
+ "p": 0.6557376980781555,
96
+ "r": 0.6425702571868896,
97
+ "f": 0.6490872502326965,
98
+ "s": 249.0
99
+ },
100
+ "MONEY": {
101
+ "p": 0.9277108311653137,
102
+ "r": 0.9277108311653137,
103
+ "f": 0.9277108311653137,
104
+ "s": 166.0
105
+ },
106
+ "NORP": {
107
+ "p": 0.8764045238494873,
108
+ "r": 0.7852349281311035,
109
+ "f": 0.8283185958862305,
110
+ "s": 298.0
111
+ },
112
+ "ORDINAL": {
113
+ "p": 0.8032786846160889,
114
+ "r": 0.8909090757369995,
115
+ "f": 0.8448275923728943,
116
+ "s": 55.0
117
+ },
118
+ "ORG": {
119
+ "p": 0.8269370198249817,
120
+ "r": 0.8305454254150391,
121
+ "f": 0.8287373185157776,
122
+ "s": 1375.0
123
+ },
124
+ "PERCENT": {
125
+ "p": 0.939130425453186,
126
+ "r": 0.7248322367668152,
127
+ "f": 0.8181818127632141,
128
+ "s": 149.0
129
+ },
130
+ "PERSON": {
131
+ "p": 0.9401294589042664,
132
+ "r": 0.9535011053085327,
133
+ "f": 0.9467681050300598,
134
+ "s": 1828.0
135
+ },
136
+ "PRODUCT": {
137
+ "p": 0.7434554696083069,
138
+ "r": 0.4610389471054077,
139
+ "f": 0.56913822889328,
140
+ "s": 308.0
141
+ },
142
+ "QUANTITY": {
143
+ "p": 0.8620689511299133,
144
+ "r": 0.8796296119689941,
145
+ "f": 0.8707607984542847,
146
+ "s": 540.0
147
+ },
148
+ "TIME": {
149
+ "p": 0.6315789222717285,
150
+ "r": 0.4285714328289032,
151
+ "f": 0.5106382369995117,
152
+ "s": 56.0
153
+ },
154
+ "WORK_OF_ART": {
155
+ "p": 0.8444194197654724,
156
+ "r": 0.8221734166145325,
157
+ "f": 0.8331480026245117,
158
+ "s": 911.0
159
+ },
160
+ "macro": {
161
+ "p": 0.8394787311553955,
162
+ "r": 0.7890443801879883,
163
+ "f": 0.8099843859672546
164
+ }
165
+ }
166
+ }
data/meta-llama/Llama-3.2-1B-Instruct/STOKE_500_wikiqa/checkpoints/token_classifier/ShBuzLjzh9/config_train.json ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "path": "data/meta-llama/Llama-3.2-1B-Instruct/STOKE_500_wikiqa",
3
+ "splits": [
4
+ "train",
5
+ "validation"
6
+ ],
7
+ "layers": [
8
+ 8,
9
+ 9,
10
+ 10,
11
+ 11,
12
+ 12
13
+ ],
14
+ "hfcache": "",
15
+ "classifier_dims": [
16
+ 4096
17
+ ],
18
+ "learning_rates": [
19
+ 0.0001,
20
+ 5e-05,
21
+ 0.0003,
22
+ 0.0005,
23
+ 0.001
24
+ ],
25
+ "cuda": true,
26
+ "n_steps_per_epoch": 500,
27
+ "n_epochs": 30,
28
+ "batch_size": 4,
29
+ "balance_loss": false,
30
+ "loss_weights_span": [
31
+ [
32
+ 1.0,
33
+ 1.0
34
+ ],
35
+ [
36
+ 1.0,
37
+ 50.0
38
+ ],
39
+ [
40
+ 1.0,
41
+ 100.0
42
+ ]
43
+ ],
44
+ "time": 1728553144.1331656,
45
+ "config_dataset": {
46
+ "generation_kwargs": {
47
+ "max_new_tokens": 500,
48
+ "repetition_penalty": 1.2
49
+ },
50
+ "model_id": "meta-llama/Llama-3.2-1B-Instruct",
51
+ "flair_model_name": "flair/ner-english-ontonotes-large"
52
+ }
53
+ }
data/meta-llama/Llama-3.2-1B-Instruct/STOKE_500_wikiqa/config.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "generation_kwargs": {
3
+ "max_new_tokens": 500,
4
+ "repetition_penalty": 1.2
5
+ },
6
+ "model_id": "meta-llama/Llama-3.2-1B-Instruct",
7
+ "flair_model_name": "flair/ner-english-ontonotes-large"
8
+ }
data/meta-llama/Llama-3.2-1B-Instruct/STOKE_500_wikiqa/stoke_config.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "default": {
3
+ "classifier_token": "data/meta-llama/Llama-3.2-1B-Instruct/STOKE_500_wikiqa/checkpoints/token_classifier/ShBuzLjzh9",
4
+ "classifier_span": "data/meta-llama/Llama-3.2-1B-Instruct/STOKE_500_wikiqa/checkpoints/span_classifier/B7Ogkftne6"
5
+ },
6
+ "basic": {
7
+ "classifier_token": "data/meta-llama/Llama-3.2-1B-Instruct/STOKE_500_wikiqa/checkpoints/token_classifier/ShBuzLjzh9",
8
+ "classifier_span": "data/meta-llama/Llama-3.2-1B-Instruct/STOKE_500_wikiqa/checkpoints/span_classifier/lx17UmlUbN"
9
+ }
10
+ }