Md Mushfiqur Rahman committed on
Commit
a49562c
1 Parent(s): 5a35ca6

Upload with huggingface_hub

Browse files
README.md ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: apache-2.0
3
+ tags:
4
+ - bert
5
+ - pretrained-on-english-language
6
+ ---
7
+
8
+ ### How to use
9
+
10
+ Here is how to use this model:
11
+
12
+ ```python
13
+ from transformers import AutoModelForTokenClassification
14
+ model = AutoModelForTokenClassification.from_pretrained('mushfiqur11/<repo name>')
15
+ ```
all_results.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 38.81,
3
+ "eval_accuracy_score": 0.9640888094173745,
4
+ "eval_f1": 0.8271405492730209,
5
+ "eval_loss": 0.20800545811653137,
6
+ "eval_precision": 0.8258064516129032,
7
+ "eval_recall": 0.8284789644012945,
8
+ "eval_runtime": 4.1985,
9
+ "eval_samples": 302,
10
+ "eval_samples_per_second": 71.931,
11
+ "eval_steps_per_second": 9.051,
12
+ "test_accuracy_score": 0.9515356438433361,
13
+ "test_f1": 0.7057644110275689,
14
+ "test_loss": 0.284976989030838,
15
+ "test_precision": 0.685491723466407,
16
+ "test_recall": 0.7272727272727273,
17
+ "test_runtime": 8.4313,
18
+ "test_samples": 605,
19
+ "test_samples_per_second": 71.756,
20
+ "test_steps_per_second": 9.014,
21
+ "train_loss": 0.0309662873011369,
22
+ "train_runtime": 2918.4318,
23
+ "train_samples": 2116,
24
+ "train_samples_per_second": 164.472,
25
+ "train_steps_per_second": 5.14
26
+ }
config.json ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "bert-base-multilingual-cased",
3
+ "architectures": [
4
+ "BertForTokenClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "classifier_dropout": null,
8
+ "directionality": "bidi",
9
+ "hidden_act": "gelu",
10
+ "hidden_dropout_prob": 0.1,
11
+ "hidden_size": 768,
12
+ "id2label": {
13
+ "0": "O",
14
+ "1": "B-DATE",
15
+ "2": "I-DATE",
16
+ "3": "B-PER",
17
+ "4": "I-PER",
18
+ "5": "B-ORG",
19
+ "6": "I-ORG",
20
+ "7": "B-LOC",
21
+ "8": "I-LOC"
22
+ },
23
+ "initializer_range": 0.02,
24
+ "intermediate_size": 3072,
25
+ "label2id": {
26
+ "B-DATE": 1,
27
+ "B-LOC": 7,
28
+ "B-ORG": 5,
29
+ "B-PER": 3,
30
+ "I-DATE": 2,
31
+ "I-LOC": 8,
32
+ "I-ORG": 6,
33
+ "I-PER": 4,
34
+ "O": 0
35
+ },
36
+ "layer_norm_eps": 1e-12,
37
+ "max_position_embeddings": 512,
38
+ "model_type": "bert",
39
+ "num_attention_heads": 12,
40
+ "num_hidden_layers": 12,
41
+ "pad_token_id": 0,
42
+ "pooler_fc_size": 768,
43
+ "pooler_num_attention_heads": 12,
44
+ "pooler_num_fc_layers": 3,
45
+ "pooler_size_per_head": 128,
46
+ "pooler_type": "first_token_transform",
47
+ "position_embedding_type": "absolute",
48
+ "torch_dtype": "float32",
49
+ "transformers_version": "4.17.0",
50
+ "type_vocab_size": 2,
51
+ "use_cache": true,
52
+ "vocab_size": 119547
53
+ }
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:60a47a21d6eb17a25743da46b864eb59100bfbd940c57f8c8a9ba6471d5ed878
3
+ size 709165101
special_tokens_map.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]"}
test_predictions.txt ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"do_lower_case": false, "unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]", "tokenize_chinese_chars": true, "strip_accents": null, "add_prefix_space": false, "model_max_length": 512, "special_tokens_map_file": null, "name_or_path": "bert-base-multilingual-cased", "tokenizer_class": "BertTokenizer"}
trainer_state.json ADDED
@@ -0,0 +1,211 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.8271405492730209,
3
+ "best_model_checkpoint": "/scratch/mrahma45/pixel/finetuned_models/mbert/mbert-base-finetuned-masakhaner-kin/checkpoint-1000",
4
+ "epoch": 38.80597014925373,
5
+ "global_step": 2600,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 2.99,
12
+ "eval_accuracy_score": 0.9552972731336612,
13
+ "eval_f1": 0.8061538461538461,
14
+ "eval_loss": 0.14030461013317108,
15
+ "eval_precision": 0.7683284457478006,
16
+ "eval_recall": 0.8478964401294499,
17
+ "eval_runtime": 4.2191,
18
+ "eval_samples_per_second": 71.58,
19
+ "eval_steps_per_second": 9.007,
20
+ "step": 200
21
+ },
22
+ {
23
+ "epoch": 5.97,
24
+ "eval_accuracy_score": 0.9608106094471762,
25
+ "eval_f1": 0.8109375,
26
+ "eval_loss": 0.17163802683353424,
27
+ "eval_precision": 0.783987915407855,
28
+ "eval_recall": 0.8398058252427184,
29
+ "eval_runtime": 4.2278,
30
+ "eval_samples_per_second": 71.432,
31
+ "eval_steps_per_second": 8.988,
32
+ "step": 400
33
+ },
34
+ {
35
+ "epoch": 7.46,
36
+ "learning_rate": 4.865771812080537e-05,
37
+ "loss": 0.1428,
38
+ "step": 500
39
+ },
40
+ {
41
+ "epoch": 8.96,
42
+ "eval_accuracy_score": 0.9573834003874236,
43
+ "eval_f1": 0.7830940988835726,
44
+ "eval_loss": 0.2280413955450058,
45
+ "eval_precision": 0.7720125786163522,
46
+ "eval_recall": 0.7944983818770227,
47
+ "eval_runtime": 4.2189,
48
+ "eval_samples_per_second": 71.582,
49
+ "eval_steps_per_second": 9.007,
50
+ "step": 600
51
+ },
52
+ {
53
+ "epoch": 11.94,
54
+ "eval_accuracy_score": 0.9605125912680673,
55
+ "eval_f1": 0.8210862619808306,
56
+ "eval_loss": 0.22001390159130096,
57
+ "eval_precision": 0.8107255520504731,
58
+ "eval_recall": 0.8317152103559871,
59
+ "eval_runtime": 4.2196,
60
+ "eval_samples_per_second": 71.571,
61
+ "eval_steps_per_second": 9.006,
62
+ "step": 800
63
+ },
64
+ {
65
+ "epoch": 14.93,
66
+ "learning_rate": 4.697986577181208e-05,
67
+ "loss": 0.0075,
68
+ "step": 1000
69
+ },
70
+ {
71
+ "epoch": 14.93,
72
+ "eval_accuracy_score": 0.9640888094173745,
73
+ "eval_f1": 0.8271405492730209,
74
+ "eval_loss": 0.20800545811653137,
75
+ "eval_precision": 0.8258064516129032,
76
+ "eval_recall": 0.8284789644012945,
77
+ "eval_runtime": 4.2152,
78
+ "eval_samples_per_second": 71.645,
79
+ "eval_steps_per_second": 9.015,
80
+ "step": 1000
81
+ },
82
+ {
83
+ "epoch": 17.91,
84
+ "eval_accuracy_score": 0.9643868275964834,
85
+ "eval_f1": 0.8350764279967821,
86
+ "eval_loss": 0.2207585573196411,
87
+ "eval_precision": 0.8304,
88
+ "eval_recall": 0.8398058252427184,
89
+ "eval_runtime": 4.214,
90
+ "eval_samples_per_second": 71.666,
91
+ "eval_steps_per_second": 9.018,
92
+ "step": 1200
93
+ },
94
+ {
95
+ "epoch": 20.9,
96
+ "eval_accuracy_score": 0.9567873640292058,
97
+ "eval_f1": 0.8034455755677369,
98
+ "eval_loss": 0.24622981250286102,
99
+ "eval_precision": 0.7784522003034902,
100
+ "eval_recall": 0.8300970873786407,
101
+ "eval_runtime": 4.2176,
102
+ "eval_samples_per_second": 71.605,
103
+ "eval_steps_per_second": 9.01,
104
+ "step": 1400
105
+ },
106
+ {
107
+ "epoch": 22.39,
108
+ "learning_rate": 4.530201342281879e-05,
109
+ "loss": 0.004,
110
+ "step": 1500
111
+ },
112
+ {
113
+ "epoch": 23.88,
114
+ "eval_accuracy_score": 0.9658769184920281,
115
+ "eval_f1": 0.8436763550667713,
116
+ "eval_loss": 0.22536174952983856,
117
+ "eval_precision": 0.8198473282442749,
118
+ "eval_recall": 0.8689320388349514,
119
+ "eval_runtime": 4.214,
120
+ "eval_samples_per_second": 71.666,
121
+ "eval_steps_per_second": 9.018,
122
+ "step": 1600
123
+ },
124
+ {
125
+ "epoch": 26.87,
126
+ "eval_accuracy_score": 0.9646848457755923,
127
+ "eval_f1": 0.8192,
128
+ "eval_loss": 0.20931068062782288,
129
+ "eval_precision": 0.810126582278481,
130
+ "eval_recall": 0.8284789644012945,
131
+ "eval_runtime": 4.2136,
132
+ "eval_samples_per_second": 71.672,
133
+ "eval_steps_per_second": 9.018,
134
+ "step": 1800
135
+ },
136
+ {
137
+ "epoch": 29.85,
138
+ "learning_rate": 4.36241610738255e-05,
139
+ "loss": 0.0028,
140
+ "step": 2000
141
+ },
142
+ {
143
+ "epoch": 29.85,
144
+ "eval_accuracy_score": 0.9623007003427209,
145
+ "eval_f1": 0.822774659182037,
146
+ "eval_loss": 0.24018673598766327,
147
+ "eval_precision": 0.8155802861685215,
148
+ "eval_recall": 0.8300970873786407,
149
+ "eval_runtime": 4.2137,
150
+ "eval_samples_per_second": 71.671,
151
+ "eval_steps_per_second": 9.018,
152
+ "step": 2000
153
+ },
154
+ {
155
+ "epoch": 32.84,
156
+ "eval_accuracy_score": 0.9564893458500968,
157
+ "eval_f1": 0.8009630818619582,
158
+ "eval_loss": 0.26548677682876587,
159
+ "eval_precision": 0.7945859872611465,
160
+ "eval_recall": 0.8074433656957929,
161
+ "eval_runtime": 4.2161,
162
+ "eval_samples_per_second": 71.63,
163
+ "eval_steps_per_second": 9.013,
164
+ "step": 2200
165
+ },
166
+ {
167
+ "epoch": 35.82,
168
+ "eval_accuracy_score": 0.958128445835196,
169
+ "eval_f1": 0.8034727703235991,
170
+ "eval_loss": 0.2520817518234253,
171
+ "eval_precision": 0.7842835130970724,
172
+ "eval_recall": 0.8236245954692557,
173
+ "eval_runtime": 4.2073,
174
+ "eval_samples_per_second": 71.78,
175
+ "eval_steps_per_second": 9.032,
176
+ "step": 2400
177
+ },
178
+ {
179
+ "epoch": 37.31,
180
+ "learning_rate": 4.194630872483222e-05,
181
+ "loss": 0.0033,
182
+ "step": 2500
183
+ },
184
+ {
185
+ "epoch": 38.81,
186
+ "eval_accuracy_score": 0.9637907912382655,
187
+ "eval_f1": 0.8270313757039421,
188
+ "eval_loss": 0.2613898515701294,
189
+ "eval_precision": 0.8224,
190
+ "eval_recall": 0.8317152103559871,
191
+ "eval_runtime": 4.201,
192
+ "eval_samples_per_second": 71.887,
193
+ "eval_steps_per_second": 9.045,
194
+ "step": 2600
195
+ },
196
+ {
197
+ "epoch": 38.81,
198
+ "step": 2600,
199
+ "total_flos": 1.0731614329737216e+16,
200
+ "train_loss": 0.0309662873011369,
201
+ "train_runtime": 2918.4318,
202
+ "train_samples_per_second": 164.472,
203
+ "train_steps_per_second": 5.14
204
+ }
205
+ ],
206
+ "max_steps": 15000,
207
+ "num_train_epochs": 224,
208
+ "total_flos": 1.0731614329737216e+16,
209
+ "trial_name": null,
210
+ "trial_params": null
211
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5ae552f85b02ab08b3de371826d5954ae1689120a214df11380b1c4bd3598737
3
+ size 3259
vocab.txt ADDED
The diff for this file is too large to render. See raw diff