yoshitomo-matsubara
committed on
Commit
•
50aa989
1
Parent(s):
ec14c64
added files
Browse files- config.json +36 -0
- pytorch_model.bin +3 -0
- special_tokens_map.json +1 -0
- tokenizer.json +0 -0
- tokenizer_config.json +1 -0
- training.log +78 -0
- vocab.txt +0 -0
config.json
ADDED
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "bert-large-uncased",
|
3 |
+
"architectures": [
|
4 |
+
"BertForSequenceClassification"
|
5 |
+
],
|
6 |
+
"attention_probs_dropout_prob": 0.1,
|
7 |
+
"finetuning_task": "mnli",
|
8 |
+
"gradient_checkpointing": false,
|
9 |
+
"hidden_act": "gelu",
|
10 |
+
"hidden_dropout_prob": 0.1,
|
11 |
+
"hidden_size": 1024,
|
12 |
+
"id2label": {
|
13 |
+
"0": "LABEL_0",
|
14 |
+
"1": "LABEL_1",
|
15 |
+
"2": "LABEL_2"
|
16 |
+
},
|
17 |
+
"initializer_range": 0.02,
|
18 |
+
"intermediate_size": 4096,
|
19 |
+
"label2id": {
|
20 |
+
"LABEL_0": 0,
|
21 |
+
"LABEL_1": 1,
|
22 |
+
"LABEL_2": 2
|
23 |
+
},
|
24 |
+
"layer_norm_eps": 1e-12,
|
25 |
+
"max_position_embeddings": 512,
|
26 |
+
"model_type": "bert",
|
27 |
+
"num_attention_heads": 16,
|
28 |
+
"num_hidden_layers": 24,
|
29 |
+
"pad_token_id": 0,
|
30 |
+
"position_embedding_type": "absolute",
|
31 |
+
"problem_type": "single_label_classification",
|
32 |
+
"transformers_version": "4.6.1",
|
33 |
+
"type_vocab_size": 2,
|
34 |
+
"use_cache": true,
|
35 |
+
"vocab_size": 30522
|
36 |
+
}
|
pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:60e65b82b971489a2d598bc76b7fd2dbc2666b6e36945ec825c85bcb31a7cd4f
|
3 |
+
size 1340750921
|
special_tokens_map.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]"}
|
tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
tokenizer_config.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"do_lower_case": true, "unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]", "tokenize_chinese_chars": true, "strip_accents": null, "do_lower": true, "model_max_length": 512, "special_tokens_map_file": null, "name_or_path": "bert-large-uncased"}
|
training.log
ADDED
@@ -0,0 +1,78 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
2021-05-22 16:51:27,390 INFO __main__ Namespace(adjust_lr=False, config='torchdistill/configs/sample/glue/mnli/ce/bert_large_uncased.yaml', log='log/glue/mnli/ce/bert_large_uncased.txt', private_output='leaderboard/glue/standard/bert_large_uncased/', seed=None, student_only=False, task_name='mnli', test_only=False, world_size=1)
|
2 |
+
2021-05-22 16:51:27,468 INFO __main__ Distributed environment: NO
|
3 |
+
Num processes: 1
|
4 |
+
Process index: 0
|
5 |
+
Local process index: 0
|
6 |
+
Device: cuda
|
7 |
+
Use FP16 precision: True
|
8 |
+
|
9 |
+
2021-05-22 16:51:28,560 INFO filelock Lock 140388380183568 acquired on /root/.cache/huggingface/transformers/1cf090f220f9674b67b3434decfe4d40a6532d7849653eac435ff94d31a4904c.1d03e5e4fa2db2532c517b2cd98290d8444b237619bd3d2039850a6d5e86473d.lock
|
10 |
+
2021-05-22 16:51:29,127 INFO filelock Lock 140388380183568 released on /root/.cache/huggingface/transformers/1cf090f220f9674b67b3434decfe4d40a6532d7849653eac435ff94d31a4904c.1d03e5e4fa2db2532c517b2cd98290d8444b237619bd3d2039850a6d5e86473d.lock
|
11 |
+
2021-05-22 16:51:30,244 INFO filelock Lock 140388420260240 acquired on /root/.cache/huggingface/transformers/e12f02d630da91a0982ce6db1ad595231d155a2b725ab106971898276d842ecc.d789d64ebfe299b0e416afc4a169632f903f693095b4629a7ea271d5a0cf2c99.lock
|
12 |
+
2021-05-22 16:51:31,503 INFO filelock Lock 140388420260240 released on /root/.cache/huggingface/transformers/e12f02d630da91a0982ce6db1ad595231d155a2b725ab106971898276d842ecc.d789d64ebfe299b0e416afc4a169632f903f693095b4629a7ea271d5a0cf2c99.lock
|
13 |
+
2021-05-22 16:51:32,063 INFO filelock Lock 140388380324432 acquired on /root/.cache/huggingface/transformers/475d46024228961ca8770cead39e1079f135fd2441d14cf216727ffac8d41d78.7f2721073f19841be16f41b0a70b600ca6b880c8f3df6f3535cbc704371bdfa4.lock
|
14 |
+
2021-05-22 16:51:33,505 INFO filelock Lock 140388380324432 released on /root/.cache/huggingface/transformers/475d46024228961ca8770cead39e1079f135fd2441d14cf216727ffac8d41d78.7f2721073f19841be16f41b0a70b600ca6b880c8f3df6f3535cbc704371bdfa4.lock
|
15 |
+
2021-05-22 16:51:35,451 INFO filelock Lock 140388380350864 acquired on /root/.cache/huggingface/transformers/300ecd79785b4602752c0085f8a89c3f0232ef367eda291c79a5600f3778b677.20430bd8e10ef77a7d2977accefe796051e01bc2fc4aa146bc862997a1a15e79.lock
|
16 |
+
2021-05-22 16:51:36,009 INFO filelock Lock 140388380350864 released on /root/.cache/huggingface/transformers/300ecd79785b4602752c0085f8a89c3f0232ef367eda291c79a5600f3778b677.20430bd8e10ef77a7d2977accefe796051e01bc2fc4aa146bc862997a1a15e79.lock
|
17 |
+
2021-05-22 16:51:36,585 INFO filelock Lock 140388380327440 acquired on /root/.cache/huggingface/transformers/1d959166dd7e047e57ea1b2d9b7b9669938a7e90c5e37a03961ad9f15eaea17f.fea64cd906e3766b04c92397f9ad3ff45271749cbe49829a079dd84e34c1697d.lock
|
18 |
+
2021-05-22 16:51:59,770 INFO filelock Lock 140388380327440 released on /root/.cache/huggingface/transformers/1d959166dd7e047e57ea1b2d9b7b9669938a7e90c5e37a03961ad9f15eaea17f.fea64cd906e3766b04c92397f9ad3ff45271749cbe49829a079dd84e34c1697d.lock
|
19 |
+
2021-05-22 16:53:50,809 INFO __main__ Start training
|
20 |
+
2021-05-22 16:53:50,810 INFO torchdistill.models.util [student model]
|
21 |
+
2021-05-22 16:53:50,810 INFO torchdistill.models.util Using the original student model
|
22 |
+
2021-05-22 16:53:50,810 INFO torchdistill.core.training Loss = 1.0 * OrgLoss
|
23 |
+
2021-05-22 16:53:58,061 INFO torchdistill.misc.log Epoch: [0] [ 0/12272] eta: 1:16:07 lr: 1.9999456757931336e-05 sample/s: 12.183084487517528 loss: 1.1712 (1.1712) time: 0.3722 data: 0.0438 max mem: 6528
|
24 |
+
2021-05-22 16:57:39,884 INFO torchdistill.misc.log Epoch: [0] [ 1000/12272] eta: 0:41:42 lr: 1.945621468926554e-05 sample/s: 20.478750076289288 loss: 0.5207 (0.7022) time: 0.2147 data: 0.0042 max mem: 12387
|
25 |
+
2021-05-22 17:01:20,141 INFO torchdistill.misc.log Epoch: [0] [ 2000/12272] eta: 0:37:51 lr: 1.891297262059974e-05 sample/s: 15.393974983851077 loss: 0.4313 (0.6009) time: 0.2211 data: 0.0043 max mem: 12387
|
26 |
+
2021-05-22 17:05:02,731 INFO torchdistill.misc.log Epoch: [0] [ 3000/12272] eta: 0:34:14 lr: 1.8369730551933943e-05 sample/s: 18.06303300441961 loss: 0.4815 (0.5557) time: 0.2245 data: 0.0044 max mem: 12387
|
27 |
+
2021-05-22 17:08:46,475 INFO torchdistill.misc.log Epoch: [0] [ 4000/12272] eta: 0:30:37 lr: 1.7826488483268146e-05 sample/s: 21.499356065605703 loss: 0.3788 (0.5245) time: 0.2279 data: 0.0044 max mem: 12387
|
28 |
+
2021-05-22 17:12:27,148 INFO torchdistill.misc.log Epoch: [0] [ 5000/12272] eta: 0:26:53 lr: 1.728324641460235e-05 sample/s: 18.00671446357275 loss: 0.4535 (0.5039) time: 0.2255 data: 0.0045 max mem: 12387
|
29 |
+
2021-05-22 17:16:11,174 INFO torchdistill.misc.log Epoch: [0] [ 6000/12272] eta: 0:23:13 lr: 1.674000434593655e-05 sample/s: 18.482076681398965 loss: 0.3041 (0.4878) time: 0.2298 data: 0.0048 max mem: 12387
|
30 |
+
2021-05-22 17:19:54,854 INFO torchdistill.misc.log Epoch: [0] [ 7000/12272] eta: 0:19:32 lr: 1.6196762277270753e-05 sample/s: 17.582475809604055 loss: 0.4186 (0.4757) time: 0.2196 data: 0.0043 max mem: 12387
|
31 |
+
2021-05-22 17:23:37,409 INFO torchdistill.misc.log Epoch: [0] [ 8000/12272] eta: 0:15:50 lr: 1.5653520208604957e-05 sample/s: 18.052984294095115 loss: 0.3516 (0.4650) time: 0.2145 data: 0.0044 max mem: 12387
|
32 |
+
2021-05-22 17:27:17,991 INFO torchdistill.misc.log Epoch: [0] [ 9000/12272] eta: 0:12:07 lr: 1.5110278139939158e-05 sample/s: 22.68604630722653 loss: 0.3512 (0.4559) time: 0.2295 data: 0.0043 max mem: 12387
|
33 |
+
2021-05-22 17:31:01,464 INFO torchdistill.misc.log Epoch: [0] [10000/12272] eta: 0:08:25 lr: 1.4567036071273362e-05 sample/s: 16.33036813667613 loss: 0.2831 (0.4491) time: 0.2283 data: 0.0043 max mem: 12387
|
34 |
+
2021-05-22 17:34:44,016 INFO torchdistill.misc.log Epoch: [0] [11000/12272] eta: 0:04:42 lr: 1.4023794002607562e-05 sample/s: 20.46775732289036 loss: 0.3536 (0.4437) time: 0.2124 data: 0.0044 max mem: 12387
|
35 |
+
2021-05-22 17:38:24,552 INFO torchdistill.misc.log Epoch: [0] [12000/12272] eta: 0:01:00 lr: 1.3480551933941765e-05 sample/s: 18.079364121485902 loss: 0.4479 (0.4387) time: 0.2239 data: 0.0043 max mem: 12387
|
36 |
+
2021-05-22 17:39:24,832 INFO torchdistill.misc.log Epoch: [0] Total time: 0:45:27
|
37 |
+
2021-05-22 17:39:43,192 INFO /usr/local/lib/python3.7/dist-packages/datasets/metric.py Removing /root/.cache/huggingface/metrics/glue/mnli/default_experiment-1-0.arrow
|
38 |
+
2021-05-22 17:39:43,193 INFO __main__ Validation: accuracy = 0.8611309220580744
|
39 |
+
2021-05-22 17:39:43,193 INFO __main__ Updating ckpt
|
40 |
+
2021-05-22 17:39:48,350 INFO torchdistill.misc.log Epoch: [1] [ 0/12272] eta: 0:46:53 lr: 1.333279009126467e-05 sample/s: 20.607311527884058 loss: 0.2311 (0.2311) time: 0.2293 data: 0.0352 max mem: 12387
|
41 |
+
2021-05-22 17:43:29,458 INFO torchdistill.misc.log Epoch: [1] [ 1000/12272] eta: 0:41:32 lr: 1.2789548022598873e-05 sample/s: 20.56273287053379 loss: 0.1454 (0.2223) time: 0.2243 data: 0.0042 max mem: 12387
|
42 |
+
2021-05-22 17:47:11,803 INFO torchdistill.misc.log Epoch: [1] [ 2000/12272] eta: 0:37:57 lr: 1.2246305953933073e-05 sample/s: 18.59424568869974 loss: 0.2245 (0.2225) time: 0.2319 data: 0.0048 max mem: 12387
|
43 |
+
2021-05-22 17:50:54,138 INFO torchdistill.misc.log Epoch: [1] [ 3000/12272] eta: 0:34:17 lr: 1.1703063885267276e-05 sample/s: 18.018356427434547 loss: 0.2228 (0.2222) time: 0.2174 data: 0.0043 max mem: 12387
|
44 |
+
2021-05-22 17:54:36,556 INFO torchdistill.misc.log Epoch: [1] [ 4000/12272] eta: 0:30:36 lr: 1.115982181660148e-05 sample/s: 20.332667584495162 loss: 0.1867 (0.2216) time: 0.2261 data: 0.0043 max mem: 12387
|
45 |
+
2021-05-22 17:58:20,409 INFO torchdistill.misc.log Epoch: [1] [ 5000/12272] eta: 0:26:57 lr: 1.061657974793568e-05 sample/s: 17.845956815829588 loss: 0.1976 (0.2198) time: 0.2294 data: 0.0042 max mem: 12387
|
46 |
+
2021-05-22 18:02:02,847 INFO torchdistill.misc.log Epoch: [1] [ 6000/12272] eta: 0:23:14 lr: 1.0073337679269883e-05 sample/s: 21.467115188009178 loss: 0.1879 (0.2201) time: 0.2190 data: 0.0042 max mem: 12387
|
47 |
+
2021-05-22 18:05:42,715 INFO torchdistill.misc.log Epoch: [1] [ 7000/12272] eta: 0:19:30 lr: 9.530095610604087e-06 sample/s: 17.160899388528726 loss: 0.2007 (0.2208) time: 0.2206 data: 0.0043 max mem: 12387
|
48 |
+
2021-05-22 18:09:23,036 INFO torchdistill.misc.log Epoch: [1] [ 8000/12272] eta: 0:15:47 lr: 8.986853541938288e-06 sample/s: 18.76536935211822 loss: 0.1739 (0.2222) time: 0.2313 data: 0.0043 max mem: 12387
|
49 |
+
2021-05-22 18:13:05,941 INFO torchdistill.misc.log Epoch: [1] [ 9000/12272] eta: 0:12:06 lr: 8.44361147327249e-06 sample/s: 21.49795107699799 loss: 0.1878 (0.2220) time: 0.2245 data: 0.0043 max mem: 12387
|
50 |
+
2021-05-22 18:16:47,980 INFO torchdistill.misc.log Epoch: [1] [10000/12272] eta: 0:08:24 lr: 7.900369404606693e-06 sample/s: 14.830137577212604 loss: 0.1875 (0.2221) time: 0.2259 data: 0.0043 max mem: 12387
|
51 |
+
2021-05-22 18:20:30,312 INFO torchdistill.misc.log Epoch: [1] [11000/12272] eta: 0:04:42 lr: 7.357127335940896e-06 sample/s: 20.469380509379288 loss: 0.2438 (0.2223) time: 0.2209 data: 0.0042 max mem: 12387
|
52 |
+
2021-05-22 18:24:12,649 INFO torchdistill.misc.log Epoch: [1] [12000/12272] eta: 0:01:00 lr: 6.813885267275099e-06 sample/s: 22.500544501606683 loss: 0.1945 (0.2219) time: 0.2258 data: 0.0043 max mem: 12387
|
53 |
+
2021-05-22 18:25:12,909 INFO torchdistill.misc.log Epoch: [1] Total time: 0:45:24
|
54 |
+
2021-05-22 18:25:31,246 INFO /usr/local/lib/python3.7/dist-packages/datasets/metric.py Removing /root/.cache/huggingface/metrics/glue/mnli/default_experiment-1-0.arrow
|
55 |
+
2021-05-22 18:25:31,247 INFO __main__ Validation: accuracy = 0.8580743759551707
|
56 |
+
2021-05-22 18:25:31,544 INFO torchdistill.misc.log Epoch: [2] [ 0/12272] eta: 1:00:43 lr: 6.666123424598001e-06 sample/s: 14.723753056256177 loss: 0.2830 (0.2830) time: 0.2969 data: 0.0253 max mem: 12387
|
57 |
+
2021-05-22 18:29:12,317 INFO torchdistill.misc.log Epoch: [2] [ 1000/12272] eta: 0:41:29 lr: 6.122881355932204e-06 sample/s: 21.425308023947203 loss: 0.0852 (0.1137) time: 0.2224 data: 0.0042 max mem: 12387
|
58 |
+
2021-05-22 18:32:53,024 INFO torchdistill.misc.log Epoch: [2] [ 2000/12272] eta: 0:37:47 lr: 5.579639287266406e-06 sample/s: 16.40405652223226 loss: 0.0455 (0.1167) time: 0.2192 data: 0.0041 max mem: 12387
|
59 |
+
2021-05-22 18:36:32,375 INFO torchdistill.misc.log Epoch: [2] [ 3000/12272] eta: 0:34:02 lr: 5.0363972186006095e-06 sample/s: 18.737662697375622 loss: 0.1362 (0.1172) time: 0.2145 data: 0.0043 max mem: 12387
|
60 |
+
2021-05-22 18:40:14,688 INFO torchdistill.misc.log Epoch: [2] [ 4000/12272] eta: 0:30:26 lr: 4.493155149934811e-06 sample/s: 18.113464380838863 loss: 0.0532 (0.1177) time: 0.2220 data: 0.0043 max mem: 12387
|
61 |
+
2021-05-22 18:43:57,248 INFO torchdistill.misc.log Epoch: [2] [ 5000/12272] eta: 0:26:48 lr: 3.949913081269014e-06 sample/s: 14.875937326267126 loss: 0.1144 (0.1180) time: 0.2276 data: 0.0043 max mem: 12387
|
62 |
+
2021-05-22 18:47:39,622 INFO torchdistill.misc.log Epoch: [2] [ 6000/12272] eta: 0:23:08 lr: 3.4066710126032164e-06 sample/s: 16.430757056702372 loss: 0.1580 (0.1191) time: 0.2224 data: 0.0044 max mem: 12387
|
63 |
+
2021-05-22 18:51:21,874 INFO torchdistill.misc.log Epoch: [2] [ 7000/12272] eta: 0:19:27 lr: 2.8634289439374186e-06 sample/s: 18.773201563424898 loss: 0.0951 (0.1196) time: 0.2272 data: 0.0042 max mem: 12387
|
64 |
+
2021-05-22 18:55:04,803 INFO torchdistill.misc.log Epoch: [2] [ 8000/12272] eta: 0:15:46 lr: 2.320186875271621e-06 sample/s: 14.910151872387736 loss: 0.0195 (0.1192) time: 0.2168 data: 0.0042 max mem: 12387
|
65 |
+
2021-05-22 18:58:47,324 INFO torchdistill.misc.log Epoch: [2] [ 9000/12272] eta: 0:12:05 lr: 1.7769448066058238e-06 sample/s: 17.93071695225555 loss: 0.0373 (0.1192) time: 0.2179 data: 0.0042 max mem: 12387
|
66 |
+
2021-05-22 19:02:29,181 INFO torchdistill.misc.log Epoch: [2] [10000/12272] eta: 0:08:23 lr: 1.2337027379400262e-06 sample/s: 14.847908469314245 loss: 0.0573 (0.1188) time: 0.2322 data: 0.0042 max mem: 12387
|
67 |
+
2021-05-22 19:06:11,866 INFO torchdistill.misc.log Epoch: [2] [11000/12272] eta: 0:04:42 lr: 6.904606692742287e-07 sample/s: 14.793588146235695 loss: 0.0291 (0.1186) time: 0.2280 data: 0.0043 max mem: 12387
|
68 |
+
2021-05-22 19:09:52,653 INFO torchdistill.misc.log Epoch: [2] [12000/12272] eta: 0:01:00 lr: 1.4721860060843112e-07 sample/s: 20.479150035276856 loss: 0.0900 (0.1190) time: 0.2209 data: 0.0042 max mem: 12387
|
69 |
+
2021-05-22 19:10:51,967 INFO torchdistill.misc.log Epoch: [2] Total time: 0:45:20
|
70 |
+
2021-05-22 19:11:10,326 INFO /usr/local/lib/python3.7/dist-packages/datasets/metric.py Removing /root/.cache/huggingface/metrics/glue/mnli/default_experiment-1-0.arrow
|
71 |
+
2021-05-22 19:11:10,326 INFO __main__ Validation: accuracy = 0.8541008660213958
|
72 |
+
2021-05-22 19:11:18,288 INFO __main__ [Student: bert-large-uncased]
|
73 |
+
2021-05-22 19:11:36,643 INFO /usr/local/lib/python3.7/dist-packages/datasets/metric.py Removing /root/.cache/huggingface/metrics/glue/mnli/default_experiment-1-0.arrow
|
74 |
+
2021-05-22 19:11:36,643 INFO __main__ Test: accuracy = 0.8611309220580744
|
75 |
+
2021-05-22 19:11:36,644 INFO __main__ Start prediction for private dataset(s)
|
76 |
+
2021-05-22 19:11:36,645 INFO __main__ mnli/test_m: 9796 samples
|
77 |
+
2021-05-22 19:11:54,962 INFO __main__ mnli/test_mm: 9847 samples
|
78 |
+
2021-05-22 19:12:13,295 INFO __main__ ax/test_ax: 1104 samples
|
vocab.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|