Kornraphop Kawintiranon
committed on
Commit
•
031f606
1
Parent(s):
06a264e
first version trained for 2 epochs
Browse files
- added_tokens.json +1 -0
- config.json +20 -0
- eval_results_lm.txt +1 -0
- pytorch_model.bin +3 -0
- special_tokens_map.json +1 -0
- tokenizer_config.json +1 -0
- training_args.bin +3 -0
- vocab.txt +0 -0
added_tokens.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"[URL47]": 30669, "[URL25]": 30647, "[URL72]": 30694, "[@USER25]": 30547, "[@USER66]": 30588, "[@USER56]": 30578, "[URL90]": 30712, "[@USER07]": 30529, "[URL48]": 30670, "[@USER77]": 30599, "[URL50]": 30672, "[@USER95]": 30617, "[URL91]": 30713, "[@USER46]": 30568, "[@USER10]": 30532, "[@USER79]": 30601, "[URL20]": 30642, "[@USER72]": 30594, "[URL79]": 30701, "[URL95]": 30717, "[URL51]": 30673, "[@USER73]": 30595, "[URL15]": 30637, "[@USER65]": 30587, "[@USER01]": 30523, "[@USER05]": 30527, "[@USER16]": 30538, "[URL57]": 30679, "[URL03]": 30625, "[@USER68]": 30590, "[URL19]": 30641, "[@USER48]": 30570, "[@USER21]": 30543, "[@USER02]": 30524, "[@USER94]": 30616, "[@USER92]": 30614, "[URL04]": 30626, "[@USER76]": 30598, "[@USER45]": 30567, "[@USER00]": 30522, "[URL60]": 30682, "[URL78]": 30700, "[@USER53]": 30575, "[URL81]": 30703, "[URL43]": 30665, "[URL61]": 30683, "[URL87]": 30709, "[URL55]": 30677, "[URL14]": 30636, "[URL64]": 30686, "[@USER63]": 30585, "[@USER61]": 30583, "[@USER17]": 30539, "[URL06]": 30628, "[@USER67]": 30589, "[@USER18]": 30540, "[URL01]": 30623, "[@USER47]": 30569, "[URL84]": 30706, "[@USER32]": 30554, "[@USER54]": 30576, "[@USER82]": 30604, "[URL75]": 30697, "[@USER19]": 30541, "[@USER96]": 30618, "[URL92]": 30714, "[@USER91]": 30613, "[@USER90]": 30612, "[URL62]": 30684, "[URL41]": 30663, "[@USER49]": 30571, "[URL45]": 30667, "[@USER83]": 30605, "[URL18]": 30640, "[@USER78]": 30600, "[URL38]": 30660, "[@USER97]": 30619, "[@USER31]": 30553, "[URL22]": 30644, "[URL56]": 30678, "[URL32]": 30654, "[URL13]": 30635, "[URL69]": 30691, "[@USER13]": 30535, "[@USER75]": 30597, "[URL88]": 30710, "[@USER40]": 30562, "[URL58]": 30680, "[@USER84]": 30606, "[@USER03]": 30525, "[URL23]": 30645, "[@USER69]": 30591, "[@USER09]": 30531, "[@USER81]": 30603, "[URL54]": 30676, "[URL71]": 30693, "[URL28]": 30650, "[URL68]": 30690, "[@USER89]": 30611, "[@USER12]": 30534, "[@USER37]": 30559, "[URL30]": 30652, "[@USER58]": 30580, "[URL40]": 30662, "[@USER22]": 
30544, "[URL16]": 30638, "[URL49]": 30671, "[@USER11]": 30533, "[URL52]": 30674, "[@USER57]": 30579, "[URL70]": 30692, "[@USER34]": 30556, "[URL33]": 30655, "[URL02]": 30624, "[@USER14]": 30536, "[URL63]": 30685, "[URL07]": 30629, "[@USER06]": 30528, "[@USER85]": 30607, "[URL26]": 30648, "[URL34]": 30656, "[URL09]": 30631, "[@USER35]": 30557, "[URL29]": 30651, "[URL46]": 30668, "[@USER86]": 30608, "[URL00]": 30622, "[@USER43]": 30565, "[URL12]": 30634, "[URL39]": 30661, "[@USER04]": 30526, "[URL85]": 30707, "[URL31]": 30653, "[URL11]": 30633, "[@USER51]": 30573, "[@USER26]": 30548, "[URL24]": 30646, "[@USER62]": 30584, "[URL83]": 30705, "[@USER52]": 30574, "[@USER38]": 30560, "[@USER88]": 30610, "[@USER20]": 30542, "[@USER87]": 30609, "[@USER80]": 30602, "[URL53]": 30675, "[URL82]": 30704, "[URL89]": 30711, "[URL27]": 30649, "[@USER15]": 30537, "[URL76]": 30698, "[@USER29]": 30551, "[@USER24]": 30546, "[@USER33]": 30555, "[@USER64]": 30586, "[URL80]": 30702, "[@USER59]": 30581, "[@USER27]": 30549, "[@USER74]": 30596, "[@USER42]": 30564, "[URL10]": 30632, "[URL44]": 30666, "[@USER98]": 30620, "[URL86]": 30708, "[URL77]": 30699, "[@USER93]": 30615, "[URL37]": 30659, "[@USER36]": 30558, "[URL08]": 30630, "[URL59]": 30681, "[URL21]": 30643, "[@USER60]": 30582, "[URL93]": 30715, "[@USER08]": 30530, "[URL35]": 30657, "[@USER28]": 30550, "[@USER50]": 30572, "[@USER39]": 30561, "[@USER44]": 30566, "[URL97]": 30719, "[URL99]": 30721, "[@USER23]": 30545, "[URL42]": 30664, "[URL17]": 30639, "[@USER70]": 30592, "[URL98]": 30720, "[URL65]": 30687, "[@USER55]": 30577, "[URL66]": 30688, "[@USER71]": 30593, "[@USER30]": 30552, "[URL36]": 30658, "[@USER41]": 30563, "[URL96]": 30718, "[@USER99]": 30621, "[URL05]": 30627, "[URL74]": 30696, "[URL73]": 30695, "[URL94]": 30716, "[URL67]": 30689}
|
config.json
ADDED
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"architectures": [
|
3 |
+
"BertForMaskedLM"
|
4 |
+
],
|
5 |
+
"attention_probs_dropout_prob": 0.1,
|
6 |
+
"gradient_checkpointing": false,
|
7 |
+
"hidden_act": "gelu",
|
8 |
+
"hidden_dropout_prob": 0.1,
|
9 |
+
"hidden_size": 768,
|
10 |
+
"initializer_range": 0.02,
|
11 |
+
"intermediate_size": 3072,
|
12 |
+
"layer_norm_eps": 1e-12,
|
13 |
+
"max_position_embeddings": 512,
|
14 |
+
"model_type": "bert",
|
15 |
+
"num_attention_heads": 12,
|
16 |
+
"num_hidden_layers": 12,
|
17 |
+
"pad_token_id": 0,
|
18 |
+
"type_vocab_size": 2,
|
19 |
+
"vocab_size": 30722
|
20 |
+
}
|
eval_results_lm.txt
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
perplexity = 4.261143611431846
|
pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f5f5effd694f42c4931d1bb5c81855cdda337ff7eb05c34c5531435573f0e173
|
3 |
+
size 441090388
|
special_tokens_map.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]", "additional_special_tokens": ["[@USER00]", "[@USER01]", "[@USER02]", "[@USER03]", "[@USER04]", "[@USER05]", "[@USER06]", "[@USER07]", "[@USER08]", "[@USER09]", "[@USER10]", "[@USER11]", "[@USER12]", "[@USER13]", "[@USER14]", "[@USER15]", "[@USER16]", "[@USER17]", "[@USER18]", "[@USER19]", "[@USER20]", "[@USER21]", "[@USER22]", "[@USER23]", "[@USER24]", "[@USER25]", "[@USER26]", "[@USER27]", "[@USER28]", "[@USER29]", "[@USER30]", "[@USER31]", "[@USER32]", "[@USER33]", "[@USER34]", "[@USER35]", "[@USER36]", "[@USER37]", "[@USER38]", "[@USER39]", "[@USER40]", "[@USER41]", "[@USER42]", "[@USER43]", "[@USER44]", "[@USER45]", "[@USER46]", "[@USER47]", "[@USER48]", "[@USER49]", "[@USER50]", "[@USER51]", "[@USER52]", "[@USER53]", "[@USER54]", "[@USER55]", "[@USER56]", "[@USER57]", "[@USER58]", "[@USER59]", "[@USER60]", "[@USER61]", "[@USER62]", "[@USER63]", "[@USER64]", "[@USER65]", "[@USER66]", "[@USER67]", "[@USER68]", "[@USER69]", "[@USER70]", "[@USER71]", "[@USER72]", "[@USER73]", "[@USER74]", "[@USER75]", "[@USER76]", "[@USER77]", "[@USER78]", "[@USER79]", "[@USER80]", "[@USER81]", "[@USER82]", "[@USER83]", "[@USER84]", "[@USER85]", "[@USER86]", "[@USER87]", "[@USER88]", "[@USER89]", "[@USER90]", "[@USER91]", "[@USER92]", "[@USER93]", "[@USER94]", "[@USER95]", "[@USER96]", "[@USER97]", "[@USER98]", "[@USER99]", "[URL00]", "[URL01]", "[URL02]", "[URL03]", "[URL04]", "[URL05]", "[URL06]", "[URL07]", "[URL08]", "[URL09]", "[URL10]", "[URL11]", "[URL12]", "[URL13]", "[URL14]", "[URL15]", "[URL16]", "[URL17]", "[URL18]", "[URL19]", "[URL20]", "[URL21]", "[URL22]", "[URL23]", "[URL24]", "[URL25]", "[URL26]", "[URL27]", "[URL28]", "[URL29]", "[URL30]", "[URL31]", "[URL32]", "[URL33]", "[URL34]", "[URL35]", "[URL36]", "[URL37]", "[URL38]", "[URL39]", "[URL40]", "[URL41]", "[URL42]", "[URL43]", "[URL44]", "[URL45]", "[URL46]", "[URL47]", "[URL48]", "[URL49]", 
"[URL50]", "[URL51]", "[URL52]", "[URL53]", "[URL54]", "[URL55]", "[URL56]", "[URL57]", "[URL58]", "[URL59]", "[URL60]", "[URL61]", "[URL62]", "[URL63]", "[URL64]", "[URL65]", "[URL66]", "[URL67]", "[URL68]", "[URL69]", "[URL70]", "[URL71]", "[URL72]", "[URL73]", "[URL74]", "[URL75]", "[URL76]", "[URL77]", "[URL78]", "[URL79]", "[URL80]", "[URL81]", "[URL82]", "[URL83]", "[URL84]", "[URL85]", "[URL86]", "[URL87]", "[URL88]", "[URL89]", "[URL90]", "[URL91]", "[URL92]", "[URL93]", "[URL94]", "[URL95]", "[URL96]", "[URL97]", "[URL98]", "[URL99]"]}
|
tokenizer_config.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"do_lower_case": true, "model_max_length": 512, "special_tokens_map_file": "/home/ken/projects/Stance-Sentiment/language_models/English/election2020/bert-election2020-tweet-5M-1ep-lr-1e-4/special_tokens_map.json", "full_tokenizer_file": null}
|
training_args.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:45dbddf746a613fd6bdb3342b5d6d0f13ba15ed9e23d1971da046edf02be8633
|
3 |
+
size 1339
|
vocab.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|