Deehan1866
committed on
Commit
•
7813a93
1
Parent(s):
a86a921
Upload 16 files
Browse files- README.md +52 -3
- all_results.json +11 -0
- config.json +45 -0
- eval_nbest_predictions.json +0 -0
- eval_predictions.json +502 -0
- eval_results.json +6 -0
- finetune_logs.txt +0 -0
- merges.txt +0 -0
- pytorch_model.bin +3 -0
- special_tokens_map.json +15 -0
- tokenizer.json +0 -0
- tokenizer_config.json +16 -0
- train_results.json +8 -0
- trainer_state.json +43 -0
- training_args.bin +3 -0
- vocab.json +0 -0
README.md
CHANGED
@@ -1,3 +1,52 @@
|
|
1 |
-
---
|
2 |
-
|
3 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
tags:
|
3 |
+
- generated_from_trainer
|
4 |
+
datasets:
|
5 |
+
- PiC/phrase_sense_disambiguation
|
6 |
+
model-index:
|
7 |
+
- name: finetuned
|
8 |
+
results: []
|
9 |
+
---
|
10 |
+
|
11 |
+
<!-- This model card has been generated automatically according to the information the Trainer had access to. You
|
12 |
+
should probably proofread and complete it, then remove this comment. -->
|
13 |
+
|
14 |
+
# finetuned
|
15 |
+
|
16 |
+
This model is a fine-tuned version of the local checkpoint `../results/phrase_retrieval/PR-pass/qa/allenai/longformer-base-4096/finetuned` (a filesystem path from the training run, not a Hub model ID) on the PiC/phrase_sense_disambiguation dataset.
|
17 |
+
|
18 |
+
## Model description
|
19 |
+
|
20 |
+
More information needed
|
21 |
+
|
22 |
+
## Intended uses & limitations
|
23 |
+
|
24 |
+
More information needed
|
25 |
+
|
26 |
+
## Training and evaluation data
|
27 |
+
|
28 |
+
More information needed
|
29 |
+
|
30 |
+
## Training procedure
|
31 |
+
|
32 |
+
### Training hyperparameters
|
33 |
+
|
34 |
+
The following hyperparameters were used during training:
|
35 |
+
- learning_rate: 3e-05
|
36 |
+
- train_batch_size: 2
|
37 |
+
- eval_batch_size: 2
|
38 |
+
- seed: 42
|
39 |
+
- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
|
40 |
+
- lr_scheduler_type: linear
|
41 |
+
- num_epochs: 2.0
|
42 |
+
|
43 |
+
### Training results
|
44 |
+
|
45 |
+
|
46 |
+
|
47 |
+
### Framework versions
|
48 |
+
|
49 |
+
- Transformers 4.20.1
|
50 |
+
- Pytorch 1.10.0+cu113
|
51 |
+
- Datasets 2.3.2
|
52 |
+
- Tokenizers 0.12.1
|
all_results.json
ADDED
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"epoch": 2.0,
|
3 |
+
"eval_exact_match": 69.39999999999999,
|
4 |
+
"eval_f1": 69.43,
|
5 |
+
"eval_samples": 500,
|
6 |
+
"train_loss": 0.9066677579012784,
|
7 |
+
"train_runtime": 893.6642,
|
8 |
+
"train_samples": 1650,
|
9 |
+
"train_samples_per_second": 3.693,
|
10 |
+
"train_steps_per_second": 1.846
|
11 |
+
}
|
config.json
ADDED
@@ -0,0 +1,45 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "../results/phrase_retrieval/PR-pass/qa/allenai/longformer-base-4096/finetuned",
|
3 |
+
"architectures": [
|
4 |
+
"LongformerForQuestionAnswering"
|
5 |
+
],
|
6 |
+
"attention_mode": "longformer",
|
7 |
+
"attention_probs_dropout_prob": 0.1,
|
8 |
+
"attention_window": [
|
9 |
+
512,
|
10 |
+
512,
|
11 |
+
512,
|
12 |
+
512,
|
13 |
+
512,
|
14 |
+
512,
|
15 |
+
512,
|
16 |
+
512,
|
17 |
+
512,
|
18 |
+
512,
|
19 |
+
512,
|
20 |
+
512
|
21 |
+
],
|
22 |
+
"bos_token_id": 0,
|
23 |
+
"classifier_dropout": null,
|
24 |
+
"eos_token_id": 2,
|
25 |
+
"gradient_checkpointing": false,
|
26 |
+
"hidden_act": "gelu",
|
27 |
+
"hidden_dropout_prob": 0.1,
|
28 |
+
"hidden_size": 768,
|
29 |
+
"ignore_attention_mask": false,
|
30 |
+
"initializer_range": 0.02,
|
31 |
+
"intermediate_size": 3072,
|
32 |
+
"layer_norm_eps": 1e-05,
|
33 |
+
"max_position_embeddings": 4098,
|
34 |
+
"model_type": "longformer",
|
35 |
+
"num_attention_heads": 12,
|
36 |
+
"num_hidden_layers": 12,
|
37 |
+
"pad_token_id": 1,
|
38 |
+
"position_embedding_type": "absolute",
|
39 |
+
"sep_token_id": 2,
|
40 |
+
"torch_dtype": "float32",
|
41 |
+
"transformers_version": "4.20.1",
|
42 |
+
"type_vocab_size": 1,
|
43 |
+
"use_cache": true,
|
44 |
+
"vocab_size": 50265
|
45 |
+
}
|
eval_nbest_predictions.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
eval_predictions.json
ADDED
@@ -0,0 +1,502 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"6023-2": "better shot",
|
3 |
+
"8982-1": "fast release",
|
4 |
+
"8163-1": "immediate background",
|
5 |
+
"4019-2": "second column",
|
6 |
+
"1969-1": "maintenance work",
|
7 |
+
"847-2": "active person",
|
8 |
+
"12419-2": "last bar",
|
9 |
+
"5047-1": "street signs",
|
10 |
+
"9656-1": "greatest exposure",
|
11 |
+
"6861-2": "sudden death",
|
12 |
+
"9301-1": "quick run",
|
13 |
+
"8792-2": "free hit",
|
14 |
+
"12610-1": "Spring Rain",
|
15 |
+
"9645-1": "delivery model",
|
16 |
+
"14275-2": "warning signal",
|
17 |
+
"14699-2": "later conflict",
|
18 |
+
"1173-1": "occupied state",
|
19 |
+
"8990-2": "fledgling system",
|
20 |
+
"2100-1": "whole power",
|
21 |
+
"7224-2": "resting time",
|
22 |
+
"6198-1": "open book",
|
23 |
+
"4424-2": "altered production",
|
24 |
+
"12127-2": "film material",
|
25 |
+
"9371-2": "order forms",
|
26 |
+
"4373-2": "clear dividing line",
|
27 |
+
"12929-1": "bright light",
|
28 |
+
"14667-2": "Take Care",
|
29 |
+
"3361-1": "official guardian",
|
30 |
+
"9507-2": "top center",
|
31 |
+
"12411-1": "entire state",
|
32 |
+
"1889-1": "joint defense",
|
33 |
+
"10998-2": "regular period",
|
34 |
+
"6613-2": "universal manner",
|
35 |
+
"4376-2": "first hearing",
|
36 |
+
"11356-2": "single source",
|
37 |
+
"8130-2": "good value",
|
38 |
+
"8684-2": "sufficient material",
|
39 |
+
"8969-1": "principal types",
|
40 |
+
"8757-1": "one place",
|
41 |
+
"14613-2": "significant trail",
|
42 |
+
"3888-1": "greater clarity",
|
43 |
+
"3809-2": "physical performance",
|
44 |
+
"7088-1": "formal level",
|
45 |
+
"7192-1": "Local records",
|
46 |
+
"702-1": "second property",
|
47 |
+
"14056-1": "long relationship",
|
48 |
+
"13251-2": "clear position",
|
49 |
+
"8432-1": "similar record",
|
50 |
+
"4044-1": "live nights",
|
51 |
+
"8539-1": "religious organization",
|
52 |
+
"1206-1": "desired piece",
|
53 |
+
"5936-2": "standard character",
|
54 |
+
"8762-1": "bottom table",
|
55 |
+
"2812-2": "good links",
|
56 |
+
"10527-1": "independent operation",
|
57 |
+
"9940-2": "former shape",
|
58 |
+
"13640-1": "local assembly",
|
59 |
+
"1910-2": "supply water",
|
60 |
+
"14373-1": "US study",
|
61 |
+
"13707-1": "full order",
|
62 |
+
"8648-2": "visual direction",
|
63 |
+
"3743-1": "homecoming ceremony",
|
64 |
+
"7902-2": "political track",
|
65 |
+
"3207-1": "1st person",
|
66 |
+
"5964-2": "first seat",
|
67 |
+
"8704-1": "repayment period",
|
68 |
+
"3529-1": "common site",
|
69 |
+
"14975-1": "solid support",
|
70 |
+
"14545-1": "major order",
|
71 |
+
"11072-2": "secondary concern",
|
72 |
+
"4610-1": "boiling point",
|
73 |
+
"6740-1": "central activity",
|
74 |
+
"14278-1": "one \"unit",
|
75 |
+
"13627-2": "key tracks",
|
76 |
+
"8911-1": "small association",
|
77 |
+
"317-2": "death party",
|
78 |
+
"1746-2": "possible future development",
|
79 |
+
"14898-1": "big brand",
|
80 |
+
"10977-1": "second development",
|
81 |
+
"8244-2": "processing station",
|
82 |
+
"3992-1": "consistent exercise",
|
83 |
+
"14530-2": "fluid movement",
|
84 |
+
"14488-1": "whole cluster",
|
85 |
+
"6898-1": "regular school",
|
86 |
+
"14501-1": "natural limit",
|
87 |
+
"14165-2": "major west",
|
88 |
+
"11835-1": "larger body",
|
89 |
+
"9139-1": "notable event",
|
90 |
+
"4446-2": "healing force",
|
91 |
+
"11629-1": "entire channel",
|
92 |
+
"13755-1": "simple table",
|
93 |
+
"8523-1": "ideal unit",
|
94 |
+
"2530-2": "varying nature",
|
95 |
+
"14162-2": "first stroke",
|
96 |
+
"365-2": "long thread",
|
97 |
+
"7413-1": "new flagship",
|
98 |
+
"4560-2": "Winter Games",
|
99 |
+
"4381-1": "party tickets",
|
100 |
+
"4411-1": "another stem",
|
101 |
+
"8191-2": "general nature",
|
102 |
+
"3356-2": "hand positions",
|
103 |
+
"719-1": "force requirements",
|
104 |
+
"2023-1": "single side",
|
105 |
+
"376-2": "highly complex work",
|
106 |
+
"14432-2": "Current service",
|
107 |
+
"14943-2": "contact point",
|
108 |
+
"7823-2": "similar matter",
|
109 |
+
"13030-1": "14 and 16 year",
|
110 |
+
"11275-2": "deep division",
|
111 |
+
"2655-2": "one ex",
|
112 |
+
"8523-2": "ideal unit",
|
113 |
+
"9006-1": "last word",
|
114 |
+
"6674-2": "entire load",
|
115 |
+
"13889-2": "final top",
|
116 |
+
"5009-1": "job site",
|
117 |
+
"8130-1": "good value",
|
118 |
+
"8463-1": "two shoulder",
|
119 |
+
"864-1": "rough shape",
|
120 |
+
"624-2": "golden rule",
|
121 |
+
"11509-1": "metal material",
|
122 |
+
"3634-2": "education structure",
|
123 |
+
"5541-2": "next unit",
|
124 |
+
"12425-2": "local circuit",
|
125 |
+
"4313-2": "single spring",
|
126 |
+
"8848-1": "current subject",
|
127 |
+
"10521-1": "stronger claim",
|
128 |
+
"5972-2": "New Mexico game",
|
129 |
+
"4304-1": "first turn",
|
130 |
+
"5914-1": "true sense",
|
131 |
+
"8614-1": "one subdivision",
|
132 |
+
"2734-1": "Initial word",
|
133 |
+
"5314-2": "given decision",
|
134 |
+
"8785-1": "longest performance",
|
135 |
+
"9813-1": "reasonable level",
|
136 |
+
"14241-2": "immediate creation",
|
137 |
+
"3947-1": "useful material",
|
138 |
+
"3861-2": "one pack",
|
139 |
+
"11410-2": "one mass",
|
140 |
+
"12563-2": "Modern point",
|
141 |
+
"947-1": "early interception",
|
142 |
+
"14518-1": "modern port",
|
143 |
+
"4053-1": "informal economy",
|
144 |
+
"3846-2": "later piece",
|
145 |
+
"7133-1": "original volume",
|
146 |
+
"2905-2": "sufficient capacity",
|
147 |
+
"12791-2": "teaching practice",
|
148 |
+
"10565-2": "open structure",
|
149 |
+
"936-1": "latest party",
|
150 |
+
"5301-1": "black suit",
|
151 |
+
"12746-2": "initial activity",
|
152 |
+
"987-2": "stronger line",
|
153 |
+
"14073-1": "similar power",
|
154 |
+
"6750-1": "direct sight",
|
155 |
+
"11557-2": "ideal figure",
|
156 |
+
"10572-2": "practice level",
|
157 |
+
"8626-2": "common division",
|
158 |
+
"14262-1": "One Way Glass",
|
159 |
+
"14132-2": "one chart",
|
160 |
+
"11151-2": "available program",
|
161 |
+
"9817-2": "Greek system",
|
162 |
+
"9956-2": "specific connection",
|
163 |
+
"8967-2": "one mini",
|
164 |
+
"4908-2": "one possession",
|
165 |
+
"5626-2": "alternate point",
|
166 |
+
"4419-2": "transitional season",
|
167 |
+
"1953-1": "large spread",
|
168 |
+
"10901-1": "much voice",
|
169 |
+
"14349-2": "clear drop",
|
170 |
+
"8315-1": "precise formation",
|
171 |
+
"11420-1": "strong division",
|
172 |
+
"9348-1": "2018 update",
|
173 |
+
"3744-1": "main selection",
|
174 |
+
"2997-1": "landmark point",
|
175 |
+
"8213-1": "food dish",
|
176 |
+
"9037-2": "political deliberation",
|
177 |
+
"387-1": "direct pressure",
|
178 |
+
"3465-1": "random time",
|
179 |
+
"14275-1": "warning signal",
|
180 |
+
"7095-1": "popular voice",
|
181 |
+
"12200-1": "full leg",
|
182 |
+
"6694-2": "long stage",
|
183 |
+
"9791-1": "good exposure",
|
184 |
+
"14354-2": "absolute gift",
|
185 |
+
"417-1": "significant gap",
|
186 |
+
"9599-1": "large beam",
|
187 |
+
"12147-2": "live one",
|
188 |
+
"3077-1": "public desire",
|
189 |
+
"3845-2": "single unit",
|
190 |
+
"389-1": "last bit",
|
191 |
+
"6090-1": "well furnished",
|
192 |
+
"9486-2": "significant fall",
|
193 |
+
"8769-2": "Another resolution",
|
194 |
+
"11686-1": "Field's life",
|
195 |
+
"14541-1": "single sentence",
|
196 |
+
"2923-1": "better range",
|
197 |
+
"8065-1": "right approach",
|
198 |
+
"12662-2": "government advice",
|
199 |
+
"8661-2": "physical exchange",
|
200 |
+
"6160-2": "usual language",
|
201 |
+
"8580-2": "full structure",
|
202 |
+
"10177-1": "precious time",
|
203 |
+
"2851-2": "work measures",
|
204 |
+
"10935-1": "powerful spring",
|
205 |
+
"4181-2": "2D model",
|
206 |
+
"7139-1": "last two cases",
|
207 |
+
"4313-1": "single spring",
|
208 |
+
"433-1": "actual activity",
|
209 |
+
"2183-2": "major hunt",
|
210 |
+
"4140-2": "quick recognition",
|
211 |
+
"9077-2": "property types",
|
212 |
+
"8910-1": "solid center",
|
213 |
+
"7588-1": "strong movement",
|
214 |
+
"9215-1": "site visits",
|
215 |
+
"11029-1": "regular practice",
|
216 |
+
"14410-1": "first suggestion",
|
217 |
+
"14211-1": "stronger line",
|
218 |
+
"8895-2": "common source",
|
219 |
+
"3397-1": "certain condition",
|
220 |
+
"4230-1": "common program",
|
221 |
+
"719-2": "force requirements",
|
222 |
+
"14224-1": "controversial temperament",
|
223 |
+
"12144-1": "specialised nature",
|
224 |
+
"8990-1": "fledgling system",
|
225 |
+
"3465-2": "random time",
|
226 |
+
"4453-2": "certain evidence",
|
227 |
+
"4604-2": "text line",
|
228 |
+
"8108-1": "whole scene",
|
229 |
+
"9656-2": "greatest exposure",
|
230 |
+
"6241-2": "open framework",
|
231 |
+
"4700-1": "second case",
|
232 |
+
"13660-2": "release effort",
|
233 |
+
"7265-2": "overall position",
|
234 |
+
"9693-1": "Commercial material",
|
235 |
+
"9348-2": "2010 study",
|
236 |
+
"14196-1": "previous host",
|
237 |
+
"7107-1": "functional level",
|
238 |
+
"11350-2": "Activity Period",
|
239 |
+
"721-1": "quick rate",
|
240 |
+
"4373-1": "clear dividing line",
|
241 |
+
"6825-1": "broader market",
|
242 |
+
"1128-2": "initial degree",
|
243 |
+
"8441-1": "network's channels",
|
244 |
+
"14263-1": "unusual variation",
|
245 |
+
"14966-1": "exact expression",
|
246 |
+
"5138-2": "particular account",
|
247 |
+
"7445-1": "varying strength",
|
248 |
+
"3390-1": "narrow waist",
|
249 |
+
"4315-1": "quicker response",
|
250 |
+
"7925-2": "decisive point",
|
251 |
+
"25-1": "similar finish",
|
252 |
+
"10466-1": "one good",
|
253 |
+
"3812-2": "predictable course",
|
254 |
+
"2355-2": "primary season",
|
255 |
+
"8792-1": "free hit",
|
256 |
+
"3398-2": "certain condition",
|
257 |
+
"7503-1": "top figure",
|
258 |
+
"6694-1": "long stage",
|
259 |
+
"8065-2": "right approach",
|
260 |
+
"174-1": "normal number",
|
261 |
+
"6971-2": "Heritage groups",
|
262 |
+
"14643-2": "bright line",
|
263 |
+
"6556-2": "stop signal",
|
264 |
+
"10661-2": "field position",
|
265 |
+
"14914-2": "negative state",
|
266 |
+
"6086-1": "suitable ground",
|
267 |
+
"8011-1": "whole parts",
|
268 |
+
"9424-2": "corresponding range",
|
269 |
+
"8133-1": "fixed meeting place",
|
270 |
+
"13587-1": "heart land",
|
271 |
+
"3370-2": "quick transfer",
|
272 |
+
"489-1": "particular bond",
|
273 |
+
"14437-1": "whole club",
|
274 |
+
"3897-2": "5th stage",
|
275 |
+
"3366-1": "Community stage",
|
276 |
+
"12467-2": "another bar",
|
277 |
+
"10138-2": "single square",
|
278 |
+
"1668-2": "exceptional first season",
|
279 |
+
"13860-1": "labour time",
|
280 |
+
"6880-2": "full round",
|
281 |
+
"7837-1": "first component",
|
282 |
+
"11313-1": "primary class",
|
283 |
+
"14943-1": "contact point",
|
284 |
+
"12461-1": "personal identification",
|
285 |
+
"3220-1": "even empty shells",
|
286 |
+
"10059-1": "quick passing",
|
287 |
+
"191-1": "different case",
|
288 |
+
"936-2": "latest party",
|
289 |
+
"12579-1": "founder group",
|
290 |
+
"7926-2": "desired character",
|
291 |
+
"6014-2": "strong movement",
|
292 |
+
"9628-1": "higher place",
|
293 |
+
"8708-1": "key goods",
|
294 |
+
"14845-1": "default position",
|
295 |
+
"14234-1": "lower extension",
|
296 |
+
"8667-1": "surgical division",
|
297 |
+
"6257-2": "long cross",
|
298 |
+
"9920-2": "attached agency",
|
299 |
+
"399-2": "full contact",
|
300 |
+
"9700-1": "separate seat",
|
301 |
+
"2401-2": "light parts",
|
302 |
+
"14771-1": "bridge foundation",
|
303 |
+
"4155-2": "general point",
|
304 |
+
"13689-1": "wrong base",
|
305 |
+
"8766-1": "released material",
|
306 |
+
"14353-2": "cut line",
|
307 |
+
"4042-1": "archbishop's chair",
|
308 |
+
"13531-1": "common focus",
|
309 |
+
"3287-1": "German post",
|
310 |
+
"4482-2": "known culture",
|
311 |
+
"10718-2": "average looks",
|
312 |
+
"4600-1": "actual coverage",
|
313 |
+
"3356-1": "hand positions",
|
314 |
+
"4463-2": "second view",
|
315 |
+
"6572-1": "whole trouble",
|
316 |
+
"4865-1": "main class",
|
317 |
+
"572-2": "house members",
|
318 |
+
"11612-2": "general limitation",
|
319 |
+
"323-1": "modern work",
|
320 |
+
"10138-1": "single square",
|
321 |
+
"6030-2": "drum design",
|
322 |
+
"3259-1": "specific judge",
|
323 |
+
"11305-1": "normal home",
|
324 |
+
"14964-2": "Language Blocks",
|
325 |
+
"2455-1": "top five",
|
326 |
+
"13640-2": "Local assembly",
|
327 |
+
"12634-2": "particular property",
|
328 |
+
"3620-2": "zero sum game",
|
329 |
+
"14429-1": "new case",
|
330 |
+
"9133-2": "second radio",
|
331 |
+
"1018-2": "regular period",
|
332 |
+
"9645-2": "delivery model",
|
333 |
+
"14276-2": "fine position",
|
334 |
+
"6715-2": "normal action",
|
335 |
+
"8756-1": "second function",
|
336 |
+
"13506-2": "popular convention",
|
337 |
+
"14041-1": "general treatment",
|
338 |
+
"4436-2": "weaver family",
|
339 |
+
"13641-1": "next number",
|
340 |
+
"8149-2": "light fire",
|
341 |
+
"559-1": "little return",
|
342 |
+
"3451-1": "Dutch tool",
|
343 |
+
"14612-1": "external defense",
|
344 |
+
"13718-1": "current body",
|
345 |
+
"9494-2": "fourth line",
|
346 |
+
"3067-1": "Public Work",
|
347 |
+
"12776-1": "distribution side",
|
348 |
+
"2687-2": "actual figure",
|
349 |
+
"5185-2": "first master",
|
350 |
+
"6880-1": "full round",
|
351 |
+
"14473-2": "strict manner",
|
352 |
+
"4545-2": "formal office",
|
353 |
+
"9272-2": "increasing wave",
|
354 |
+
"3706-1": "consistent pattern",
|
355 |
+
"12195-2": "exact time period",
|
356 |
+
"9934-2": "small association",
|
357 |
+
"14549-2": "value conflicts",
|
358 |
+
"14458-1": "lightning strokes",
|
359 |
+
"10952-2": "form groups",
|
360 |
+
"4419-1": "transitional season",
|
361 |
+
"10141-2": "cold comfort",
|
362 |
+
"6233-2": "subsequent test",
|
363 |
+
"9337-1": "Standby power",
|
364 |
+
"14437-2": "whole club",
|
365 |
+
"3961-1": "remaining force",
|
366 |
+
"13553-1": "major breaches",
|
367 |
+
"12692-1": "property laws",
|
368 |
+
"3348-1": "unique series",
|
369 |
+
"14845-2": "default position",
|
370 |
+
"9324-1": "simple matter",
|
371 |
+
"8999-2": "every score",
|
372 |
+
"9020-2": "natural monopoly",
|
373 |
+
"3740-1": "official returns",
|
374 |
+
"14245-2": "lead female",
|
375 |
+
"5914-2": "true sense",
|
376 |
+
"3364-2": "set rate",
|
377 |
+
"8795-1": "lengthy set",
|
378 |
+
"11831-2": "following wave",
|
379 |
+
"4397-2": "one attribute",
|
380 |
+
"11063-1": "fine place",
|
381 |
+
"423-2": "existing class",
|
382 |
+
"14084-2": "land sharks",
|
383 |
+
"12723-1": "heavy rock",
|
384 |
+
"2724-1": "different play",
|
385 |
+
"9545-2": "rich world",
|
386 |
+
"9959-2": "particular action",
|
387 |
+
"9211-1": "overall capital",
|
388 |
+
"8637-1": "good record",
|
389 |
+
"12418-2": "fusion center",
|
390 |
+
"2692-1": "one piece",
|
391 |
+
"2544-1": "local action",
|
392 |
+
"4491-2": "sudden collapse",
|
393 |
+
"12817-1": "building houses",
|
394 |
+
"2139-2": "right version",
|
395 |
+
"10379-2": "third body",
|
396 |
+
"9705-1": "important spot",
|
397 |
+
"4170-1": "bicycle club",
|
398 |
+
"10706-1": "play structure",
|
399 |
+
"12325-2": "state transition",
|
400 |
+
"9744-2": "offensive manner",
|
401 |
+
"8930-1": "extra help",
|
402 |
+
"7051-1": "main vehicle",
|
403 |
+
"9210-2": "fast movements",
|
404 |
+
"6610-2": "small length",
|
405 |
+
"14476-1": "local players",
|
406 |
+
"9364-1": "defensive line",
|
407 |
+
"3863-1": "live one",
|
408 |
+
"882-2": "foundation experience",
|
409 |
+
"14026-1": "high concentrate",
|
410 |
+
"14361-2": "cheap way",
|
411 |
+
"3295-1": "regular period",
|
412 |
+
"3004-2": "open path",
|
413 |
+
"6399-2": "planned home",
|
414 |
+
"9165-1": "hollow area",
|
415 |
+
"4206-2": "total interest",
|
416 |
+
"3204-2": "Another address",
|
417 |
+
"14079-2": "power tools",
|
418 |
+
"10887-2": "fine thread",
|
419 |
+
"11569-1": "final frame",
|
420 |
+
"186-2": "Official surveys",
|
421 |
+
"12022-2": "immediate neighborhood",
|
422 |
+
"6613-1": "universal manner",
|
423 |
+
"5290-1": "official limits",
|
424 |
+
"11627-2": "major stature",
|
425 |
+
"9879-2": "former charge",
|
426 |
+
"1306-2": "negative material",
|
427 |
+
"6871-1": "stronger tone",
|
428 |
+
"8587-1": "modern restoration",
|
429 |
+
"13896-1": "one theatre",
|
430 |
+
"14615-1": "technical quality",
|
431 |
+
"14073-2": "similar power",
|
432 |
+
"11071-2": "secondary concern",
|
433 |
+
"2061-1": "old one",
|
434 |
+
"11467-2": "main test",
|
435 |
+
"11461-1": "desired tone",
|
436 |
+
"12147-1": "live one",
|
437 |
+
"11276-1": "quality production",
|
438 |
+
"4491-1": "sudden collapse",
|
439 |
+
"7313-1": "term \"body",
|
440 |
+
"14443-2": "local camps",
|
441 |
+
"10762-1": "original panel",
|
442 |
+
"8722-1": "good measure",
|
443 |
+
"7711-1": "high loads",
|
444 |
+
"2077-2": "popular track",
|
445 |
+
"4716-1": "real nature",
|
446 |
+
"14229-1": "possible range",
|
447 |
+
"8678-2": "longer course",
|
448 |
+
"8317-1": "licence area",
|
449 |
+
"10223-1": "long sleep",
|
450 |
+
"4287-2": "undocumented population",
|
451 |
+
"2314-1": "different unit",
|
452 |
+
"8389-2": "final disappearance",
|
453 |
+
"7589-2": "1 credit",
|
454 |
+
"962-1": "actual board",
|
455 |
+
"4146-2": "woman's side",
|
456 |
+
"2492-1": "get to know",
|
457 |
+
"7782-2": "either year",
|
458 |
+
"11009-2": "present breed",
|
459 |
+
"8819-2": "direct source",
|
460 |
+
"3459-1": "2010/11 campaign",
|
461 |
+
"3730-1": "complete protection",
|
462 |
+
"4604-1": "text line",
|
463 |
+
"8515-2": "significant foundation",
|
464 |
+
"7297-2": "notable facility",
|
465 |
+
"1313-1": "big cut",
|
466 |
+
"11072-1": "secondary concern",
|
467 |
+
"11232-1": "similar notice",
|
468 |
+
"4152-1": "full one",
|
469 |
+
"8219-1": "central process",
|
470 |
+
"14019-2": "similar direction",
|
471 |
+
"9375-1": "special meaning",
|
472 |
+
"11355-2": "sufficient interest",
|
473 |
+
"7727-2": "one classification",
|
474 |
+
"8516-2": "empty position",
|
475 |
+
"4181-1": "2D model",
|
476 |
+
"9237-1": "initial character",
|
477 |
+
"5041-2": "initial strain",
|
478 |
+
"4610-2": "boiling point",
|
479 |
+
"3715-1": "campaign success",
|
480 |
+
"4449-2": "major use",
|
481 |
+
"4395-2": "larger body",
|
482 |
+
"2622-2": "least one entry",
|
483 |
+
"7861-1": "correct accounts",
|
484 |
+
"6809-2": "good links",
|
485 |
+
"10239-1": "critical point",
|
486 |
+
"14839-2": "shared background",
|
487 |
+
"5632-2": "round table",
|
488 |
+
"8715-2": "entire test",
|
489 |
+
"3437-2": "community contributions",
|
490 |
+
"13956-1": "second release in 1973 with an orange sleeve. In both versions, the first side contained 10 stories narrated by Laura Olsher, complete with sound effects",
|
491 |
+
"2812-1": "good links",
|
492 |
+
"8198-2": "smaller sum",
|
493 |
+
"13843-2": "final sum",
|
494 |
+
"1098-2": "whole race",
|
495 |
+
"8765-1": "central reason",
|
496 |
+
"4216-1": "entire case",
|
497 |
+
"9762-2": "another edition",
|
498 |
+
"9554-2": "construction stage",
|
499 |
+
"1485-2": "official cables",
|
500 |
+
"6026-2": "ticker tape",
|
501 |
+
"679-1": "undefined point"
|
502 |
+
}
|
eval_results.json
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"epoch": 2.0,
|
3 |
+
"eval_exact_match": 69.39999999999999,
|
4 |
+
"eval_f1": 69.43,
|
5 |
+
"eval_samples": 500
|
6 |
+
}
|
finetune_logs.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
merges.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:341e2423dc7b10c4c66b0cebdd8a7135cb3b4569a77189bfa8e8b59db0c2a06b
|
3 |
+
size 592407201
|
special_tokens_map.json
ADDED
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"bos_token": "<s>",
|
3 |
+
"cls_token": "<s>",
|
4 |
+
"eos_token": "</s>",
|
5 |
+
"mask_token": {
|
6 |
+
"content": "<mask>",
|
7 |
+
"lstrip": true,
|
8 |
+
"normalized": false,
|
9 |
+
"rstrip": false,
|
10 |
+
"single_word": false
|
11 |
+
},
|
12 |
+
"pad_token": "<pad>",
|
13 |
+
"sep_token": "</s>",
|
14 |
+
"unk_token": "<unk>"
|
15 |
+
}
|
tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
tokenizer_config.json
ADDED
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"add_prefix_space": false,
|
3 |
+
"bos_token": "<s>",
|
4 |
+
"cls_token": "<s>",
|
5 |
+
"eos_token": "</s>",
|
6 |
+
"errors": "replace",
|
7 |
+
"mask_token": "<mask>",
|
8 |
+
"model_max_length": 4096,
|
9 |
+
"name_or_path": "../results/phrase_retrieval/PR-pass/qa/allenai/longformer-base-4096/finetuned",
|
10 |
+
"pad_token": "<pad>",
|
11 |
+
"sep_token": "</s>",
|
12 |
+
"special_tokens_map_file": null,
|
13 |
+
"tokenizer_class": "LongformerTokenizer",
|
14 |
+
"trim_offsets": true,
|
15 |
+
"unk_token": "<unk>"
|
16 |
+
}
|
train_results.json
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"epoch": 2.0,
|
3 |
+
"train_loss": 0.9066677579012784,
|
4 |
+
"train_runtime": 893.6642,
|
5 |
+
"train_samples": 1650,
|
6 |
+
"train_samples_per_second": 3.693,
|
7 |
+
"train_steps_per_second": 1.846
|
8 |
+
}
|
trainer_state.json
ADDED
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_metric": null,
|
3 |
+
"best_model_checkpoint": null,
|
4 |
+
"epoch": 2.0,
|
5 |
+
"global_step": 1650,
|
6 |
+
"is_hyper_param_search": false,
|
7 |
+
"is_local_process_zero": true,
|
8 |
+
"is_world_process_zero": true,
|
9 |
+
"log_history": [
|
10 |
+
{
|
11 |
+
"epoch": 0.61,
|
12 |
+
"learning_rate": 2.090909090909091e-05,
|
13 |
+
"loss": 1.2525,
|
14 |
+
"step": 500
|
15 |
+
},
|
16 |
+
{
|
17 |
+
"epoch": 1.21,
|
18 |
+
"learning_rate": 1.1818181818181819e-05,
|
19 |
+
"loss": 1.0478,
|
20 |
+
"step": 1000
|
21 |
+
},
|
22 |
+
{
|
23 |
+
"epoch": 1.82,
|
24 |
+
"learning_rate": 2.7272727272727272e-06,
|
25 |
+
"loss": 0.5,
|
26 |
+
"step": 1500
|
27 |
+
},
|
28 |
+
{
|
29 |
+
"epoch": 2.0,
|
30 |
+
"step": 1650,
|
31 |
+
"total_flos": 8622543790080000.0,
|
32 |
+
"train_loss": 0.9066677579012784,
|
33 |
+
"train_runtime": 893.6642,
|
34 |
+
"train_samples_per_second": 3.693,
|
35 |
+
"train_steps_per_second": 1.846
|
36 |
+
}
|
37 |
+
],
|
38 |
+
"max_steps": 1650,
|
39 |
+
"num_train_epochs": 2,
|
40 |
+
"total_flos": 8622543790080000.0,
|
41 |
+
"trial_name": null,
|
42 |
+
"trial_params": null
|
43 |
+
}
|
training_args.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:aa622283c41db520921d00be29991b71b3a97d624437552ec2210f1e82901f31
|
3 |
+
size 3375
|
vocab.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|