Update repository
Browse files
- README.md +6 -0
- config.json +5 -0
- log/train/events.out.tfevents.1639092823.allennlp-server4 +3 -0
- log/validation/events.out.tfevents.1639092823.allennlp-server4 +3 -0
- metrics.json +21 -21
- weights.th +1 -1
README.md
CHANGED
@@ -9,4 +9,10 @@ tags:
|
|
9 |
- allennlp
|
10 |
---
|
11 |
|
|
|
|
|
|
|
|
|
|
|
|
|
12 |
This is an implementation of the BiDAF model with GloVe embeddings. The basic layout is pretty simple: encode words as a combination of word embeddings and a character-level encoder, pass the word representations through a bi-LSTM/GRU, use a matrix of attentions to put question information into the passage word representations (this is the only part that is at all non-standard), pass this through another few layers of bi-LSTMs/GRUs, and do a softmax over span start and span end.
|
|
|
9 |
- allennlp
|
10 |
---
|
11 |
|
12 |
+
# TODO: Fill this model card
|
13 |
+
---
|
14 |
+
tags:
|
15 |
+
- allennlp
|
16 |
+
---
|
17 |
+
|
18 |
This is an implementation of the BiDAF model with GloVe embeddings. The basic layout is pretty simple: encode words as a combination of word embeddings and a character-level encoder, pass the word representations through a bi-LSTM/GRU, use a matrix of attentions to put question information into the passage word representations (this is the only part that is at all non-standard), pass this through another few layers of bi-LSTMs/GRUs, and do a softmax over span start and span end.
|
config.json
CHANGED
@@ -83,6 +83,11 @@
|
|
83 |
"train_data_path": "https://allennlp.s3.amazonaws.com/datasets/squad/squad-train-v1.1.json",
|
84 |
"validation_data_path": "https://allennlp.s3.amazonaws.com/datasets/squad/squad-dev-v1.1.json",
|
85 |
"trainer": {
|
|
|
|
|
|
|
|
|
|
|
86 |
"grad_norm": 5,
|
87 |
"learning_rate_scheduler": {
|
88 |
"type": "reduce_on_plateau",
|
|
|
83 |
"train_data_path": "https://allennlp.s3.amazonaws.com/datasets/squad/squad-train-v1.1.json",
|
84 |
"validation_data_path": "https://allennlp.s3.amazonaws.com/datasets/squad/squad-dev-v1.1.json",
|
85 |
"trainer": {
|
86 |
+
"callbacks": [
|
87 |
+
{
|
88 |
+
"type": "tensorboard"
|
89 |
+
}
|
90 |
+
],
|
91 |
"grad_norm": 5,
|
92 |
"learning_rate_scheduler": {
|
93 |
"type": "reduce_on_plateau",
|
log/train/events.out.tfevents.1639092823.allennlp-server4
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9d101f26f68f30b20e2b5b7865eb9d1c9b1ede4ecab69bce14a0619e15bf6003
|
3 |
+
size 183516
|
log/validation/events.out.tfevents.1639092823.allennlp-server4
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6683ee1c443432c0726f6284003d203e8470dfef397509e4a1ef733fc481e4ac
|
3 |
+
size 6560
|
metrics.json
CHANGED
@@ -1,27 +1,27 @@
|
|
1 |
{
|
2 |
"best_epoch": 10,
|
3 |
-
"peak_worker_0_memory_MB":
|
4 |
"peak_gpu_0_memory_MB": 2483.1318359375,
|
5 |
-
"training_duration": "1:
|
6 |
"epoch": 19,
|
7 |
-
"training_start_acc": 0.
|
8 |
-
"training_end_acc": 0.
|
9 |
-
"training_span_acc": 0.
|
10 |
-
"training_em": 0.
|
11 |
-
"training_f1": 0.
|
12 |
-
"training_loss": 0.
|
13 |
-
"training_worker_0_memory_MB":
|
14 |
"training_gpu_0_memory_MB": 2470.0771484375,
|
15 |
-
"validation_start_acc": 0.
|
16 |
-
"validation_end_acc": 0.
|
17 |
-
"validation_span_acc": 0.
|
18 |
-
"validation_em": 0.
|
19 |
-
"validation_f1": 0.
|
20 |
-
"validation_loss": 3.
|
21 |
-
"best_validation_start_acc": 0.
|
22 |
-
"best_validation_end_acc": 0.
|
23 |
-
"best_validation_span_acc": 0.
|
24 |
-
"best_validation_em": 0.
|
25 |
-
"best_validation_f1": 0.
|
26 |
-
"best_validation_loss": 2.
|
27 |
}
|
|
|
1 |
{
|
2 |
"best_epoch": 10,
|
3 |
+
"peak_worker_0_memory_MB": 10187.48828125,
|
4 |
"peak_gpu_0_memory_MB": 2483.1318359375,
|
5 |
+
"training_duration": "1:18:19.087825",
|
6 |
"epoch": 19,
|
7 |
+
"training_start_acc": 0.8339136291510177,
|
8 |
+
"training_end_acc": 0.8794849256270049,
|
9 |
+
"training_span_acc": 0.7644493658603408,
|
10 |
+
"training_em": 0.8141759609127958,
|
11 |
+
"training_f1": 0.9123879556752282,
|
12 |
+
"training_loss": 0.8476112175613778,
|
13 |
+
"training_worker_0_memory_MB": 10187.48828125,
|
14 |
"training_gpu_0_memory_MB": 2470.0771484375,
|
15 |
+
"validation_start_acc": 0.6137180700094608,
|
16 |
+
"validation_end_acc": 0.6549668874172185,
|
17 |
+
"validation_span_acc": 0.5263008514664144,
|
18 |
+
"validation_em": 0.660170293282876,
|
19 |
+
"validation_f1": 0.7624231918887229,
|
20 |
+
"validation_loss": 3.598719547829538,
|
21 |
+
"best_validation_start_acc": 0.6228003784295175,
|
22 |
+
"best_validation_end_acc": 0.6637653736991486,
|
23 |
+
"best_validation_span_acc": 0.5383159886471145,
|
24 |
+
"best_validation_em": 0.6693472090823084,
|
25 |
+
"best_validation_f1": 0.7684872331354092,
|
26 |
+
"best_validation_loss": 2.8135785696641453
|
27 |
}
|
weights.th
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 49008050
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8cc6c680744855d63a0a23ce9a6ab4d6c62b5defa4c1c2360252329a7b55fd0e
|
3 |
size 49008050
|