ryo0634 committed on
Commit
558a735
1 Parent(s): df4eb1f

commit the model and tokenizer.

Browse files
.gitattributes CHANGED
@@ -25,3 +25,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
25
  *.zip filter=lfs diff=lfs merge=lfs -text
26
  *.zstandard filter=lfs diff=lfs merge=lfs -text
27
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
25
  *.zip filter=lfs diff=lfs merge=lfs -text
26
  *.zstandard filter=lfs diff=lfs merge=lfs -text
27
  *tfevents* filter=lfs diff=lfs merge=lfs -text
28
+ entity_vocab.json filter=lfs diff=lfs merge=lfs -text
added_tokens.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "<ent2>": 50266,
3
+ "<ent>": 50265
4
+ }
config.json ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "huggingface_models/luke-base-comp-20201201",
3
+ "architectures": [
4
+ "LukeForMaskedLM"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "bert_model_name": "roberta-base",
8
+ "bos_token_id": 0,
9
+ "entity_emb_size": 256,
10
+ "entity_vocab_size": 5987096,
11
+ "eos_token_id": 2,
12
+ "gradient_checkpointing": false,
13
+ "hidden_act": "gelu",
14
+ "hidden_dropout_prob": 0.1,
15
+ "hidden_size": 768,
16
+ "initializer_range": 0.02,
17
+ "intermediate_size": 3072,
18
+ "layer_norm_eps": 1e-05,
19
+ "max_position_embeddings": 514,
20
+ "model_type": "luke",
21
+ "num_attention_heads": 12,
22
+ "num_hidden_layers": 12,
23
+ "output_past": true,
24
+ "pad_token_id": 1,
25
+ "position_embedding_type": "absolute",
26
+ "torch_dtype": "float32",
27
+ "transformers_version": "4.20.0.dev0",
28
+ "type_vocab_size": 1,
29
+ "use_cache": true,
30
+ "use_entity_aware_attention": true,
31
+ "vocab_size": 50267
32
+ }
entity_vocab.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:15036be83edeb1dca3f020cdb1f581af6e2f114fb500046e9f10e029d6b87cd0
3
+ size 208387837
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:58511e07256f454149aa79aff9d4833563a70d2f5f18c66584db282e360c4e2f
3
+ size 6744195187
special_tokens_map.json ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ {
4
+ "content": "<ent>",
5
+ "lstrip": false,
6
+ "normalized": true,
7
+ "rstrip": false,
8
+ "single_word": false
9
+ },
10
+ {
11
+ "content": "<ent2>",
12
+ "lstrip": false,
13
+ "normalized": true,
14
+ "rstrip": false,
15
+ "single_word": false
16
+ }
17
+ ],
18
+ "bos_token": {
19
+ "content": "<s>",
20
+ "lstrip": false,
21
+ "normalized": true,
22
+ "rstrip": false,
23
+ "single_word": false
24
+ },
25
+ "cls_token": {
26
+ "content": "<s>",
27
+ "lstrip": false,
28
+ "normalized": true,
29
+ "rstrip": false,
30
+ "single_word": false
31
+ },
32
+ "eos_token": {
33
+ "content": "</s>",
34
+ "lstrip": false,
35
+ "normalized": true,
36
+ "rstrip": false,
37
+ "single_word": false
38
+ },
39
+ "mask_token": {
40
+ "content": "<mask>",
41
+ "lstrip": true,
42
+ "normalized": true,
43
+ "rstrip": false,
44
+ "single_word": false
45
+ },
46
+ "pad_token": {
47
+ "content": "<pad>",
48
+ "lstrip": false,
49
+ "normalized": true,
50
+ "rstrip": false,
51
+ "single_word": false
52
+ },
53
+ "sep_token": {
54
+ "content": "</s>",
55
+ "lstrip": false,
56
+ "normalized": true,
57
+ "rstrip": false,
58
+ "single_word": false
59
+ },
60
+ "unk_token": {
61
+ "content": "<unk>",
62
+ "lstrip": false,
63
+ "normalized": true,
64
+ "rstrip": false,
65
+ "single_word": false
66
+ }
67
+ }
tokenizer_config.json ADDED
@@ -0,0 +1,107 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "additional_special_tokens": [
4
+ {
5
+ "__type": "AddedToken",
6
+ "content": "<ent>",
7
+ "lstrip": false,
8
+ "normalized": true,
9
+ "rstrip": false,
10
+ "single_word": false
11
+ },
12
+ {
13
+ "__type": "AddedToken",
14
+ "content": "<ent2>",
15
+ "lstrip": false,
16
+ "normalized": true,
17
+ "rstrip": false,
18
+ "single_word": false
19
+ },
20
+ {
21
+ "__type": "AddedToken",
22
+ "content": "<ent>",
23
+ "lstrip": false,
24
+ "normalized": true,
25
+ "rstrip": false,
26
+ "single_word": false
27
+ },
28
+ {
29
+ "__type": "AddedToken",
30
+ "content": "<ent2>",
31
+ "lstrip": false,
32
+ "normalized": true,
33
+ "rstrip": false,
34
+ "single_word": false
35
+ }
36
+ ],
37
+ "bos_token": {
38
+ "__type": "AddedToken",
39
+ "content": "<s>",
40
+ "lstrip": false,
41
+ "normalized": true,
42
+ "rstrip": false,
43
+ "single_word": false
44
+ },
45
+ "cls_token": {
46
+ "__type": "AddedToken",
47
+ "content": "<s>",
48
+ "lstrip": false,
49
+ "normalized": true,
50
+ "rstrip": false,
51
+ "single_word": false
52
+ },
53
+ "entity_mask2_token": "[MASK2]",
54
+ "entity_mask_token": "[MASK]",
55
+ "entity_pad_token": "[PAD]",
56
+ "entity_token_1": "<ent>",
57
+ "entity_token_2": "<ent2>",
58
+ "entity_unk_token": "[UNK]",
59
+ "eos_token": {
60
+ "__type": "AddedToken",
61
+ "content": "</s>",
62
+ "lstrip": false,
63
+ "normalized": true,
64
+ "rstrip": false,
65
+ "single_word": false
66
+ },
67
+ "errors": "replace",
68
+ "mask_token": {
69
+ "__type": "AddedToken",
70
+ "content": "<mask>",
71
+ "lstrip": true,
72
+ "normalized": true,
73
+ "rstrip": false,
74
+ "single_word": false
75
+ },
76
+ "max_entity_length": 32,
77
+ "max_mention_length": 30,
78
+ "model_max_length": 512,
79
+ "name_or_path": "huggingface_models/luke-base-comp-20201201",
80
+ "pad_token": {
81
+ "__type": "AddedToken",
82
+ "content": "<pad>",
83
+ "lstrip": false,
84
+ "normalized": true,
85
+ "rstrip": false,
86
+ "single_word": false
87
+ },
88
+ "sep_token": {
89
+ "__type": "AddedToken",
90
+ "content": "</s>",
91
+ "lstrip": false,
92
+ "normalized": true,
93
+ "rstrip": false,
94
+ "single_word": false
95
+ },
96
+ "special_tokens_map_file": "/home/li0123/.cache/huggingface/transformers/e2adf9c43ab82e2db47210f87bb53aa544055afc59e35389ffff22cd8fbd5d94.0ead83d1c22a0d79f17fc2eb3b32bd99fd9a262049166681d9713840ae868c2a",
97
+ "task": null,
98
+ "tokenizer_class": "LukeTokenizer",
99
+ "unk_token": {
100
+ "__type": "AddedToken",
101
+ "content": "<unk>",
102
+ "lstrip": false,
103
+ "normalized": true,
104
+ "rstrip": false,
105
+ "single_word": false
106
+ }
107
+ }
vocab.json ADDED
The diff for this file is too large to render. See raw diff