Diar committed
Commit
f9ca82a
1 Parent(s): 0b7f404

Uploading files

lm1/language_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:727302e44cf26233831209fca1712f01a3ccb8f0487239007e7bf1001b2aeaf1
+ size 1883805687
lm1/language_model_config.json ADDED
@@ -0,0 +1,34 @@
+ {
+ "_name_or_path": "sentence-transformers/LaBSE",
+ "architectures": [
+ "BertModel"
+ ],
+ "attention_probs_dropout_prob": 0.1,
+ "directionality": "bidi",
+ "gradient_checkpointing": false,
+ "hidden_act": "gelu",
+ "hidden_dropout_prob": 0.1,
+ "hidden_size": 768,
+ "initializer_range": 0.02,
+ "intermediate_size": 3072,
+ "language": "english",
+ "layer_norm_eps": 1e-12,
+ "max_position_embeddings": 512,
+ "model_type": "dpr",
+ "name": "DPRQuestionEncoder",
+ "num_attention_heads": 12,
+ "num_hidden_layers": 12,
+ "pad_token_id": 0,
+ "pooler_fc_size": 768,
+ "pooler_num_attention_heads": 12,
+ "pooler_num_fc_layers": 3,
+ "pooler_size_per_head": 128,
+ "pooler_type": "first_token_transform",
+ "position_embedding_type": "absolute",
+ "projection_dim": 0,
+ "revision": null,
+ "transformers_version": "4.6.0.dev0",
+ "type_vocab_size": 2,
+ "use_cache": true,
+ "vocab_size": 501153
+ }
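The lm1 config above declares the query-side encoder: a DPR-typed model initialized from sentence-transformers/LaBSE, with hidden_size 768, 512 max positions, and a 501153-token vocabulary. A minimal sketch for inspecting the file with the transformers library; the relative path assumes the repo has been cloned locally (with git-lfs for the .bin files), and loading the .bin weights directly through transformers is not shown here, since the directory follows the FARM/Haystack save layout rather than a plain transformers checkpoint.

# Minimal sketch: inspect the lm1 (question-encoder) config shipped in this commit.
# Assumes the repo is cloned locally so "lm1/language_model_config.json" exists.
from transformers import DPRConfig

config = DPRConfig.from_json_file("lm1/language_model_config.json")
print(config.model_type, config.hidden_size, config.vocab_size)  # dpr 768 501153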
lm2/language_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d293ec78f56a8c0f471480fe13916b7055836e89f5fa1774b48cf1bc35228e7b
+ size 1883803575
lm2/language_model_config.json ADDED
@@ -0,0 +1,34 @@
+ {
+ "_name_or_path": "sentence-transformers/LaBSE",
+ "architectures": [
+ "BertModel"
+ ],
+ "attention_probs_dropout_prob": 0.1,
+ "directionality": "bidi",
+ "gradient_checkpointing": false,
+ "hidden_act": "gelu",
+ "hidden_dropout_prob": 0.1,
+ "hidden_size": 768,
+ "initializer_range": 0.02,
+ "intermediate_size": 3072,
+ "language": "english",
+ "layer_norm_eps": 1e-12,
+ "max_position_embeddings": 512,
+ "model_type": "dpr",
+ "name": "DPRContextEncoder",
+ "num_attention_heads": 12,
+ "num_hidden_layers": 12,
+ "pad_token_id": 0,
+ "pooler_fc_size": 768,
+ "pooler_num_attention_heads": 12,
+ "pooler_num_fc_layers": 3,
+ "pooler_size_per_head": 128,
+ "pooler_type": "first_token_transform",
+ "position_embedding_type": "absolute",
+ "projection_dim": 0,
+ "revision": null,
+ "transformers_version": "4.6.0.dev0",
+ "type_vocab_size": 2,
+ "use_cache": true,
+ "vocab_size": 501153
+ }
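lm2 mirrors lm1 but names the passage-side DPRContextEncoder; together with prediction_head_0.bin, processor_config.json and the query/ and passage/ tokenizer folders below, the repo matches the FARM/Haystack save layout for a bi-encoder retriever. A hedged sketch of loading it, assuming a Haystack 1.x install and that DensePassageRetriever.load accepts this directory layout as-is (import paths and expected subfolder names have shifted between Haystack versions); the InMemoryDocumentStore is only a placeholder.

# Hedged sketch (Haystack 1.x API assumed): load the full bi-encoder retriever
# saved in this repo. The document store here is only a placeholder.
from haystack.document_stores import InMemoryDocumentStore
from haystack.nodes import DensePassageRetriever

document_store = InMemoryDocumentStore()
retriever = DensePassageRetriever.load(
    load_dir=".",                      # repo root after cloning with git-lfs
    document_store=document_store,
)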
passage/special_tokens_map.json ADDED
@@ -0,0 +1 @@
+ {"unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]"}
passage/tokenizer_config.json ADDED
@@ -0,0 +1 @@
+ {"do_lower_case": false, "do_basic_tokenize": true, "never_split": null, "unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]", "tokenize_chinese_chars": true, "strip_accents": null, "model_max_length": 512, "special_tokens_map_file": "C:\\Users\\DiarS/.cache\\huggingface\\transformers\\5fb4590a69eca214db9d31f0a4e90637a90fab773b17d382309a27f2a34da5be.dd8bd9bfd3664b530ea4e645105f557769387b3da9f79bdb55ed556bdd80611d", "name_or_path": "../input/mexdpr/LaBSE-BERT/tokenizer", "tokenizer_class": "DPRContextEncoderTokenizer", "vocab_size": 501153}
passage/vocab.txt ADDED
The diff for this file is too large to render.
prediction_head_0.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:339cf0a79d42b8a0e283632633b8c7c079101ef627869b4b27f743462db4ecf9
+ size 495
prediction_head_0_config.json ADDED
@@ -0,0 +1 @@
+ {"training": false, "similarity_function": "dot_product", "task_name": "text_similarity", "model_type": "text_similarity", "ph_output_type": "per_sequence", "global_loss_buffer_size": 150000, "label_tensor_name": "label_ids", "label_list": ["hard_negative", "positive"], "metric": "text_similarity_metric", "name": "TextSimilarityHead"}
processor_config.json ADDED
@@ -0,0 +1 @@
+ {"baskets": [], "data_dir": "../input/mexdpr/fifths", "dev_filename": "nq-dev.json", "dev_split": 0.0, "embed_title": true, "max_samples": null, "max_seq_len": null, "max_seq_len_passage": 256, "max_seq_len_query": 64, "num_hard_negatives": 1, "num_positives": 1, "proxies": null, "shuffle_negatives": true, "shuffle_positives": false, "tasks": {"text_similarity": {"label_list": ["hard_negative", "positive"], "metric": "text_similarity_metric", "label_tensor_name": "label_ids", "label_name": "label", "label_column_name": null, "text_column_name": null, "task_type": "text_similarity"}}, "test_filename": "nq-test.json", "tokenizer": null, "train_filename": "nq-train.json", "query_tokenizer": "DPRQuestionEncoderTokenizer", "passage_tokenizer": "DPRContextEncoderTokenizer", "processor": "TextSimilarityProcessor"}
query/special_tokens_map.json ADDED
@@ -0,0 +1 @@
+ {"unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]"}
query/tokenizer_config.json ADDED
@@ -0,0 +1 @@
+ {"do_lower_case": false, "do_basic_tokenize": true, "never_split": null, "unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]", "tokenize_chinese_chars": true, "strip_accents": null, "model_max_length": 512, "special_tokens_map_file": "C:\\Users\\DiarS/.cache\\huggingface\\transformers\\5fb4590a69eca214db9d31f0a4e90637a90fab773b17d382309a27f2a34da5be.dd8bd9bfd3664b530ea4e645105f557769387b3da9f79bdb55ed556bdd80611d", "name_or_path": "../input/mexdpr/LaBSE-BERT/tokenizer", "tokenizer_class": "DPRQuestionEncoderTokenizer", "vocab_size": 501153}
query/vocab.txt ADDED
The diff for this file is too large to render.