Diar committed on
Commit c7aa77f • 1 Parent(s): f9ca82a
lm1/language_model_config.json → language_model_config.json RENAMED
@@ -1,5 +1,4 @@
 {
- "_name_or_path": "sentence-transformers/LaBSE",
 "architectures": [
 "BertModel"
 ],
lm2/language_model.bin DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:d293ec78f56a8c0f471480fe13916b7055836e89f5fa1774b48cf1bc35228e7b
- size 1883803575
 
 
 
 
lm2/language_model_config.json DELETED
@@ -1,34 +0,0 @@
- {
- "_name_or_path": "sentence-transformers/LaBSE",
- "architectures": [
- "BertModel"
- ],
- "attention_probs_dropout_prob": 0.1,
- "directionality": "bidi",
- "gradient_checkpointing": false,
- "hidden_act": "gelu",
- "hidden_dropout_prob": 0.1,
- "hidden_size": 768,
- "initializer_range": 0.02,
- "intermediate_size": 3072,
- "language": "english",
- "layer_norm_eps": 1e-12,
- "max_position_embeddings": 512,
- "model_type": "dpr",
- "name": "DPRContextEncoder",
- "num_attention_heads": 12,
- "num_hidden_layers": 12,
- "pad_token_id": 0,
- "pooler_fc_size": 768,
- "pooler_num_attention_heads": 12,
- "pooler_num_fc_layers": 3,
- "pooler_size_per_head": 128,
- "pooler_type": "first_token_transform",
- "position_embedding_type": "absolute",
- "projection_dim": 0,
- "revision": null,
- "transformers_version": "4.6.0.dev0",
- "type_vocab_size": 2,
- "use_cache": true,
- "vocab_size": 501153
- }
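For context, the deleted lm2 config describes the passage-side encoder: a 12-layer, 768-hidden BERT body over the 501,153-token LaBSE vocabulary, typed as a DPR context encoder. A minimal sketch (not part of this commit) of recreating an encoder of the same shape with the transformers DPR classes, using the values from the config above:

```python
from transformers import DPRConfig, DPRContextEncoder

# Mirror the deleted lm2/language_model_config.json; values copied from the config above.
config = DPRConfig(
    vocab_size=501153,
    hidden_size=768,
    num_hidden_layers=12,
    num_attention_heads=12,
    intermediate_size=3072,
    hidden_act="gelu",
    hidden_dropout_prob=0.1,
    attention_probs_dropout_prob=0.1,
    max_position_embeddings=512,
    type_vocab_size=2,
    initializer_range=0.02,
    layer_norm_eps=1e-12,
    pad_token_id=0,
    position_embedding_type="absolute",
    projection_dim=0,  # 0 = no extra projection on top of the [CLS] output
)

# Randomly initialized encoder with the same shape as the deleted lm2/language_model.bin
# (the real weights would have to be loaded separately).
model = DPRContextEncoder(config)
print(sum(p.numel() for p in model.parameters()))  # at float32, roughly the 1.88 GB of the deleted checkpoint
```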
 
 
passage/tokenizer_config.json DELETED
@@ -1 +0,0 @@
- {"do_lower_case": false, "do_basic_tokenize": true, "never_split": null, "unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]", "tokenize_chinese_chars": true, "strip_accents": null, "model_max_length": 512, "special_tokens_map_file": "C:\\Users\\DiarS/.cache\\huggingface\\transformers\\5fb4590a69eca214db9d31f0a4e90637a90fab773b17d382309a27f2a34da5be.dd8bd9bfd3664b530ea4e645105f557769387b3da9f79bdb55ed556bdd80611d", "name_or_path": "../input/mexdpr/LaBSE-BERT/tokenizer", "tokenizer_class": "DPRContextEncoderTokenizer", "vocab_size": 501153}
 
 
prediction_head_0.bin DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:339cf0a79d42b8a0e283632633b8c7c079101ef627869b4b27f743462db4ecf9
- size 495
 
 
 
 
prediction_head_0_config.json DELETED
@@ -1 +0,0 @@
- {"training": false, "similarity_function": "dot_product", "task_name": "text_similarity", "model_type": "text_similarity", "ph_output_type": "per_sequence", "global_loss_buffer_size": 150000, "label_tensor_name": "label_ids", "label_list": ["hard_negative", "positive"], "metric": "text_similarity_metric", "name": "TextSimilarityHead"}
 
 
processor_config.json DELETED
@@ -1 +0,0 @@
- {"baskets": [], "data_dir": "../input/mexdpr/fifths", "dev_filename": "nq-dev.json", "dev_split": 0.0, "embed_title": true, "max_samples": null, "max_seq_len": null, "max_seq_len_passage": 256, "max_seq_len_query": 64, "num_hard_negatives": 1, "num_positives": 1, "proxies": null, "shuffle_negatives": true, "shuffle_positives": false, "tasks": {"text_similarity": {"label_list": ["hard_negative", "positive"], "metric": "text_similarity_metric", "label_tensor_name": "label_ids", "label_name": "label", "label_column_name": null, "text_column_name": null, "task_type": "text_similarity"}}, "test_filename": "nq-test.json", "tokenizer": null, "train_filename": "nq-train.json", "query_tokenizer": "DPRQuestionEncoderTokenizer", "passage_tokenizer": "DPRContextEncoderTokenizer", "processor": "TextSimilarityProcessor"}
 
 
lm1/language_model.bin → pytorch_model.bin RENAMED
File without changes
query/special_tokens_map.json DELETED
@@ -1 +0,0 @@
- {"unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]"}
 
 
query/vocab.txt DELETED
The diff for this file is too large to render. See raw diff
 
passage/special_tokens_map.json → special_tokens_map.json RENAMED
File without changes
query/tokenizer_config.json → tokenizer_config.json RENAMED
File without changes
passage/vocab.txt → vocab.txt RENAMED
File without changes
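Net effect of the renames: the kept weight and tokenizer files from lm1/, query/, and passage/ now sit at the repository root (pytorch_model.bin, language_model_config.json, vocab.txt, tokenizer_config.json, special_tokens_map.json). A loading sketch under the assumption that the checkpoint's tensors line up with a plain BertModel; if the file was written by FARM/Haystack rather than transformers' save_pretrained(), the state-dict keys may need remapping:

```python
import torch
from transformers import BertConfig, BertModel

# Paths assume a local clone of this repository.
config = BertConfig.from_json_file("language_model_config.json")
model = BertModel(config)

state_dict = torch.load("pytorch_model.bin", map_location="cpu")
missing, unexpected = model.load_state_dict(state_dict, strict=False)
print(f"missing keys: {len(missing)}, unexpected keys: {len(unexpected)}")  # sanity check on key names
```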