michaelfeil committed
Commit a0c1f55
Parent: 8cd49d5

Upload 14 files

.gitattributes CHANGED
@@ -1,35 +1,21 @@
- *.7z filter=lfs diff=lfs merge=lfs -text
- *.arrow filter=lfs diff=lfs merge=lfs -text
  *.bin filter=lfs diff=lfs merge=lfs -text
- *.bz2 filter=lfs diff=lfs merge=lfs -text
- *.ckpt filter=lfs diff=lfs merge=lfs -text
- *.ftz filter=lfs diff=lfs merge=lfs -text
- *.gz filter=lfs diff=lfs merge=lfs -text
  *.h5 filter=lfs diff=lfs merge=lfs -text
  *.joblib filter=lfs diff=lfs merge=lfs -text
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
  *.model filter=lfs diff=lfs merge=lfs -text
  *.msgpack filter=lfs diff=lfs merge=lfs -text
- *.npy filter=lfs diff=lfs merge=lfs -text
- *.npz filter=lfs diff=lfs merge=lfs -text
- *.onnx filter=lfs diff=lfs merge=lfs -text
- *.ot filter=lfs diff=lfs merge=lfs -text
- *.parquet filter=lfs diff=lfs merge=lfs -text
  *.pb filter=lfs diff=lfs merge=lfs -text
- *.pickle filter=lfs diff=lfs merge=lfs -text
- *.pkl filter=lfs diff=lfs merge=lfs -text
  *.pt filter=lfs diff=lfs merge=lfs -text
  *.pth filter=lfs diff=lfs merge=lfs -text
- *.rar filter=lfs diff=lfs merge=lfs -text
- *.safetensors filter=lfs diff=lfs merge=lfs -text
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
- *.tar.* filter=lfs diff=lfs merge=lfs -text
- *.tar filter=lfs diff=lfs merge=lfs -text
- *.tflite filter=lfs diff=lfs merge=lfs -text
- *.tgz filter=lfs diff=lfs merge=lfs -text
- *.wasm filter=lfs diff=lfs merge=lfs -text
- *.xz filter=lfs diff=lfs merge=lfs -text
- *.zip filter=lfs diff=lfs merge=lfs -text
- *.zst filter=lfs diff=lfs merge=lfs -text
- *tfevents* filter=lfs diff=lfs merge=lfs -text

+ *.bin.* filter=lfs diff=lfs merge=lfs -text
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
  *.bin filter=lfs diff=lfs merge=lfs -text
  *.h5 filter=lfs diff=lfs merge=lfs -text
+ *.tflite filter=lfs diff=lfs merge=lfs -text
+ *.tar.gz filter=lfs diff=lfs merge=lfs -text
+ *.ot filter=lfs diff=lfs merge=lfs -text
+ *.onnx filter=lfs diff=lfs merge=lfs -text
+ *.arrow filter=lfs diff=lfs merge=lfs -text
+ *.ftz filter=lfs diff=lfs merge=lfs -text
  *.joblib filter=lfs diff=lfs merge=lfs -text
  *.model filter=lfs diff=lfs merge=lfs -text
  *.msgpack filter=lfs diff=lfs merge=lfs -text
  *.pb filter=lfs diff=lfs merge=lfs -text
  *.pt filter=lfs diff=lfs merge=lfs -text
  *.pth filter=lfs diff=lfs merge=lfs -text
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
+ flax_model.msgpack filter=lfs diff=lfs merge=lfs -text
+ pytorch_model.bin filter=lfs diff=lfs merge=lfs -text
+ .git/lfs/objects/29/93/29936a7b8af6bfb56f24e9a162e586c784b05997fdb6a2fdfca3d69bf0368feb filter=lfs diff=lfs merge=lfs -text
+ .git/lfs/objects/6e/af/6eaf3508e4f24b3942477ea5d3b65c2a5ea3e81d2932c885c8644f43cbfc8b53 filter=lfs diff=lfs merge=lfs -text
 
1_Pooling/config.json ADDED
@@ -0,0 +1,7 @@
+ {
+   "word_embedding_dimension": 768,
+   "pooling_mode_cls_token": false,
+   "pooling_mode_mean_tokens": true,
+   "pooling_mode_max_tokens": false,
+   "pooling_mode_mean_sqrt_len_tokens": false
+ }
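For reference, these settings are what sentence-transformers feeds into its `Pooling` module (mean pooling over token embeddings). A minimal sketch of building the equivalent module directly through the public `sentence_transformers.models` API:

```python
from sentence_transformers import models

# Mean pooling over token embeddings, matching 1_Pooling/config.json
pooling = models.Pooling(
    word_embedding_dimension=768,
    pooling_mode_mean_tokens=True,
    pooling_mode_cls_token=False,
    pooling_mode_max_tokens=False,
)
```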
README.md CHANGED
@@ -1,3 +1,107 @@
  ---
+ pipeline_tag: sentence-similarity
  license: apache-2.0
+ tags:
+ - sentence-transformers
+ - feature-extraction
+ - sentence-similarity
+ - transformers
  ---
+
+ # sentence-transformers/stsb-roberta-base-v2
+
+ This is a [sentence-transformers](https://www.SBERT.net) model: it maps sentences and paragraphs to a 768-dimensional dense vector space and can be used for tasks like clustering or semantic search.
+
+ ## Usage (Sentence-Transformers)
+
+ Using this model is easy once you have [sentence-transformers](https://www.SBERT.net) installed:
+
+ ```
+ pip install -U sentence-transformers
+ ```
+
+ Then you can use the model like this:
+
+ ```python
+ from sentence_transformers import SentenceTransformer
+
+ sentences = ["This is an example sentence", "Each sentence is converted"]
+
+ model = SentenceTransformer('sentence-transformers/stsb-roberta-base-v2')
+ embeddings = model.encode(sentences)
+ print(embeddings)
+ ```
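The embeddings can then be compared directly; cosine similarity, for example, gives a sentence-similarity score. A minimal sketch using `sentence_transformers.util` (the example sentences are illustrative only):

```python
from sentence_transformers import SentenceTransformer, util

model = SentenceTransformer('sentence-transformers/stsb-roberta-base-v2')
emb = model.encode(
    ["A man is eating food.", "A man is eating a piece of bread."],
    convert_to_tensor=True,
)
# Cosine similarity of the two sentence embeddings: values near 1 mean "very similar"
print(util.cos_sim(emb[0], emb[1]))
```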
+
+ ## Usage (HuggingFace Transformers)
+
+ Without [sentence-transformers](https://www.SBERT.net), you can use the model like this: first, pass your input through the transformer model, then apply the right pooling operation on top of the contextualized word embeddings.
+
+ ```python
+ from transformers import AutoTokenizer, AutoModel
+ import torch
+
+
+ # Mean Pooling - take attention mask into account for correct averaging
+ def mean_pooling(model_output, attention_mask):
+     token_embeddings = model_output[0]  # first element of model_output contains all token embeddings
+     input_mask_expanded = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
+     return torch.sum(token_embeddings * input_mask_expanded, 1) / torch.clamp(input_mask_expanded.sum(1), min=1e-9)
+
+
+ # Sentences we want sentence embeddings for
+ sentences = ['This is an example sentence', 'Each sentence is converted']
+
+ # Load model from HuggingFace Hub
+ tokenizer = AutoTokenizer.from_pretrained('sentence-transformers/stsb-roberta-base-v2')
+ model = AutoModel.from_pretrained('sentence-transformers/stsb-roberta-base-v2')
+
+ # Tokenize sentences
+ encoded_input = tokenizer(sentences, padding=True, truncation=True, return_tensors='pt')
+
+ # Compute token embeddings
+ with torch.no_grad():
+     model_output = model(**encoded_input)
+
+ # Perform pooling. In this case, mean pooling.
+ sentence_embeddings = mean_pooling(model_output, encoded_input['attention_mask'])
+
+ print("Sentence embeddings:")
+ print(sentence_embeddings)
+ ```
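The pooled vectors above are not normalized. If you want cosine-similarity scores from them, one option is to L2-normalize and take dot products; a small optional sketch (not part of the original card, reusing `sentence_embeddings` from the block above):

```python
import torch.nn.functional as F

# L2-normalize so that a plain dot product equals cosine similarity
normalized = F.normalize(sentence_embeddings, p=2, dim=1)
cosine_scores = normalized @ normalized.T
print(cosine_scores)
```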
+
+ ## Evaluation Results
+
+ For an automated evaluation of this model, see the *Sentence Embeddings Benchmark*: [https://seb.sbert.net](https://seb.sbert.net?model_name=sentence-transformers/stsb-roberta-base-v2)
+
+ ## Full Model Architecture
+
+ ```
+ SentenceTransformer(
+   (0): Transformer({'max_seq_length': 75, 'do_lower_case': False}) with Transformer model: RobertaModel
+   (1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False})
+ )
+ ```
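As the architecture shows, inputs are truncated to a `max_seq_length` of 75 tokens by default. If your texts are longer, the limit can be raised at load time, up to the underlying RoBERTa position-embedding budget; a sketch (the value 128 is just an example):

```python
from sentence_transformers import SentenceTransformer

model = SentenceTransformer('sentence-transformers/stsb-roberta-base-v2')
model.max_seq_length = 128  # example value; must stay within the transformer's position embeddings
```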
+
+ ## Citing & Authors
+
+ This model was trained by [sentence-transformers](https://www.sbert.net/).
+
+ If you find this model helpful, feel free to cite our publication [Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks](https://arxiv.org/abs/1908.10084):
+
+ ```bibtex
+ @inproceedings{reimers-2019-sentence-bert,
+     title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
+     author = "Reimers, Nils and Gurevych, Iryna",
+     booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
+     month = "11",
+     year = "2019",
+     publisher = "Association for Computational Linguistics",
+     url = "http://arxiv.org/abs/1908.10084",
+ }
+ ```
config.json ADDED
@@ -0,0 +1,30 @@
+ {
+   "_name_or_path": "old_models/stsb-roberta-base-v2/0_Transformer",
+   "architectures": [
+     "HFAdaptedRoBERTaHeadless"
+   ],
+   "auto_map": {
+     "AutoConfig": "modeling_roberta.HFAdaptedRoBERTaConfig",
+     "AutoModel": "modeling_roberta.HFAdaptedRoBERTaHeadless"
+   },
+   "attention_probs_dropout_prob": 0.1,
+   "bos_token_id": 0,
+   "eos_token_id": 2,
+   "gradient_checkpointing": false,
+   "hidden_act": "gelu",
+   "hidden_dropout_prob": 0.1,
+   "hidden_size": 768,
+   "initializer_range": 0.02,
+   "intermediate_size": 3072,
+   "layer_norm_eps": 1e-05,
+   "max_position_embeddings": 514,
+   "model_type": "roberta-custom",
+   "num_attention_heads": 12,
+   "num_hidden_layers": 12,
+   "pad_token_id": 1,
+   "position_embedding_type": "absolute",
+   "transformers_version": "4.7.0",
+   "type_vocab_size": 1,
+   "use_cache": true,
+   "vocab_size": 50265
+ }
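Because `auto_map` points at custom classes shipped in `modeling_roberta.py`, loading this checkpoint through the `transformers` Auto classes presumably requires `trust_remote_code=True`. A minimal sketch (the repository id is a placeholder for wherever these files are hosted, and the `fms` package imported by the custom code must be installed):

```python
from transformers import AutoConfig, AutoModel

repo_id = "<this-repo-id>"  # hypothetical placeholder, not a real model id
config = AutoConfig.from_pretrained(repo_id, trust_remote_code=True)  # -> HFAdaptedRoBERTaConfig
model = AutoModel.from_pretrained(repo_id, trust_remote_code=True)    # -> HFAdaptedRoBERTaHeadless
```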
config_sentence_transformers.json ADDED
@@ -0,0 +1,7 @@
+ {
+   "__version__": {
+     "sentence_transformers": "2.0.0",
+     "transformers": "4.7.0",
+     "pytorch": "1.9.0+cu102"
+   }
+ }
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
modeling_roberta.py ADDED
@@ -0,0 +1,188 @@
+ from typing import Optional, Tuple
+
+ import torch
+ import torch.nn as nn
+ from transformers import PretrainedConfig
+ from transformers.modeling_outputs import BaseModelOutputWithPastAndCrossAttentions
+
+ from fms.models.hf.lm_head_mixins import (
+     MaskedLMHeadMixin,
+     SequenceClassificationLMHeadMixin,
+ )
+ from fms.models.hf.modeling_hf_adapter import HFEncoder, HFEncoderModelArchitecture
+ from fms.models.roberta import RoBERTa, RoBERTaConfig, RoBERTaHeadless
+
+
+ class HFAdaptedRoBERTaConfig(PretrainedConfig):
+     model_type = "hf_adapted_roberta"
+
+     attribute_map = {
+         "vocab_size": "src_vocab_size",
+         "hidden_size": "emb_dim",
+         "num_attention_heads": "nheads",
+         "num_hidden_layers": "nlayers",
+         "tie_word_embeddings": "tie_heads",
+     }
+
+     def __init__(
+         self,
+         src_vocab_size=None,
+         emb_dim=None,
+         nheads=12,
+         nlayers=12,
+         max_pos=512,
+         pad_token_id=1,
+         hidden_grow_factor=4,
+         activation_fn="gelu",
+         classifier_activation_fn="tanh",
+         p_dropout=0.1,
+         classifier_dropout=0.1,
+         use_cache=True,
+         num_labels=1,
+         norm_eps=1e-12,
+         tie_heads=False,
+         **kwargs,
+     ):
+         self.src_vocab_size = src_vocab_size
+         self.emb_dim = emb_dim
+         self.nheads = nheads
+         self.nlayers = nlayers
+         self.max_pos = max_pos
+         self.hidden_grow_factor = hidden_grow_factor
+         if activation_fn.lower() not in ["gelu", "relu", "mish", "swish"]:
+             raise ValueError(
+                 "activation function must be one of gelu, relu, mish, swish"
+             )
+         self.activation_fn = activation_fn
+         self.p_dropout = p_dropout
+         self.classifier_dropout = classifier_dropout
+         self.use_cache = use_cache
+         self.norm_eps = norm_eps
+         self.classifier_activation_fn = classifier_activation_fn
+         self.tie_heads = tie_heads
+         super().__init__(
+             pad_token_id=pad_token_id,
+             num_labels=num_labels,
+             tie_word_embeddings=kwargs.pop("tie_word_embeddings", tie_heads),
+             **kwargs,
+         )
+
+     @classmethod
+     def from_pretrained(
+         cls, pretrained_model_name_or_path, **kwargs
+     ) -> "PretrainedConfig":
+         config_dict, kwargs = cls.get_config_dict(
+             pretrained_model_name_or_path, **kwargs
+         )
+
+         return cls.from_dict(config_dict, **kwargs)
+
+     @classmethod
+     def from_fms_config(cls, config: RoBERTaConfig, **hf_kwargs):
+         config_dict = config.as_dict()
+         config_dict["pad_token_id"] = config_dict.pop("pad_id")
+         return cls.from_dict(config_dict, **hf_kwargs)
+
+
+ class HFAdaptedRoBERTaEncoder(HFEncoder):
+     """Adapter for the Roberta Encoder"""
+
+     def __init__(self, model: RoBERTaHeadless, config: PretrainedConfig):
+         super().__init__(model, config, attention_mask_dim=3)
+
+     def _adapt(
+         self,
+         input_ids: Optional[torch.LongTensor] = None,
+         attention_mask: Optional[torch.FloatTensor] = None,
+         head_mask: Optional[torch.FloatTensor] = None,
+         inputs_embeds: Optional[torch.FloatTensor] = None,
+         output_attentions: Optional[bool] = None,
+         output_hidden_states: Optional[bool] = None,
+         position_ids: Optional[torch.LongTensor] = None,
+         *args,
+         **kwargs,
+     ) -> BaseModelOutputWithPastAndCrossAttentions:
+         return BaseModelOutputWithPastAndCrossAttentions(
+             last_hidden_state=self.model(
+                 x=input_ids, mask=attention_mask, position_ids=position_ids
+             )
+         )
+
+
+ class HFAdaptedRoBERTaHeadless(HFEncoderModelArchitecture):
+     # attributes required by HF
+     config_class = HFAdaptedRoBERTaConfig
+     base_model_prefix = "hf_adapted_roberta"
+
+     def __init__(
+         self,
+         config: PretrainedConfig,
+         encoder: Optional[RoBERTaHeadless] = None,
+         embedding: Optional[nn.Module] = None,
+         *args,
+         **kwargs,
+     ):
+         # in the case we have not yet received the encoder/decoder/embedding, initialize it here
+         if encoder is None or embedding is None:
+             params = config.to_dict()
+             model = RoBERTa(pad_id=params.pop("pad_token_id"), **params)
+             encoder = model.base_model if encoder is None else encoder
+             embedding = model.base_model.embedding if embedding is None else embedding
+
+         # these are now huggingface compatible
+         encoder = HFAdaptedRoBERTaEncoder(encoder, config)
+         super().__init__(encoder, embedding, config, *args, **kwargs)
+
+
+ class HFAdaptedRoBERTaForMaskedLM(MaskedLMHeadMixin, HFAdaptedRoBERTaHeadless):
+     def __init__(self, config: HFAdaptedRoBERTaConfig, *args, **kwargs):
+         super().__init__(
+             config=config,
+             activation_fn=config.activation_fn,
+             norm_eps=config.norm_eps,
+             *args,
+             **kwargs,
+         )
+
+     @classmethod
+     def _hf_model_from_fms(
+         cls, model: RoBERTa, config: HFAdaptedRoBERTaConfig
+     ) -> "HFAdaptedRoBERTaForMaskedLM":
+         return cls(
+             config=config,
+             encoder=model.base_model,
+             embedding=model.base_model.embedding,
+             lm_head=model.classification_head,
+         )
+
+
+ class HFAdaptedRoBERTaForSequenceClassification(
+     SequenceClassificationLMHeadMixin, HFAdaptedRoBERTaHeadless
+ ):
+     def __init__(
+         self,
+         config: HFAdaptedRoBERTaConfig,
+         encoder: Optional[nn.Module] = None,
+         embedding: Optional[nn.Module] = None,
+         *args,
+         **kwargs,
+     ):
+         super().__init__(
+             config=config,
+             classifier_activation_fn=config.classifier_activation_fn,
+             classifier_dropout=config.classifier_dropout,
+             encoder=encoder,
+             embedding=embedding,
+             *args,
+             **kwargs,
+         )
+
+     @classmethod
+     def _hf_model_from_fms(
+         cls, model: RoBERTa, config: HFAdaptedRoBERTaConfig
+     ) -> "HFAdaptedRoBERTaForSequenceClassification":
+         return cls(
+             config=config,
+             encoder=model.base_model,
+             embedding=model.base_model.embedding,
+         )
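The `attribute_map` at the top of `HFAdaptedRoBERTaConfig` is what lets the HF-standard config names and the FMS-style field names refer to the same values. A small illustration (a sketch only; it assumes the `fms` package imported by `modeling_roberta.py` is installed, and the numbers are just the defaults plus an example vocabulary and embedding size):

```python
from modeling_roberta import HFAdaptedRoBERTaConfig

cfg = HFAdaptedRoBERTaConfig(src_vocab_size=50265, emb_dim=768)

# HF-style names resolve to the FMS-style attributes via attribute_map
assert cfg.hidden_size == cfg.emb_dim == 768
assert cfg.num_hidden_layers == cfg.nlayers == 12

# Writes go through the same mapping
cfg.num_attention_heads = 16
assert cfg.nheads == 16
```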
modules.json ADDED
@@ -0,0 +1,14 @@
+ [
+   {
+     "idx": 0,
+     "name": "0",
+     "path": "",
+     "type": "sentence_transformers.models.Transformer"
+   },
+   {
+     "idx": 1,
+     "name": "1",
+     "path": "1_Pooling",
+     "type": "sentence_transformers.models.Pooling"
+   }
+ ]
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:417c0e9ea35a21ead76cb2fe422b51ff7fbd2a206654754753ddc6b27a17ba7c
+ size 498661169
sentence_bert_config.json ADDED
@@ -0,0 +1,4 @@
+ {
+   "max_seq_length": 75,
+   "do_lower_case": false
+ }
special_tokens_map.json ADDED
@@ -0,0 +1 @@
+ {"bos_token": "<s>", "eos_token": "</s>", "unk_token": "<unk>", "sep_token": "</s>", "pad_token": "<pad>", "cls_token": "<s>", "mask_token": {"content": "<mask>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": false}}
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1 @@
+ {"unk_token": {"content": "<unk>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "bos_token": {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "eos_token": {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "add_prefix_space": false, "errors": "replace", "sep_token": {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "cls_token": {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "pad_token": {"content": "<pad>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "mask_token": {"content": "<mask>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "model_max_length": 512, "special_tokens_map_file": null, "name_or_path": "old_models/stsb-roberta-base-v2/0_Transformer"}
vocab.json ADDED
The diff for this file is too large to render. See raw diff