refactor: add docstring
modeling_clip.py  CHANGED  (+31, -1)
@@ -260,7 +260,37 @@ class JinaCLIPModel(JinaCLIPPreTrainedModel):
         normalize_embeddings: bool = False,
         **tokenizer_kwargs,
     ) -> Union[List[torch.Tensor], np.ndarray, torch.Tensor]:
-
+        """
+        Computes sentence embeddings.
+        Args:
+            sentences(`str` or `List[str]`):
+                Sentence or sentences to be encoded
+            batch_size(`int`, *optional*, defaults to 32):
+                Batch size for the computation
+            show_progress_bar(`bool`, *optional*, defaults to None):
+                Show a progress bar when encoding sentences.
+                If set to None, the progress bar is shown only when `logger.level == logging.INFO` or `logger.level == logging.DEBUG`.
+            output_value(`str`, *optional*, defaults to 'sentence_embedding'):
+                The default, 'sentence_embedding', returns sentence embeddings.
+                Can be set to 'token_embeddings' to get wordpiece token embeddings.
+                Set to None to get all output values.
+            convert_to_numpy(`bool`, *optional*, defaults to True):
+                If true, the output is a list of numpy vectors.
+                Otherwise, it is a list of pytorch tensors.
+            convert_to_tensor(`bool`, *optional*, defaults to False):
+                If true, a single stacked tensor is returned.
+                Overwrites any setting from convert_to_numpy.
+            device(`torch.device`, *optional*, defaults to None):
+                Which torch.device to use for the computation.
+            normalize_embeddings(`bool`, *optional*, defaults to False):
+                If set to true, returned vectors will have length 1. In that case, the faster dot-product (util.dot_score) can be used instead of cosine similarity.
+            tokenizer_kwargs(`Dict[str, Any]`, *optional*, defaults to {}):
+                Keyword arguments for the tokenizer
+        Returns:
+            By default, a list of tensors is returned.
+            If convert_to_tensor, a stacked tensor is returned.
+            If convert_to_numpy, a numpy matrix is returned.
+        """
         self.eval()
 
         if show_progress_bar is None:
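For context, here is a minimal sketch of how the documented parameters might be used once this docstring lands. The method name `encode_text`, the `jinaai/jina-clip-v1` checkpoint id, and loading via `AutoModel` with `trust_remote_code=True` are assumptions not shown in this hunk, which only contains the tail of the signature and the docstring; adjust them to the actual API exposed by `JinaCLIPModel`.

```python
# Hedged usage sketch: method name and checkpoint id are assumptions,
# since this diff only shows the parameter list and the new docstring.
from transformers import AutoModel

model = AutoModel.from_pretrained(
    "jinaai/jina-clip-v1", trust_remote_code=True  # assumed checkpoint id
)

sentences = ["A photo of a cat", "A photo of a dog"]

# With the documented defaults (convert_to_numpy=True), a numpy matrix is returned.
embeddings = model.encode_text(sentences, batch_size=32, show_progress_bar=True)

# convert_to_tensor=True overrides convert_to_numpy and returns one stacked tensor;
# normalize_embeddings=True yields unit-length vectors, so a plain dot product
# equals cosine similarity.
unit_embeddings = model.encode_text(
    sentences, convert_to_tensor=True, normalize_embeddings=True
)
similarity = unit_embeddings @ unit_embeddings.T
```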