bwang0911 committed on
Commit
0e50fd1
1 Parent(s): d42d28c

refactor: add docstring

Browse files
Files changed (1) hide show
  1. modeling_clip.py +31 -1
modeling_clip.py CHANGED
@@ -260,7 +260,37 @@ class JinaCLIPModel(JinaCLIPPreTrainedModel):
260
  normalize_embeddings: bool = False,
261
  **tokenizer_kwargs,
262
  ) -> Union[List[torch.Tensor], np.ndarray, torch.Tensor]::
263
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
264
  self.eval()
265
 
266
  if show_progress_bar is None:
 
260
  normalize_embeddings: bool = False,
261
  **tokenizer_kwargs,
262
  ) -> Union[List[torch.Tensor], np.ndarray, torch.Tensor]::
263
+ """
264
+ Computes sentence embeddings
265
+ Args:
266
+ sentences(`str` or `List[str]`):
267
+ Sentence or sentences to be encoded
268
+ batch_size(`int`, *optional*, defaults to 32):
269
+ Batch size for the computation
270
+ show_progress_bar(`bool`, *optional*, defaults to None):
271
+ Show a progress bar when encoding sentences.
272
+ If set to None, progress bar is only shown when `logger.level == logging.INFO` or `logger.level == logging.DEBUG`.
273
+ output_value(`str`, *optional*, defaults to 'sentence_embedding'):
274
+ Defaults to sentence_embedding, which returns sentence embeddings.
275
+ Can be set to token_embeddings to get wordpiece token embeddings.
276
+ Set to None to get all output values.
277
+ convert_to_numpy(`bool`, *optional*, defaults to True):
278
+ If true, the output is a list of numpy vectors.
279
+ Else, it is a list of pytorch tensors.
280
+ convert_to_tensor(`bool`, *optional*, defaults to False):
281
+ If true, you get one large tensor as return.
282
+ Overwrites any setting from convert_to_numpy
283
+ device(`torch.device`, *optional*, defaults to None):
284
+ Which torch.device to use for the computation
285
+ normalize_embeddings(`bool`, *optional*, defaults to False):
286
+ If set to true, returned vectors will have length 1. In that case, the faster dot-product (util.dot_score) can be used instead of cosine similarity.
287
+ tokenizer_kwargs(`Dict[str, Any]`, *optional*, defaults to {}):
288
+ Keyword arguments for the tokenizer
289
+ Returns:
290
+ By default, a list of tensors is returned.
291
+ If convert_to_tensor, a stacked tensor is returned.
292
+ If convert_to_numpy, a numpy matrix is returned.
293
+ """
294
  self.eval()
295
 
296
  if show_progress_bar is None: