Add chunking
#21
by isacat
- opened
- modeling_xlm_roberta.py +46 -5
modeling_xlm_roberta.py
CHANGED
@@ -441,6 +441,23 @@ class XLMRobertaModel(XLMRobertaPreTrainedModel):
 
         self.apply(partial(_init_weights, initializer_range=config.initializer_range))
 
+    def chunking_pooling_inference(model_output, span_annotation):
+        token_embeddings = model_output[0]
+        outputs = []
+
+        for embeddings, annotations in zip(token_embeddings, span_annotation):
+            clamped_embeddings = torch.clamp(embeddings, min=-10, max=10)
+            pooled_embeddings = [
+                clamped_embeddings[start:end].sum(dim=0)
+                / (end - start if end - start > 0 else 1)
+                for start, end in annotations
+            ]
+            pooled_embeddings = [
+                embedding.detach().cpu().numpy() for embedding in pooled_embeddings
+            ]
+            outputs.append(pooled_embeddings)
+
+        return outputs
 
     @torch.inference_mode()
     def encode(
@@ -454,6 +471,7 @@ class XLMRobertaModel(XLMRobertaPreTrainedModel):
         device: Optional[torch.device] = None,
         normalize_embeddings: bool = False,
         truncate_dim: Optional[int] = None,
+        span_annotations: Optional[List[List[Tuple[int]]]] = None,
         **tokenizer_kwargs,
     ) -> Union[List[torch.Tensor], np.ndarray, torch.Tensor]:
         """
@@ -485,6 +503,10 @@ class XLMRobertaModel(XLMRobertaPreTrainedModel):
                 be used.
             truncate_dim(`int`, *optional*, defaults to None):
                 The dimension to truncate sentence embeddings to. `None` does no truncation.
+            span_annotations(`List[List[Tuple[int]]]`, *optional*, defaults to None):
+                List of list of tuples. Each tuple represents the start and end index of a chunk.
+                If provided, the embeddings are pooled for each span, and an embedding for each
+                span is returned.
             tokenizer_kwargs(`Dict[str, Any]`, *optional*, defaults to {}):
                 Keyword arguments for the tokenizer
         Returns:
@@ -561,7 +583,12 @@ class XLMRobertaModel(XLMRobertaPreTrainedModel):
            elif output_value is None:
                raise NotImplementedError
            else:
-               if self.config.emb_pooler == 'cls':
+               if span_annotations:
+                   embeddings = self.chunking_pooling_inference(
+                       token_embs,
+                       span_annotations[i : i + batch_size],
+                   )
+               elif self.config.emb_pooler == 'cls':
                    embeddings = self.cls_pooling(
                        token_embs, encoded_input['attention_mask']
                    )
@@ -579,14 +606,28 @@ class XLMRobertaModel(XLMRobertaPreTrainedModel):
 
        all_embeddings = [all_embeddings[idx] for idx in inverse_permutation]
 
-       truncate_dim = truncate_dim or self.config.truncate_dim
        if truncate_dim:
-           all_embeddings = self.truncate_embeddings(all_embeddings, truncate_dim)
+           if isinstance(all_embeddings[0], list):
+               all_embeddings = [
+                   [self.truncate_embeddings(chunk, truncate_dim) for chunk in emb_batch]
+                   for emb_batch in all_embeddings
+               ]
+           else:
+               all_embeddings = self.truncate_embeddings(all_embeddings, truncate_dim)
 
        if convert_to_tensor:
-           all_embeddings = torch.stack(all_embeddings)
+           if isinstance(all_embeddings[0], list):
+               all_embeddings = [torch.stack(emb_batch) for emb_batch in all_embeddings]
+           else:
+               all_embeddings = torch.stack(all_embeddings)
        elif convert_to_numpy:
-           all_embeddings = np.asarray([emb.numpy() for emb in all_embeddings])
+           if isinstance(all_embeddings[0], list):
+               all_embeddings = [
+                   np.asarray([chunk.numpy() for chunk in emb_batch])
+                   for emb_batch in all_embeddings
+               ]
+           else:
+               all_embeddings = np.asarray([emb.numpy() for emb in all_embeddings])
 
        if input_was_string:
            all_embeddings = all_embeddings[0]