michael-guenther Jackmin108 committed
Commit a709b51
Parent: 57dbe22

lora-instructions (#36)


- feat: add lora instructions for retrieval (7af97e787979d0496a49416b8972adb1817d13fa)
- fix: when sentences is one (c35a42b4b8cf2e60a7b34f7cb8da522d0d6f0f52)
- fix: sentences as a str (5f8e4b6771a158cdfb84b3c381051ad555655426)
- merge recent changes (8f83a352fa175fb5547dae6330c05e1c3a56395e)
- refactor: prompts (aeb99cb139baff8daa0845e9e9be0d9c39f21271)


Co-authored-by: Jack Min Ong <Jackmin108@users.noreply.huggingface.co>

Files changed (2)
  1. configuration_xlm_roberta.py +2 -2
  2. modeling_lora.py +15 -10
configuration_xlm_roberta.py CHANGED

@@ -31,7 +31,7 @@ class XLMRobertaFlashConfig(PretrainedConfig):
         use_reentrant: bool = False,
         classifier_dropout: Optional[float] = None,
         lora_adaptations: Optional[List[str]] = None,
-        lora_prompts: Optional[Dict[str, str]] = None,
+        task_instructions: Optional[Dict[str, str]] = None,
         lora_rank: int = 4,
         lora_dropout_p: float = 0.0,
         lora_alpha: int = 1,
@@ -109,7 +109,7 @@ class XLMRobertaFlashConfig(PretrainedConfig):
         self.classifier_dropout = classifier_dropout
         self.load_trained_adapters = load_trained_adapters
         self.lora_adaptations = lora_adaptations
-        self.lora_prompts = lora_prompts
+        self.task_instructions = task_instructions
         self.lora_rank = lora_rank
         self.lora_dropout_p = lora_dropout_p
         self.lora_alpha = lora_alpha
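For context, here is a minimal sketch of how the renamed field might be populated when building the config. The adapter names and instruction strings below are illustrative placeholders, not values taken from this commit, and the remaining constructor arguments are assumed to keep their defaults. Every key of `task_instructions` must also appear in `lora_adaptations`, which `XLMRobertaLoRA.__init__` enforces in the `modeling_lora.py` changes below.

# Illustrative only: hypothetical adapter names and instruction strings.
from configuration_xlm_roberta import XLMRobertaFlashConfig

config = XLMRobertaFlashConfig(
    lora_adaptations=["retrieval.query", "retrieval.passage"],  # placeholder task names
    task_instructions={
        # one instruction per adaptation; keys must match `lora_adaptations`
        "retrieval.query": "Represent the query for retrieving supporting documents: ",
        "retrieval.passage": "Represent the document for retrieval: ",
    },
)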
modeling_lora.py CHANGED

@@ -165,7 +165,6 @@ class LoRAParametrization(nn.Module):
     ):
         """
         Registering LoRA adapters to all embedding and linear layers.
-
         Additionally, we implement a custom forward function for LoRA parametrization.
         This function modifies the layer's forward pass to optionally use task-specific
         parameters. When a `task_id` is provided, it employs a LoRA parametrization
@@ -242,6 +241,7 @@ class XLMRobertaLoRA(XLMRobertaPreTrainedModel):
     """
     A wrapper class around the Jina XLM-RoBERTa model that integrates LoRA (Low-Rank Adaptation) adapters.
     """
+
     def __init__(
         self, config: XLMRobertaFlashConfig, roberta: Optional[XLMRobertaModel] = None
     ):
@@ -259,15 +259,17 @@ class XLMRobertaLoRA(XLMRobertaPreTrainedModel):
             raise ValueError(
                 f"`lora_adaptations` must be a list and contain at least one element"
             )
-        self._lora_prompts = config.lora_prompts
+        self._task_instructions = config.task_instructions
         if (
-            not isinstance(self._lora_prompts, dict)
-            or len(self._lora_prompts) != len(self._lora_adaptations)
-            or not all([v in self._lora_adaptations for v in self._lora_prompts.keys()])
+            not isinstance(self._task_instructions, dict)
+            or len(self._task_instructions) != len(self._lora_adaptations)
+            or not all(
+                [v in self._lora_adaptations for v in self._task_instructions.keys()]
+            )
         ):
             raise ValueError(
-                f"`lora_prompts` must be a dict and contain the same number of elements "
-                f"as `lora_adaptations` with all keys in `lora_prompts` present in `lora_adaptations`."
+                f"`task_instructions` must be a dict and contain the same number of elements "
+                f"as `lora_adaptations` with all keys in `task_instructions` present in `lora_adaptations`."
             )
         self._adaptation_map = {
             name: idx for idx, name in enumerate(self._lora_adaptations)
@@ -323,11 +325,11 @@ class XLMRobertaLoRA(XLMRobertaPreTrainedModel):
         use_safetensors: bool = None,
         **kwargs,
     ):
-        if config.load_trained_adapters: # checkpoint already contains LoRA adapters
+        if config.load_trained_adapters: # checkpoint already contains LoRA adapters
             return super().from_pretrained(
                 pretrained_model_name_or_path, *model_args, use_flash_attn=config.use_flash_attn, **kwargs
             )
-        else: # initializing new adapters
+        else: # initializing new adapters
             roberta = XLMRobertaModel.from_pretrained(
                 pretrained_model_name_or_path, *model_args, use_flash_attn=config.use_flash_attn, **kwargs
             )
@@ -370,7 +372,6 @@ class XLMRobertaLoRA(XLMRobertaPreTrainedModel):
     ) -> Union[List[torch.Tensor], np.ndarray, torch.Tensor]:
         """
         Computes sentence embeddings.
-
         sentences(`str` or `List[str]`):
             Sentence or sentences to be encoded
         task_type(`str`, *optional*, defaults to `None`):
@@ -391,6 +392,10 @@ class XLMRobertaLoRA(XLMRobertaPreTrainedModel):
         adapter_mask = torch.full(
             (num_examples,), task_id, dtype=torch.int32, device=self.device
         )
+        if isinstance(sentences, str):
+            sentences = self._task_instructions[task_type] + sentences
+        else:
+            sentences = [self._task_instructions[task_type] + sentence for sentence in sentences]
         return self.roberta.encode(
             sentences, *args, adapter_mask=adapter_mask, **kwargs
         )
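Taken together, the `modeling_lora.py` changes mean `encode` now prepends the task-specific instruction to the input before delegating to `self.roberta.encode`, handling both a single string and a list of strings. Below is a self-contained sketch that mirrors the added lines; the instruction string is a hypothetical placeholder and model loading is omitted.

# Standalone mirror of the instruction-prefixing added to XLMRobertaLoRA.encode().
# The instruction text is a hypothetical placeholder, not taken from this commit.
from typing import Dict, List, Union


def prepend_task_instruction(
    sentences: Union[str, List[str]],
    task_type: str,
    task_instructions: Dict[str, str],
) -> Union[str, List[str]]:
    instruction = task_instructions[task_type]
    if isinstance(sentences, str):
        # single sentence passed as a plain string
        return instruction + sentences
    # list of sentences: prefix each element
    return [instruction + sentence for sentence in sentences]


instructions = {"retrieval.query": "Represent the query for retrieval: "}  # placeholder
print(prepend_task_instruction("what is LoRA?", "retrieval.query", instructions))
# Represent the query for retrieval: what is LoRA?
print(prepend_task_instruction(["first", "second"], "retrieval.query", instructions))
# ['Represent the query for retrieval: first', 'Represent the query for retrieval: second']

In the model itself this happens right before the `self.roberta.encode(...)` call, so the adapter selected via `adapter_mask` and the prepended instruction always correspond to the same `task_type`.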