unifying the input shape of the text-only branch and the text-image branch

#19
by cardcounter - opened
Files changed (1) hide show
  1. modeling_internlm_xcomposer2.py +2 -1
modeling_internlm_xcomposer2.py CHANGED
@@ -423,6 +423,7 @@ class InternLMXComposer2ForCausalLM(InternLM2PreTrainedModel):
423
  Labels for computing the masked language modeling loss. Indices should either be in `[0, ...,
424
  config.vocab_size]` or -100 (see `input_ids` docstring). Tokens with indices set to `-100` are ignored
425
  (masked), the loss is only computed for the tokens with labels in `[0, ..., config.vocab_size]`.
 
426
  Returns:
427
  """
428
 
@@ -458,7 +459,7 @@ class InternLMXComposer2ForCausalLM(InternLM2PreTrainedModel):
458
  image, text, image_nums)
459
  else:
460
  to_regress_tokens, targets = self.text2emb(
461
- text, add_special_tokens=True)
462
  to_regress_embeds = self.model.tok_embeddings(
463
  to_regress_tokens.input_ids)
464
  attention_mask = to_regress_tokens.attention_mask
 
423
  Labels for computing the masked language modeling loss. Indices should either be in `[0, ...,
424
  config.vocab_size]` or -100 (see `input_ids` docstring). Tokens with indices set to `-100` are ignored
425
  (masked), the loss is only computed for the tokens with labels in `[0, ..., config.vocab_size]`.
426
+ kwargs['samples']['text_input'] should have dimension 1 x bs
427
  Returns:
428
  """
429
 
 
459
  image, text, image_nums)
460
  else:
461
  to_regress_tokens, targets = self.text2emb(
462
+ text[0], add_special_tokens=True)
463
  to_regress_embeds = self.model.tok_embeddings(
464
  to_regress_tokens.input_ids)
465
  attention_mask = to_regress_tokens.attention_mask