czczup committed on
Commit
a84c71e
1 Parent(s): b631bf7

Upload folder using huggingface_hub

Browse files
Files changed (1) hide show
  1. modeling_internvl_chat.py +6 -5
modeling_internvl_chat.py CHANGED
@@ -34,6 +34,7 @@ def version_cmp(v1, v2, op='eq'):
34
  class InternVLChatModel(PreTrainedModel):
35
  config_class = InternVLChatConfig
36
  main_input_name = 'pixel_values'
 
37
  _supports_flash_attn_2 = True
38
  _no_split_modules = ['InternVisionModel', 'LlamaDecoderLayer', 'Qwen2DecoderLayer']
39
 
@@ -100,7 +101,7 @@ class InternVLChatModel(PreTrainedModel):
100
  return_dict = return_dict if return_dict is not None else self.config.use_return_dict
101
 
102
  image_flags = image_flags.squeeze(-1)
103
- input_embeds = self.language_model.get_input_embeddings()(input_ids)
104
 
105
  vit_embeds = self.extract_feature(pixel_values)
106
  vit_embeds = vit_embeds[image_flags == 1]
@@ -233,8 +234,8 @@ class InternVLChatModel(PreTrainedModel):
233
 
234
  tokenizer.padding_side = 'left'
235
  model_inputs = tokenizer(queries, return_tensors='pt', padding=True)
236
- input_ids = model_inputs['input_ids'].cuda()
237
- attention_mask = model_inputs['attention_mask'].cuda()
238
  eos_token_id = tokenizer.convert_tokens_to_ids(template.sep)
239
  generation_config['eos_token_id'] = eos_token_id
240
  generation_output = self.generate(
@@ -282,8 +283,8 @@ class InternVLChatModel(PreTrainedModel):
282
  query = query.replace('<image>', image_tokens, 1)
283
 
284
  model_inputs = tokenizer(query, return_tensors='pt')
285
- input_ids = model_inputs['input_ids'].cuda()
286
- attention_mask = model_inputs['attention_mask'].cuda()
287
  generation_config['eos_token_id'] = eos_token_id
288
  generation_output = self.generate(
289
  pixel_values=pixel_values,
 
34
  class InternVLChatModel(PreTrainedModel):
35
  config_class = InternVLChatConfig
36
  main_input_name = 'pixel_values'
37
+ base_model_prefix = 'language_model'
38
  _supports_flash_attn_2 = True
39
  _no_split_modules = ['InternVisionModel', 'LlamaDecoderLayer', 'Qwen2DecoderLayer']
40
 
 
101
  return_dict = return_dict if return_dict is not None else self.config.use_return_dict
102
 
103
  image_flags = image_flags.squeeze(-1)
104
+ input_embeds = self.language_model.get_input_embeddings()(input_ids).clone()
105
 
106
  vit_embeds = self.extract_feature(pixel_values)
107
  vit_embeds = vit_embeds[image_flags == 1]
 
234
 
235
  tokenizer.padding_side = 'left'
236
  model_inputs = tokenizer(queries, return_tensors='pt', padding=True)
237
+ input_ids = model_inputs['input_ids'].to(self.device)
238
+ attention_mask = model_inputs['attention_mask'].to(self.device)
239
  eos_token_id = tokenizer.convert_tokens_to_ids(template.sep)
240
  generation_config['eos_token_id'] = eos_token_id
241
  generation_output = self.generate(
 
283
  query = query.replace('<image>', image_tokens, 1)
284
 
285
  model_inputs = tokenizer(query, return_tensors='pt')
286
+ input_ids = model_inputs['input_ids'].to(self.device)
287
+ attention_mask = model_inputs['attention_mask'].to(self.device)
288
  generation_config['eos_token_id'] = eos_token_id
289
  generation_output = self.generate(
290
  pixel_values=pixel_values,