UncleFish committed on
Commit
b604fc1
·
1 Parent(s): 60533ce

fix import errors

Browse files
Files changed (3) hide show
  1. README.md +4 -3
  2. modeling_xgenmm.py +2 -2
  3. vlm.py +1 -1
README.md CHANGED
@@ -60,9 +60,10 @@ import json
60
  import PIL
61
  import IPython.display as display
62
  import torch
63
- model = AutoModelForVision2Seq.from_pretrained("./", trust_remote_code=True)
64
- tokenizer = AutoTokenizer.from_pretrained("./", trust_remote_code=True, use_fast=True, legacy=False)
65
- image_processor = AutoImageProcessor.from_pretrained("./", trust_remote_code=True)
 
66
  tokenizer = model.update_special_tokens(tokenizer)
67
 
68
  model = model.to('cuda')
 
60
  import PIL
61
  import IPython.display as display
62
  import torch
63
+ model_name_or_path = "Salesforce/xgen-mm-phi3-mini-base-r-v1"
64
+ model = AutoModelForVision2Seq.from_pretrained(model_name_or_path, trust_remote_code=True)
65
+ tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, trust_remote_code=True, use_fast=True, legacy=False)
66
+ image_processor = AutoImageProcessor.from_pretrained(model_name_or_path, trust_remote_code=True)
67
  tokenizer = model.update_special_tokens(tokenizer)
68
 
69
  model = model.to('cuda')
modeling_xgenmm.py CHANGED
@@ -3,8 +3,8 @@ import torch
3
  import open_clip
4
  from typing import List, Optional, Tuple, Union
5
  from utils import check_embedding_fns
6
- from vlm import PerceiverResampler, Kosmos
7
- from configuration_xgenmm import XGenMMVisionEncoderConfig, XGenMMVisionTokenizerConfig, XGenMMConfig
8
 
9
  class XGenMMVisionEncoder(PreTrainedModel):
10
  main_input_name = "pixel_values"
 
3
  import open_clip
4
  from typing import List, Optional, Tuple, Union
5
  from utils import check_embedding_fns
6
+ from .vlm import PerceiverResampler, Kosmos
7
+ from .configuration_xgenmm import XGenMMVisionEncoderConfig, XGenMMVisionTokenizerConfig, XGenMMConfig
8
 
9
  class XGenMMVisionEncoder(PreTrainedModel):
10
  main_input_name = "pixel_values"
vlm.py CHANGED
@@ -11,7 +11,7 @@ from dataclasses import dataclass
11
  from transformers import CLIPVisionModel
12
  import transformers
13
 
14
- from utils import num_params, getattr_recursive, stack_with_padding, get_anyres_image_grid_shape, unpad_image
15
 
16
 
17
  class VisionTokenizer(nn.Module):
 
11
  from transformers import CLIPVisionModel
12
  import transformers
13
 
14
+ from .utils import num_params, getattr_recursive, stack_with_padding, get_anyres_image_grid_shape, unpad_image
15
 
16
 
17
  class VisionTokenizer(nn.Module):