ziyjiang committed on
Commit
0f07845
1 Parent(s): 1cca7e8

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +12 -12
README.md CHANGED
@@ -48,6 +48,7 @@ Then you can enter the directory to run the following command.
48
  ```python
49
  from src.model import MMEBModel
50
  from src.arguments import ModelArguments
 
51
  import torch
52
  from transformers import HfArgumentParser, AutoProcessor
53
  from PIL import Image
@@ -56,21 +57,20 @@ import numpy as np
56
  model_args = ModelArguments(
57
  model_name='TIGER-Lab/VLM2Vec-Full',
58
  pooling='last',
59
- normalize=True)
 
 
 
 
60
 
61
  model = MMEBModel.load(model_args)
62
  model.eval()
63
  model = model.to('cuda', dtype=torch.bfloat16)
64
 
65
- processor = AutoProcessor.from_pretrained(
66
- model_args.model_name,
67
- trust_remote_code=True,
68
- model_backbone='phi3_v',
69
- num_crops=4,
70
- )
71
 
72
  # Image + Text -> Text
73
- inputs = processor('<|image_1|> Represent the given image with the following question: What is in the image', [Image.open('figures/example.jpg')])
 
74
  inputs = {key: value.to('cuda') for key, value in inputs.items()}
75
  qry_output = model(qry=inputs)["qry_reps"]
76
 
@@ -79,14 +79,14 @@ inputs = processor(string)
79
  inputs = {key: value.to('cuda') for key, value in inputs.items()}
80
  tgt_output = model(tgt=inputs)["tgt_reps"]
81
  print(string, '=', model.compute_similarity(qry_output, tgt_output))
82
- ## A cat and a dog = tensor([[0.2969]], device='cuda:0', dtype=torch.bfloat16)
83
 
84
  string = 'A cat and a tiger'
85
  inputs = processor(string)
86
  inputs = {key: value.to('cuda') for key, value in inputs.items()}
87
  tgt_output = model(tgt=inputs)["tgt_reps"]
88
  print(string, '=', model.compute_similarity(qry_output, tgt_output))
89
- ## A cat and a tiger = tensor([[0.2080]], device='cuda:0', dtype=torch.bfloat16)
90
 
91
  # Text -> Image
92
  inputs = processor('Find me an everyday image that matches the given caption: A cat and a dog.',)
@@ -98,7 +98,7 @@ inputs = processor(string, [Image.open('figures/example.jpg')])
98
  inputs = {key: value.to('cuda') for key, value in inputs.items()}
99
  tgt_output = model(tgt=inputs)["tgt_reps"]
100
  print(string, '=', model.compute_similarity(qry_output, tgt_output))
101
- ## <|image_1|> Represent the given image. = tensor([[0.3105]], device='cuda:0', dtype=torch.bfloat16)
102
 
103
  inputs = processor('Find me an everyday image that matches the given caption: A cat and a tiger.',)
104
  inputs = {key: value.to('cuda') for key, value in inputs.items()}
@@ -109,7 +109,7 @@ inputs = processor(string, [Image.open('figures/example.jpg')])
109
  inputs = {key: value.to('cuda') for key, value in inputs.items()}
110
  tgt_output = model(tgt=inputs)["tgt_reps"]
111
  print(string, '=', model.compute_similarity(qry_output, tgt_output))
112
- ## <|image_1|> Represent the given image. = tensor([[0.2158]], device='cuda:0', dtype=torch.bfloat16)
113
  ```
114
 
115
  ## Citation
 
48
  ```python
49
  from src.model import MMEBModel
50
  from src.arguments import ModelArguments
51
+ from src.utils import load_processor
52
  import torch
53
  from transformers import HfArgumentParser, AutoProcessor
54
  from PIL import Image
 
57
  model_args = ModelArguments(
58
  model_name='TIGER-Lab/VLM2Vec-Full',
59
  pooling='last',
60
+ normalize=True,
61
+ model_backbone='phi3_v',
62
+ num_crops=16)
63
+
64
+ processor = load_processor(model_args)
65
 
66
  model = MMEBModel.load(model_args)
67
  model.eval()
68
  model = model.to('cuda', dtype=torch.bfloat16)
69
 
 
 
 
 
 
 
70
 
71
  # Image + Text -> Text
72
+ inputs = processor('<|image_1|> Represent the given image with the following question: What is in the image', [Image.open(
73
+ 'figures/example.jpg')])
74
  inputs = {key: value.to('cuda') for key, value in inputs.items()}
75
  qry_output = model(qry=inputs)["qry_reps"]
76
 
 
79
  inputs = {key: value.to('cuda') for key, value in inputs.items()}
80
  tgt_output = model(tgt=inputs)["tgt_reps"]
81
  print(string, '=', model.compute_similarity(qry_output, tgt_output))
82
+ ## A cat and a dog = tensor([[0.3008]], device='cuda:0', dtype=torch.bfloat16)
83
 
84
  string = 'A cat and a tiger'
85
  inputs = processor(string)
86
  inputs = {key: value.to('cuda') for key, value in inputs.items()}
87
  tgt_output = model(tgt=inputs)["tgt_reps"]
88
  print(string, '=', model.compute_similarity(qry_output, tgt_output))
89
+ ## A cat and a tiger = tensor([[0.2051]], device='cuda:0', dtype=torch.bfloat16)
90
 
91
  # Text -> Image
92
  inputs = processor('Find me an everyday image that matches the given caption: A cat and a dog.',)
 
98
  inputs = {key: value.to('cuda') for key, value in inputs.items()}
99
  tgt_output = model(tgt=inputs)["tgt_reps"]
100
  print(string, '=', model.compute_similarity(qry_output, tgt_output))
101
+ ## <|image_1|> Represent the given image. = tensor([[0.2930]], device='cuda:0', dtype=torch.bfloat16)
102
 
103
  inputs = processor('Find me an everyday image that matches the given caption: A cat and a tiger.',)
104
  inputs = {key: value.to('cuda') for key, value in inputs.items()}
 
109
  inputs = {key: value.to('cuda') for key, value in inputs.items()}
110
  tgt_output = model(tgt=inputs)["tgt_reps"]
111
  print(string, '=', model.compute_similarity(qry_output, tgt_output))
112
+ ## <|image_1|> Represent the given image. = tensor([[0.2012]], device='cuda:0', dtype=torch.bfloat16)
113
  ```
114
 
115
  ## Citation