kimihailv committed on
Commit
0a543d3
1 Parent(s): 3d571f2

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +6 -20
README.md CHANGED
@@ -58,34 +58,20 @@ To encode data:
58
 
59
  ```python
60
  from PIL import Image
 
61
  text = 'a small red panda in a zoo'
62
  image = Image.open('red_panda.jpg')
63
- image_data = model.preprocess_image(image)
64
- text_data = model.preprocess_text(text)
65
- image_embedding = model.encode_image(image_data)
66
- text_embedding = model.encode_text(text_data)
67
- score, joint_embedding = model.encode_multimodal(
68
- image_features=image_features,
69
- text_features=text_features,
70
- attention_mask=text_data['attention_mask'],
71
- return_scores=True
72
- )
73
- ```
74
 
75
- To get features:
 
76
 
77
- ```python
78
  image_features, image_embedding = model.encode_image(image_data, return_features=True)
79
  text_features, text_embedding = model.encode_text(text_data, return_features=True)
80
- ```
81
-
82
- These features can later be used to produce joint multimodal encodings faster, as the first layers of the transformer can be skipped:
83
-
84
- ```python
85
- joint_embedding = model.encode_multimodal(
86
  image_features=image_features,
87
  text_features=text_features,
88
- attention_mask=text_data['attention_mask']
 
89
  )
90
  ```
91
 
 
58
 
59
  ```python
60
  from PIL import Image
61
+
62
  text = 'a small red panda in a zoo'
63
  image = Image.open('red_panda.jpg')
 
 
 
 
 
 
 
 
 
 
 
64
 
65
+ image_data = processor.preprocess_image(image)
66
+ text_data = processor.preprocess_text(text)
67
 
 
68
  image_features, image_embedding = model.encode_image(image_data, return_features=True)
69
  text_features, text_embedding = model.encode_text(text_data, return_features=True)
70
+ score, joint_embedding = model.encode_multimodal(
 
 
 
 
 
71
  image_features=image_features,
72
  text_features=text_features,
73
+ attention_mask=text_data['attention_mask'],
74
+ return_scores=True
75
  )
76
  ```
77