Commit: docs: update the example
File changed: README.md

@@ -166,15 +166,18 @@ This dual capability makes it an excellent tool for multimodal retrieval-augmented generation (RAG).
Before (lines removed in this commit are marked "-"; note: the contents of the
removed lines were truncated by the page extraction and are shown as captured):

    166   from transformers import AutoModel
    167
    168   # Initialize the model
    169 - model = AutoModel.from_pretrained(
    170
    171   # Sentences
    172 - sentences = [
    173
    174   # Public image URLs
    175   image_urls = [
    176 -
    177 -
    178   ]
    179
    180   # Choose a matryoshka dimension, set to None to get the full 1024-dim vectors

@@ -182,14 +185,21 @@ truncate_dim = 512

    182
    183   # Encode text and images
    184   text_embeddings = model.encode_text(sentences, truncate_dim=truncate_dim)
    185 - image_embeddings = model.encode_image(
    186
    187   # Compute similarities
    188 - print(
    189 - print(
    190 - print(
    191 - print(
    192 - print(text_embeddings[1] @ image_embeddings[1].T)  # text-image cross-modal similarity
    193   ```
    194
    195   or via sentence-transformers:
After (lines added in this commit are marked "+"):

    166   from transformers import AutoModel
    167
    168   # Initialize the model
    169 + model = AutoModel.from_pretrained("jinaai/jina-clip-v2", trust_remote_code=True)
    170
    171   # Sentences
    172 + sentences = [
    173 +     "A neural network walks into a bar and forgets why it came.",
    174 +     "Why do programmers prefer dark mode? Because light attracts bugs.",
    175 + ]
    176
    177   # Public image URLs
    178   image_urls = [
    179 +     "https://i.pinimg.com/600x315/21/48/7e/21487e8e0970dd366dafaed6ab25d8d8.jpg",
    180 +     "https://i.pinimg.com/736x/c9/f2/3e/c9f23e212529f13f19bad5602d84b78b.jpg",
    181   ]
    182
    183   # Choose a matryoshka dimension, set to None to get the full 1024-dim vectors
    185
    186   # Encode text and images
    187   text_embeddings = model.encode_text(sentences, truncate_dim=truncate_dim)
    188 + image_embeddings = model.encode_image(
    189 +     image_urls, truncate_dim=truncate_dim
    190 + )  # also accepts PIL.image, local filenames, dataURI
    191 +
    192 + # Encode query text
    193 + query = "tell me a joke about AI"
    194 + text_query_embeddings = model.encode_text(
    195 +     query, task="retrieval.query", truncate_dim=truncate_dim
    196 + )
    197
    198   # Compute similarities
    199 + print(text_query_embeddings @ text_embeddings[1].T)  # text embedding similarity
    200 + print(text_query_embeddings @ image_embeddings[0].T)  # text-image cross-modal similarity
    201 + print(image_embeddings[0] @ image_embeddings[1].T)  # image-image cross-modal similarity
    202 + print(image_embeddings[0] @ text_embeddings[0].T)  # image-text cross-modal similarity
    203   ```
    204
    205   or via sentence-transformers: