Marqo
/

marqo-fashionCLIP

Zero-Shot Image Classification

Transformers.js

multimodal retrieval

Model card | Files and versions | Community

elliesleightholm committed on Sep 3, 2024

Commit

6513dfd

·

verified ·

1 Parent(s): c418b29

Update README.md

Files changed (1) hide show

README.md +1 -13

README.md CHANGED Viewed

@@ -53,6 +53,7 @@ with torch.no_grad(), torch.cuda.amp.autocast():
     text_probs = (100.0 * image_features @ text_features.T).softmax(dim=-1)
 print("Label probs:", text_probs)
 ```
 ### Transformers.js
@@ -86,12 +87,6 @@ const text_inputs = tokenizer(texts, { padding: 'max_length', truncation: true }
 // Compute text embeddings
 const { text_embeds } = await text_model(text_inputs);
-// Tensor {
-//   dims: [ 2, 512 ],
-//   type: 'float32',
-//   data: Float32Array(1024) [ ... ],
-//   size: 1024
-// }
 // Read image and run processor
 const image = await RawImage.read('https://raw.githubusercontent.com/marqo-ai/marqo-FashionCLIP/main/docs/fashion-hippo.png');
@@ -99,13 +94,6 @@ const image_inputs = await processor(image);
 // Compute vision embeddings
 const { image_embeds } = await vision_model(image_inputs);
-// Tensor {
-//   dims: [ 1, 512 ],
-//   type: 'float32',
-//   data: Float32Array(512) [ ... ],
-//   size: 512
-// }
 // Compute similarity scores
 const normalized_text_embeds = text_embeds.normalize().tolist();

     text_probs = (100.0 * image_features @ text_features.T).softmax(dim=-1)
 print("Label probs:", text_probs)
+# [0.9998498302475922, 0.000119267522939106, 0.000030902229468640687]
 ```
 ### Transformers.js
 // Compute text embeddings
 const { text_embeds } = await text_model(text_inputs);
 // Read image and run processor
 const image = await RawImage.read('https://raw.githubusercontent.com/marqo-ai/marqo-FashionCLIP/main/docs/fashion-hippo.png');
 // Compute vision embeddings
 const { image_embeds } = await vision_model(image_inputs);
 // Compute similarity scores
 const normalized_text_embeds = text_embeds.normalize().tolist();