Commit 181f4c3 by Xenova
1 Parent(s): 586c311

Update README.md

Files changed (1)
  1. README.md +59 -1
README.md CHANGED
@@ -1,3 +1,61 @@
  ---
+ library_name: transformers.js
  license: other
- ---
+ ---
+
+ https://github.com/apple/ml-mobileclip (MobileCLIP-S0) with ONNX weights to be compatible with Transformers.js.
+
+ ## Usage (Transformers.js)
+
+ If you haven't already, you can install the [Transformers.js](https://huggingface.co/docs/transformers.js) JavaScript library from [NPM](https://www.npmjs.com/package/@xenova/transformers) using:
+ ```bash
+ npm i @xenova/transformers
+ ```
+
+ **Example:** Perform zero-shot image classification.
+ ```js
+ import {
+     AutoTokenizer,
+     CLIPTextModelWithProjection,
+     AutoProcessor,
+     CLIPVisionModelWithProjection,
+     RawImage,
+     dot,
+     softmax,
+ } from '@xenova/transformers';
+
+ const model_id = 'Xenova/mobileclip_s0';
+
+ // Load tokenizer and text model
+ const tokenizer = await AutoTokenizer.from_pretrained(model_id);
+ const text_model = await CLIPTextModelWithProjection.from_pretrained(model_id);
+
+ // Load processor and vision model
+ const processor = await AutoProcessor.from_pretrained(model_id);
+ const vision_model = await CLIPVisionModelWithProjection.from_pretrained(model_id, {
+     quantized: false, // NOTE: vision model is sensitive to quantization.
+ });
+
+ // Run tokenization
+ const texts = ['cats', 'dogs', 'birds'];
+ const text_inputs = tokenizer(texts, { padding: 'max_length', truncation: true });
+
+ // Compute text embeddings
+ const { text_embeds } = await text_model(text_inputs);
+ const normalized_text_embeds = text_embeds.normalize().tolist();
+
+ // Read image and run processor
+ const url = 'https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/cats.jpg';
+ const image = await RawImage.read(url);
+ const image_inputs = await processor(image);
+
+ // Compute vision embeddings
+ const { image_embeds } = await vision_model(image_inputs);
+ const normalized_image_embeds = image_embeds.normalize().tolist();
+
+ // Compute probabilities
+ const probabilities = normalized_image_embeds.map(
+     x => softmax(normalized_text_embeds.map(y => 100 * dot(x, y)))
+ );
+ console.log(probabilities); // [[ 0.9999973851268408, 0.000002399646544186113, 2.1522661499262862e-7 ]]
+ ```
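
Not part of the commit above, but as a quick usage note: a minimal sketch of how the returned probabilities could be paired with the candidate labels, assuming the `texts` and `probabilities` variables from the example are still in scope and that there is a single input image:

```js
// Hypothetical follow-up (not from the model card): rank the labels by probability
// for the single input image, reusing `texts` and `probabilities` from the example above.
const [scores] = probabilities;
const ranked = texts
    .map((label, i) => ({ label, score: scores[i] }))
    .sort((a, b) => b.score - a.score);
console.log(ranked); // e.g. 'cats' first with probability ≈ 0.9999, then 'dogs', then 'birds'
```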