---
library_name: transformers.js
license: other
tags:
- mobileclip
---

MobileCLIP-S0 ([apple/ml-mobileclip](https://github.com/apple/ml-mobileclip)) with ONNX weights to be compatible with Transformers.js.

## Usage (Transformers.js)

If you haven't already, you can install the [Transformers.js](https://huggingface.co/docs/transformers.js) JavaScript library from [NPM](https://www.npmjs.com/package/@xenova/transformers) using:
```bash
npm i @xenova/transformers
```
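
Alternatively, if you are not using a bundler, the library can also be loaded directly from a CDN as an ES module. A minimal sketch (the pinned version below is just an example; use whichever release you need):

```js
// Import Transformers.js straight from a CDN instead of installing via NPM.
// (Version tag is illustrative; the same named exports used in the example
// below are available this way.)
import { AutoTokenizer, CLIPTextModelWithProjection } from 'https://cdn.jsdelivr.net/npm/@xenova/transformers@2.17.2';
```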

**Example:** Perform zero-shot image classification.
```js
import {
    AutoTokenizer,
    CLIPTextModelWithProjection,
    AutoProcessor,
    CLIPVisionModelWithProjection,
    RawImage,
    dot,
    softmax,
} from '@xenova/transformers';

const model_id = 'Xenova/mobileclip_s0';

// Load tokenizer and text model
const tokenizer = await AutoTokenizer.from_pretrained(model_id);
const text_model = await CLIPTextModelWithProjection.from_pretrained(model_id);
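// (By default, @xenova/transformers loads quantized ONNX weights; the example
// only opts out of this for the vision encoder below.)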

// Load processor and vision model
const processor = await AutoProcessor.from_pretrained(model_id);
const vision_model = await CLIPVisionModelWithProjection.from_pretrained(model_id, {
    quantized: false, // NOTE: vision model is sensitive to quantization.
});

// Run tokenization
const texts = ['cats', 'dogs', 'birds'];
const text_inputs = tokenizer(texts, { padding: 'max_length', truncation: true });
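// 'max_length' padding plus truncation gives every prompt the tokenizer's fixed
// context length (77 tokens for CLIP-style tokenizers), so the batch is rectangular.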

// Compute text embeddings
const { text_embeds } = await text_model(text_inputs);
const normalized_text_embeds = text_embeds.normalize().tolist();

// Read image and run processor
const url = 'https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/cats.jpg';
const image = await RawImage.read(url);
const image_inputs = await processor(image);
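// (The processor handles resizing and normalization, producing the
// `pixel_values` tensor the vision encoder expects.)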

// Compute vision embeddings
const { image_embeds } = await vision_model(image_inputs);
const normalized_image_embeds = image_embeds.normalize().tolist();

// Compute probabilities
const probabilities = normalized_image_embeds.map(
    x => softmax(normalized_text_embeds.map(y => 100 * dot(x, y)))
);
console.log(probabilities); // [[ 0.999993040175817, 0.000006828091823929405, 1.3173235896278122e-7 ]]
```
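
Since both sets of embeddings are L2-normalized, `dot(x, y)` is the cosine similarity between the image embedding and a label embedding; scaling by 100 matches CLIP's logit scale, and `softmax` converts the scaled similarities into probabilities. If you'd rather have the output keyed by label, a small post-processing sketch (purely illustrative; `texts` and `probabilities` come from the example above):

```js
// Pair each candidate label with its probability and sort descending.
// (Illustrative sketch; `texts` and `probabilities` are defined in the example above.)
const [scores] = probabilities;
const ranked = texts
    .map((label, i) => ({ label, score: scores[i] }))
    .sort((a, b) => b.score - a.score);
console.log(ranked);
// e.g. [ { label: 'cats', score: ~0.999993 }, ... ]
```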