Benjamin CHAZELLE committed on
Commit
7370136
1 Parent(s): 08ecd45

update files

Browse files
README.md CHANGED
@@ -1,3 +1,50 @@
1
  ---
2
  license: mit
3
  ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
  license: mit
3
  ---
4
+
5
+ The [**camembert-base**](https://huggingface.co/camembert-base) model converted to ONNX format for use with [Transformers.js](https://huggingface.co/docs/transformers.js/api/pipelines#module_pipelines.FillMaskPipeline).
6
+
7
+ ```js
8
+ <script type="module">
9
+ import { pipeline } from 'https://cdn.jsdelivr.net/npm/@xenova/transformers@2.6.2';
10
+
11
+ const unmasker = await pipeline('fill-mask', 'benjaminchazelle/camembert-base-onnx', { quantized: false })
12
+
13
+ console.log(await unmasker('La capitale de la France est<mask>'))
14
+
15
+ /*
16
+ [
17
+ {
18
+ "score": 0.45410260558128357,
19
+ "token": 186,
20
+ "token_str": "▁...",
21
+ "sequence": "La capitale de la France est..."
22
+ },
23
+ {
24
+ "score": 0.06095331534743309,
25
+ "token": 300,
26
+ "token_str": "▁Paris",
27
+ "sequence": "La capitale de la France est Paris"
28
+ },
29
+ {
30
+ "score": 0.04425867274403572,
31
+ "token": 16,
32
+ "token_str": "▁le",
33
+ "sequence": "La capitale de la France est le"
34
+ },
35
+ {
36
+ "score": 0.035498425364494324,
37
+ "token": 807,
38
+ "token_str": "▁[...]",
39
+ "sequence": "La capitale de la France est [...]"
40
+ },
41
+ {
42
+ "score": 0.03408252075314522,
43
+ "token": 13,
44
+ "token_str": "▁la",
45
+ "sequence": "La capitale de la France est la"
46
+ }
47
+ ]
48
+ */
49
+ </script>
50
+ ```
config.json CHANGED
@@ -1,12 +1,13 @@
1
  {
2
- "_name_or_path": "camembert_base_onnx",
3
  "architectures": [
4
  "CamembertForMaskedLM"
5
  ],
6
  "attention_probs_dropout_prob": 0.1,
7
- "bos_token_id": 5,
8
  "classifier_dropout": null,
9
- "eos_token_id": 6,
 
10
  "hidden_act": "gelu",
11
  "hidden_dropout_prob": 0.1,
12
  "hidden_size": 768,
@@ -18,7 +19,7 @@
18
  "num_attention_heads": 12,
19
  "num_hidden_layers": 12,
20
  "output_past": true,
21
- "pad_token_id": 1,
22
  "position_embedding_type": "absolute",
23
  "transformers_version": "4.33.3",
24
  "type_vocab_size": 1,
 
1
  {
2
+ "_name_or_path": "camembert/camembert-base",
3
  "architectures": [
4
  "CamembertForMaskedLM"
5
  ],
6
  "attention_probs_dropout_prob": 0.1,
7
+ "bos_token_id": 0,
8
  "classifier_dropout": null,
9
+ "eos_token_id": 2,
10
+ "eos_token_ids": 0,
11
  "hidden_act": "gelu",
12
  "hidden_dropout_prob": 0.1,
13
  "hidden_size": 768,
 
19
  "num_attention_heads": 12,
20
  "num_hidden_layers": 12,
21
  "output_past": true,
22
+ "pad_token_id": 0,
23
  "position_embedding_type": "absolute",
24
  "transformers_version": "4.33.3",
25
  "type_vocab_size": 1,
onnx/model.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:26b7ef2a0f4ae4b938e3d60aefc6910dc058fdbc3c7e2c19c8599dee842f68e4
3
+ size 541224953
onnx/model_quantized.onnx CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1f854dd055a35a51075adcaf583e603615c68c0f42159d58f52a6d5b75b708d5
3
- size 540991078
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:44ba450bbe12f1c8daa129020091ee60f067e5b04e8edbac850f0e0364e3f84b
3
+ size 136755047
ort_config.json DELETED
@@ -1,38 +0,0 @@
1
- {
2
- "one_external_file": true,
3
- "opset": null,
4
- "optimization": {
5
- "disable_attention": null,
6
- "disable_attention_fusion": false,
7
- "disable_bias_gelu": null,
8
- "disable_bias_gelu_fusion": false,
9
- "disable_bias_skip_layer_norm": null,
10
- "disable_bias_skip_layer_norm_fusion": false,
11
- "disable_embed_layer_norm": true,
12
- "disable_embed_layer_norm_fusion": true,
13
- "disable_gelu": null,
14
- "disable_gelu_fusion": false,
15
- "disable_group_norm_fusion": true,
16
- "disable_layer_norm": null,
17
- "disable_layer_norm_fusion": false,
18
- "disable_packed_kv": true,
19
- "disable_shape_inference": true,
20
- "disable_skip_layer_norm": null,
21
- "disable_skip_layer_norm_fusion": false,
22
- "enable_gelu_approximation": false,
23
- "enable_gemm_fast_gelu_fusion": false,
24
- "enable_transformers_specific_optimizations": true,
25
- "fp16": false,
26
- "no_attention_mask": false,
27
- "optimization_level": 2,
28
- "optimize_for_gpu": false,
29
- "optimize_with_onnxruntime_only": null,
30
- "use_mask_index": false,
31
- "use_multi_head_attention": false,
32
- "use_raw_attention_mask": false
33
- },
34
- "optimum_version": "1.13.2",
35
- "quantization": {},
36
- "transformers_version": "4.33.3",
37
- "use_external_data_format": false
38
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
quantize_config.json ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "per_channel": true,
3
+ "reduce_range": true,
4
+ "per_model_config": {
5
+ "model": {
6
+ "op_types": [
7
+ "Reshape",
8
+ "Concat",
9
+ "Expand",
10
+ "Sqrt",
11
+ "Sub",
12
+ "Slice",
13
+ "ReduceMean",
14
+ "Erf",
15
+ "Gather",
16
+ "Where",
17
+ "Shape",
18
+ "Transpose",
19
+ "Div",
20
+ "Softmax",
21
+ "Cast",
22
+ "Mul",
23
+ "MatMul",
24
+ "Pow",
25
+ "ConstantOfShape",
26
+ "CumSum",
27
+ "Unsqueeze",
28
+ "Not",
29
+ "Add",
30
+ "Constant",
31
+ "Equal"
32
+ ],
33
+ "weight_type": "QInt8"
34
+ }
35
+ }
36
+ }
special_tokens_map.json CHANGED
@@ -9,7 +9,7 @@
9
  "mask_token": {
10
  "content": "<mask>",
11
  "lstrip": true,
12
- "normalized": false,
13
  "rstrip": false,
14
  "single_word": false
15
  },
 
9
  "mask_token": {
10
  "content": "<mask>",
11
  "lstrip": true,
12
+ "normalized": true,
13
  "rstrip": false,
14
  "single_word": false
15
  },
tokenizer.json CHANGED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json CHANGED
@@ -15,9 +15,10 @@
15
  "rstrip": false,
16
  "single_word": false
17
  },
18
- "model_max_length": 512,
19
  "pad_token": "<pad>",
20
  "sep_token": "</s>",
 
21
  "tokenizer_class": "CamembertTokenizer",
22
  "unk_token": "<unk>"
23
  }
 
15
  "rstrip": false,
16
  "single_word": false
17
  },
18
+ "model_max_length": 1000000000000000019884624838656,
19
  "pad_token": "<pad>",
20
  "sep_token": "</s>",
21
+ "sp_model_kwargs": {},
22
  "tokenizer_class": "CamembertTokenizer",
23
  "unk_token": "<unk>"
24
  }