dannymartin committed on
Commit
1c280e3
1 Parent(s): 98b9a08

Add SetFit model

Browse files
1_Pooling/config.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "word_embedding_dimension": 384,
3
+ "pooling_mode_cls_token": true,
4
+ "pooling_mode_mean_tokens": false,
5
+ "pooling_mode_max_tokens": false,
6
+ "pooling_mode_mean_sqrt_len_tokens": false,
7
+ "pooling_mode_weightedmean_tokens": false,
8
+ "pooling_mode_lasttoken": false,
9
+ "include_prompt": true
10
+ }
README.md ADDED
@@ -0,0 +1,339 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ base_model: avsolatorio/GIST-small-Embedding-v0
3
+ library_name: setfit
4
+ metrics:
5
+ - accuracy
6
+ pipeline_tag: text-classification
7
+ tags:
8
+ - setfit
9
+ - sentence-transformers
10
+ - text-classification
11
+ - generated_from_setfit_trainer
12
+ widget:
13
+ - text: 'Recipe: Maple Breakfast Bake
14
+
15
+ Description: At the cottage or for a decadent weekend breakfast, serve this sweet
16
+ fruity bake. It can be assembled the night before and refrigerated for ease of
17
+ preparation in the morning.
18
+
19
+ Ingredients: [{"text": "1 loaf egg challah (or other firm white bread)"}, {"text":
20
+ "1 (250 g) package cream cheese, light"}, {"text": "2 cups blueberries (fresh
21
+ or frozen)"}, {"text": "8 eggs, beaten"}, {"text": "1 1/2 cups milk"}, {"text":
22
+ "1/4 cup maple syrup"}, {"text": "1/4 cup butter, melted"}]
23
+
24
+ Instructions: Preheat oven to 350°F.
25
+
26
+ Remove crust from bread; cut in 1-inch cubes (makes about 10 cups).
27
+
28
+ Cut cream cheese into cubes (makes about 1 cup).
29
+
30
+ Grease a 9-inch square baking dish. Place half the bread cubes in the dish.
31
+
32
+ Scatter cream cheese cubes and about half the blueberries over the bread.
33
+
34
+ Top with remaining bread cubes and blueberries.
35
+
36
+ In a bowl, combine eggs, milk, maple syrup and butter. Carefully pour over bread
37
+ mixture.
38
+
39
+ Bake until a knife inserted in the center comes out clean, about 1 hour, covering
40
+ with aluminum foil if edges brown too much. To serve, cut in squares.
41
+
42
+ Serve with additional maple syrup if desired.
43
+
44
+ '
45
+ - text: 'Recipe: Amaretto Cream Baked Rice Pudding
46
+
47
+ Description: Can''t say much but YUM! I always mess up rice pudding when I do
48
+ it on the stove. Not only is this baked, but the amaretto is such a great flavor
49
+ twist!
50
+
51
+ Ingredients: [{"text": "1 pint amaretto flavored coffee creamer"}, {"text": "5
52
+ cups of cooked white rice"}, {"text": "2 eggs"}, {"text": "1 cup raisins (optional)"},
53
+ {"text": "1 cup white sugar"}, {"text": "2 teaspoons ground nutmeg"}, {"text":
54
+ "2 teaspoons cinnamon"}]
55
+
56
+ Instructions: Preheat oven to 350 degrees.
57
+
58
+ Butter a small rectangular glass baking dish or a 2 quart casserole dish.
59
+
60
+ Mix all ingredients in a large bowl and pour into prepared dish.
61
+
62
+ Sprinkle additional cinnamon and nutmeg on top, if desired.
63
+
64
+ Bake for 35-40 minutes.
65
+
66
+ Cool completely and serve with whipped cream.
67
+
68
+ '
69
+ - text: 'Recipe: Pasta Salad With Pesto(ATK)
70
+
71
+ Description: This salad is best served the day it is made; if it''s been refrigerated,
72
+ bring it to room temperature before serving. The pesto can be made a day ahead--just
73
+ cook the garlic cloves in a small saucepan of boiling water for 1 minute. Garnish
74
+ with additional shaved or grated Parmesan.
75
+
76
+ Ingredients: [{"text": "3/4 cup pine nuts"}, {"text": "2 medium garlic cloves,
77
+ unpeeled"}, {"text": "1 tablespoon salt"}, {"text": "1 teaspoon salt"}, {"text":
78
+ "1 lb bow tie pasta"}, {"text": "1/4 cup extra-virgin olive oil"}, {"text": "1
79
+ tablespoon extra-virgin olive oil"}, {"text": "3 cups fresh basil leaves, packed
80
+ (about 4 ounces)"}, {"text": "1 cup Baby Spinach, about 1 ounce (packed)"}, {"text":
81
+ "1/2 teaspoon pepper"}, {"text": "2 tablespoons lemon juice"}, {"text": "1 1/2
82
+ ounces parmesan cheese, grated (3/4 cup)"}, {"text": "6 tablespoons mayonnaise"},
83
+ {"text": "1 pint cherry tomatoes, quartered (optional) or 1 pint grape tomatoes,
84
+ halved (optional)"}]
85
+
86
+ Instructions: Bring 4 quarts water to rolling boil in large pot. Toast.
87
+
88
+ pine nuts in small dry skillet over medium heat, shaking pan occasionally, until
89
+ just golden and fragrant, 4 to 5 minutes.
90
+
91
+ When water is boiling, add garlic and let cook 1 minute. Remove.
92
+
93
+ garlic with slotted spoon and rinse under cold water to stop cooking;.
94
+
95
+ set aside to cool. Add 1 tablespoon salt and pasta to water, stir.
96
+
97
+ to separate, and cook until tender (just past al dente). Reserve ¼.
98
+
99
+ cup cooking water, drain pasta, toss with 1 tablespoon oil, spread in.
100
+
101
+ single layer on rimmed baking sheet, and cool to room temperature,.
102
+
103
+ about 30 minutes.
104
+
105
+ When garlic is cool, peel and mince or press through garlic.
106
+
107
+ press. Place ¼ cup nuts, garlic, basil, spinach, pepper, lemon.
108
+
109
+ juice, remaining ¼ cup oil, and remaining 1 teaspoon salt in.
110
+
111
+ bowl of food processor and process until smooth, scraping sides of.
112
+
113
+ bowl as necessary. Add cheese and mayonnaise and process until.
114
+
115
+ thoroughly combined. Transfer mixture to large serving bowl. Cover.
116
+
117
+ and refrigerate until ready to assemble salad.
118
+
119
+ When pasta is cool, toss with pesto, adding reserved pasta water,.
120
+
121
+ 1 tablespoon at a time, until pesto evenly coats pasta. Fold in remaining.
122
+
123
+ ½ cup nuts and tomatoes (if using); serve.
124
+
125
+ '
126
+ - text: 'Recipe: Spiked Monkfish Over Couscous
127
+
128
+ Description: None
129
+
130
+ Ingredients: [{"text": "1/4 cup olive oil"}, {"text": "1 onion, finely chopped"},
131
+ {"text": "2 green bell peppers, finely chopped"}, {"text": "4 carrots, thinly
132
+ sliced"}, {"text": "14 oz can plum tomatoes, chopped with juices"}, {"text": "1/2
133
+ teaspoon each ground cumin, crumbled saffron and hot red pepper"}, {"text": "1
134
+ 1/2 pounds skinless monkfish fillet, cut into 3/4-inch chunks"}, {"text": "2 cups
135
+ fish stock or clam juice"}, {"text": "10-ounce box (1 3/4 cups) plain couscous"},
136
+ {"text": "2 tablespoons unsalted butter"}, {"text": "Salt and pepper"}]
137
+
138
+ Instructions: Heat the olive oil in a large skillet. Add onions and peppers and
139
+ saute for about a minute. Cover and cook until onions are tender, about 5 minutes.
140
+ Add the carrots, tomatoes and their juices, cumin, saffron and crushed pepper.
141
+ Cover and cook until carrots are tender, about 5 minutes
142
+
143
+ Add chick peas and monkfish. Cover and simmer until fish is cooked through, about
144
+ 10 minutes While fish is cooking, bring fish stock to a boil. Add couscous and
145
+ immediately remove from heat. Let couscous steep about 5 minutes. Remove from
146
+ heat and add butter; season to taste with salt and pepper. Ladle couscous in a
147
+ bowl and ladle soupy monkfish stew over the top.
148
+
149
+ '
150
+ - text: 'Recipe: Chicken Herbed Rice
151
+
152
+ Description: This is a terrific side dish... or add roasted chicken and turn it
153
+ into a lovely main course!
154
+
155
+ Ingredients: [{"text": "1 tablespoon(s) chicken bouillon granules"}, {"text":
156
+ "1 tablespoon(s) dried parsley"}, {"text": "1 teaspoon(s) dried celery leaves"},
157
+ {"text": "1 teaspoon(s) dried minced onion"}, {"text": "1 teaspoon(s) sugar"},
158
+ {"text": "2 tablespoon(s) butter"}, {"text": "1 cup(s) rice"}, {"text": "2 cup(s)
159
+ water"}]
160
+
161
+ Instructions: In skillet, melt butter. Add rice and saute until light brown.
162
+
163
+ Add water and seasonings. Bring to a boil. Cover and simmer on low for 20 minutes.
164
+
165
+ '
166
+ inference: false
167
+ model-index:
168
+ - name: SetFit with avsolatorio/GIST-small-Embedding-v0
169
+ results:
170
+ - task:
171
+ type: text-classification
172
+ name: Text Classification
173
+ dataset:
174
+ name: Unknown
175
+ type: unknown
176
+ split: test
177
+ metrics:
178
+ - type: accuracy
179
+ value: 0.0
180
+ name: Accuracy
181
+ ---
182
+
183
+ # SetFit with avsolatorio/GIST-small-Embedding-v0
184
+
185
+ This is a [SetFit](https://github.com/huggingface/setfit) model that can be used for Text Classification. This SetFit model uses [avsolatorio/GIST-small-Embedding-v0](https://huggingface.co/avsolatorio/GIST-small-Embedding-v0) as the Sentence Transformer embedding model. A MultiOutputClassifier instance is used for classification.
186
+
187
+ The model has been trained using an efficient few-shot learning technique that involves:
188
+
189
+ 1. Fine-tuning a [Sentence Transformer](https://www.sbert.net) with contrastive learning.
190
+ 2. Training a classification head with features from the fine-tuned Sentence Transformer.
191
+
192
+ ## Model Details
193
+
194
+ ### Model Description
195
+ - **Model Type:** SetFit
196
+ - **Sentence Transformer body:** [avsolatorio/GIST-small-Embedding-v0](https://huggingface.co/avsolatorio/GIST-small-Embedding-v0)
197
+ - **Classification head:** a MultiOutputClassifier instance
198
+ - **Maximum Sequence Length:** 512 tokens
199
+ <!-- - **Number of Classes:** Unknown -->
200
+ <!-- - **Training Dataset:** [Unknown](https://huggingface.co/datasets/unknown) -->
201
+ <!-- - **Language:** Unknown -->
202
+ <!-- - **License:** Unknown -->
203
+
204
+ ### Model Sources
205
+
206
+ - **Repository:** [SetFit on GitHub](https://github.com/huggingface/setfit)
207
+ - **Paper:** [Efficient Few-Shot Learning Without Prompts](https://arxiv.org/abs/2209.11055)
208
+ - **Blogpost:** [SetFit: Efficient Few-Shot Learning Without Prompts](https://huggingface.co/blog/setfit)
209
+
210
+ ## Evaluation
211
+
212
+ ### Metrics
213
+ | Label | Accuracy |
214
+ |:--------|:---------|
215
+ | **all** | 0.0 |
216
+
217
+ ## Uses
218
+
219
+ ### Direct Use for Inference
220
+
221
+ First install the SetFit library:
222
+
223
+ ```bash
224
+ pip install setfit
225
+ ```
226
+
227
+ Then you can load this model and run inference.
228
+
229
+ ```python
230
+ from setfit import SetFitModel
231
+
232
+ # Download from the 🤗 Hub
233
+ model = SetFitModel.from_pretrained("dannymartin/setfit")
234
+ # Run inference
235
+ preds = model("""Recipe: Chicken Herbed Rice
236
+ Description: This is a terrific side dish... or add roasted chicken and turn it into a lovely main course!
237
+ Ingredients: [{\"text\": \"1 tablespoon(s) chicken bouillon granules\"}, {\"text\": \"1 tablespoon(s) dried parsley\"}, {\"text\": \"1 teaspoon(s) dried celery leaves\"}, {\"text\": \"1 teaspoon(s) dried minced onion\"}, {\"text\": \"1 teaspoon(s) sugar\"}, {\"text\": \"2 tablespoon(s) butter\"}, {\"text\": \"1 cup(s) rice\"}, {\"text\": \"2 cup(s) water\"}]
238
+ Instructions: In skillet, melt butter. Add rice and saute until light brown.
239
+ Add water and seasonings. Bring to a boil. Cover and simmer on low for 20 minutes.
240
+ """)
241
+ ```
242
+
243
+ <!--
244
+ ### Downstream Use
245
+
246
+ *List how someone could finetune this model on their own dataset.*
247
+ -->
248
+
249
+ <!--
250
+ ### Out-of-Scope Use
251
+
252
+ *List how the model may foreseeably be misused and address what users ought not to do with the model.*
253
+ -->
254
+
255
+ <!--
256
+ ## Bias, Risks and Limitations
257
+
258
+ *What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
259
+ -->
260
+
261
+ <!--
262
+ ### Recommendations
263
+
264
+ *What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
265
+ -->
266
+
267
+ ## Training Details
268
+
269
+ ### Training Set Metrics
270
+ | Training set | Min | Median | Max |
271
+ |:-------------|:----|:-------|:----|
272
+ | Word count | 4 | 4.2308 | 5 |
273
+
274
+ ### Training Hyperparameters
275
+ - batch_size: (32, 32)
276
+ - num_epochs: (1, 1)
277
+ - max_steps: -1
278
+ - sampling_strategy: oversampling
279
+ - body_learning_rate: (2e-05, 1e-05)
280
+ - head_learning_rate: 0.01
281
+ - loss: CosineSimilarityLoss
282
+ - distance_metric: cosine_distance
283
+ - margin: 0.25
284
+ - end_to_end: False
285
+ - use_amp: False
286
+ - warmup_proportion: 0.1
287
+ - seed: 42
288
+ - eval_max_steps: -1
289
+ - load_best_model_at_end: True
290
+
291
+ ### Training Results
292
+ | Epoch | Step | Training Loss | Validation Loss |
293
+ |:-------:|:------:|:-------------:|:---------------:|
294
+ | 0.05 | 1 | 0.3162 | - |
295
+ | **1.0** | **20** | **-** | **0.2636** |
296
+
297
+ * The bold row denotes the saved checkpoint.
298
+ ### Framework Versions
299
+ - Python: 3.10.12
300
+ - SetFit: 1.0.3
301
+ - Sentence Transformers: 3.0.1
302
+ - Transformers: 4.42.3
303
+ - PyTorch: 2.3.1+cu121
304
+ - Datasets: 2.20.0
305
+ - Tokenizers: 0.19.1
306
+
307
+ ## Citation
308
+
309
+ ### BibTeX
310
+ ```bibtex
311
+ @article{https://doi.org/10.48550/arxiv.2209.11055,
312
+ doi = {10.48550/ARXIV.2209.11055},
313
+ url = {https://arxiv.org/abs/2209.11055},
314
+ author = {Tunstall, Lewis and Reimers, Nils and Jo, Unso Eun Seo and Bates, Luke and Korat, Daniel and Wasserblat, Moshe and Pereg, Oren},
315
+ keywords = {Computation and Language (cs.CL), FOS: Computer and information sciences},
316
+ title = {Efficient Few-Shot Learning Without Prompts},
317
+ publisher = {arXiv},
318
+ year = {2022},
319
+ copyright = {Creative Commons Attribution 4.0 International}
320
+ }
321
+ ```
322
+
323
+ <!--
324
+ ## Glossary
325
+
326
+ *Clearly define terms in order to be accessible across audiences.*
327
+ -->
328
+
329
+ <!--
330
+ ## Model Card Authors
331
+
332
+ *Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.*
333
+ -->
334
+
335
+ <!--
336
+ ## Model Card Contact
337
+
338
+ *Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.*
339
+ -->
config.json ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "checkpoints/step_20",
3
+ "architectures": [
4
+ "BertModel"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "classifier_dropout": null,
8
+ "hidden_act": "gelu",
9
+ "hidden_dropout_prob": 0.1,
10
+ "hidden_size": 384,
11
+ "id2label": {
12
+ "0": "LABEL_0"
13
+ },
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 1536,
16
+ "label2id": {
17
+ "LABEL_0": 0
18
+ },
19
+ "layer_norm_eps": 1e-12,
20
+ "max_position_embeddings": 512,
21
+ "model_type": "bert",
22
+ "num_attention_heads": 12,
23
+ "num_hidden_layers": 12,
24
+ "pad_token_id": 0,
25
+ "position_embedding_type": "absolute",
26
+ "torch_dtype": "float32",
27
+ "transformers_version": "4.42.3",
28
+ "type_vocab_size": 2,
29
+ "use_cache": true,
30
+ "vocab_size": 30522
31
+ }
config_sentence_transformers.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "__version__": {
3
+ "sentence_transformers": "3.0.1",
4
+ "transformers": "4.42.3",
5
+ "pytorch": "2.3.1+cu121"
6
+ },
7
+ "prompts": {},
8
+ "default_prompt_name": null,
9
+ "similarity_fn_name": null
10
+ }
config_setfit.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "labels": null,
3
+ "normalize_embeddings": false
4
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aab8af3296eb8871c79e07f139646e86938dd7e81eec6b8a54fada4db3b4d909
3
+ size 133462128
model_head.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c11b93951000a1342e548669d02453a5b051a4842d7d3f2a34f815d317239035
3
+ size 45665
modules.json ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "idx": 0,
4
+ "name": "0",
5
+ "path": "",
6
+ "type": "sentence_transformers.models.Transformer"
7
+ },
8
+ {
9
+ "idx": 1,
10
+ "name": "1",
11
+ "path": "1_Pooling",
12
+ "type": "sentence_transformers.models.Pooling"
13
+ },
14
+ {
15
+ "idx": 2,
16
+ "name": "2",
17
+ "path": "2_Normalize",
18
+ "type": "sentence_transformers.models.Normalize"
19
+ }
20
+ ]
sentence_bert_config.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "max_seq_length": 512,
3
+ "do_lower_case": true
4
+ }
special_tokens_map.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": {
3
+ "content": "[CLS]",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "mask_token": {
10
+ "content": "[MASK]",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": {
17
+ "content": "[PAD]",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "sep_token": {
24
+ "content": "[SEP]",
25
+ "lstrip": false,
26
+ "normalized": false,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ },
30
+ "unk_token": {
31
+ "content": "[UNK]",
32
+ "lstrip": false,
33
+ "normalized": false,
34
+ "rstrip": false,
35
+ "single_word": false
36
+ }
37
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "100": {
12
+ "content": "[UNK]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "101": {
20
+ "content": "[CLS]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "102": {
28
+ "content": "[SEP]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "103": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "clean_up_tokenization_spaces": true,
45
+ "cls_token": "[CLS]",
46
+ "do_basic_tokenize": true,
47
+ "do_lower_case": true,
48
+ "mask_token": "[MASK]",
49
+ "max_length": 512,
50
+ "model_max_length": 512,
51
+ "never_split": null,
52
+ "pad_to_multiple_of": null,
53
+ "pad_token": "[PAD]",
54
+ "pad_token_type_id": 0,
55
+ "padding_side": "right",
56
+ "sep_token": "[SEP]",
57
+ "stride": 0,
58
+ "strip_accents": null,
59
+ "tokenize_chinese_chars": true,
60
+ "tokenizer_class": "BertTokenizer",
61
+ "truncation_side": "right",
62
+ "truncation_strategy": "longest_first",
63
+ "unk_token": "[UNK]"
64
+ }
vocab.txt ADDED
The diff for this file is too large to render. See raw diff