Update model, now trained with OpenAssistant dataset in ChatML format (#6)

- Update model, now trained with OpenAssistant dataset in ChatML format (b1ab71e51a42f80f59698d441603f0b6e973edd1)
- Delete safetensors of the old model (98ac10aad8e07b1c70ca80aa8482894efd3c52ea)
- Update model details and training info (51573d6ebebec66c5315a77d35ef98be49d0102e)
- Delete training_params.json as the training info on the README explains it better (c7bd186829f68f6b8d20689e74b6462e6e821467)
- README.md +48 -27
- config.json +5 -6
- generation_config.json +2 -3
- model.safetensors → pytorch_model.bin +2 -2
- special_tokens_map.json +2 -2
- tokenizer.json +7 -7
- tokenizer_config.json +7 -5
- training_params.json +0 -47
README.md
CHANGED
@@ -5,45 +5,66 @@ tags:
 - text-generation
 base_model: Locutusque/TinyMistral-248M
 datasets:
-- KnutJaegersberg/WizardLM_evol_instruct_V2_196k_instruct_format
+- OpenAssistant/oasst_top1_2023-08-25
 widget:
 - text: |-
-    Write the specs of a game about trolls and warriors in a fantasy world
-    […]
-    ### Instruction:
-    What are some potential applications for quantum computing?
-
-    ### Response:
+    <|im_start|>user
+    Write the specs of a game about trolls and warriors in a fantasy world.<|im_end|>
+    <|im_start|>assistant
+    The game is an adventure game that takes place on a planet, where players must explore their unique abilities to survive. Players can use different strategies such as collecting items or trading them for gold or silver coins, but they also need to learn how to deal with obstacles and find new ways to escape.<|im_end|>
+    <|im_start|>user
+    Could you tell me something curious about the Earth?<|im_end|>
+    <|im_start|>assistant
+    The planet is a large, rocky world with an atmosphere of 10 billion years old and a surface area around 25 million miles (36 million kilometers) wide.<|im_end|>
+    <|im_start|>user
+    What are some potential applications for quantum computing?<|im_end|>
+    <|im_start|>assistant
 inference:
   parameters:
     max_new_tokens: 64
     repetition_penalty: 1.18
 ---

-# Locutusque's TinyMistral-248M trained on […]
+# Locutusque's TinyMistral-248M trained on OpenAssistant TOP-1 Conversation Threads

-- Base model: [Locutusque/TinyMistral-248M](https://huggingface.co/Locutusque/TinyMistral-248M)
-- Dataset: […]
-- Trained with [AutoTrain Advanced](https://github.com/huggingface/autotrain-advanced) using [these parameters](https://huggingface.co/Felladrin/TinyMistral-248M-Evol-Instruct/blob/321787e81e2eb0392d7ce2715154fb9c254e39b1/training_params.json) and [this CSV file](https://huggingface.co/datasets/KnutJaegersberg/WizardLM_evol_instruct_V2_196k_instruct_format/blob/93aa373501f829449f23efc91b3ac6e7a60a4d70/all_instructions.csv)
-- Availability in other ML formats:
-  - GGUF: [Felladrin/gguf-TinyMistral-248M-Evol-Instruct](https://huggingface.co/Felladrin/gguf-TinyMistral-248M-Evol-Instruct)
+- Base model: [Locutusque/TinyMistral-248M](https://huggingface.co/Locutusque/TinyMistral-248M/blob/90b89d18fdf27937dc04ab8a9b543c5af2991c7f/README.md)
+- Dataset: [OpenAssistant/oasst_top1_2023-08-25](https://huggingface.co/datasets/OpenAssistant/oasst_top1_2023-08-25)

 ## Recommended Prompt Format

 ```
-[…]
+<|im_start|>user
+{message}<|im_end|>
+<|im_start|>assistant
+```
+
+## How it was trained
+
+```ipython
+%pip install autotrain-advanced
+
+!autotrain setup
+
+!autotrain llm \
+--train \
+--trainer "sft" \
+--model './TinyMistral-248M/' \
+--model_max_length 4096 \
+--block-size 1024 \
+--project-name 'trained-model' \
+--data-path "OpenAssistant/oasst_top1_2023-08-25" \
+--train_split "train" \
+--valid_split "test" \
+--text-column "text" \
+--lr 1e-5 \
+--train_batch_size 2 \
+--epochs 5 \
+--evaluation_strategy "steps" \
+--save-strategy "steps" \
+--save-total-limit 2 \
+--warmup-ratio 0.05 \
+--weight-decay 0.0 \
+--gradient-accumulation 8 \
+--logging-steps 10 \
+--scheduler "constant"
 ```
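For reference, here is a minimal inference sketch (not part of this commit) showing how the recommended ChatML-style prompt format above might be used with `transformers`. The repository id and greedy decoding are assumptions; the generation settings mirror the widget's `max_new_tokens` and `repetition_penalty`.

```python
# Hypothetical usage sketch; the model id and generation settings are assumptions
# based on this repository and the widget parameters in the README above.
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "Felladrin/TinyMistral-248M-Evol-Instruct"  # assumed repo id
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id)

# Build the prompt exactly as described under "Recommended Prompt Format".
prompt = (
    "<|im_start|>user\n"
    "What are some potential applications for quantum computing?<|im_end|>\n"
    "<|im_start|>assistant\n"
)

inputs = tokenizer(prompt, return_tensors="pt")
outputs = model.generate(
    **inputs,
    max_new_tokens=64,        # mirrors the widget's inference parameters
    repetition_penalty=1.18,  # mirrors the widget's inference parameters
)

# Print only the newly generated continuation, without the prompt tokens.
print(tokenizer.decode(outputs[0][inputs["input_ids"].shape[1]:], skip_special_tokens=True))
```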
config.json
CHANGED
@@ -1,11 +1,10 @@
 {
-  "_name_or_path": "[…]",
+  "_name_or_path": "./TinyMistral-248M/",
   "architectures": [
     "MistralForCausalLM"
   ],
-  "bos_token_id": […],
-  "eos_token_id": […],
-  "pad_token_id": 32002,
+  "bos_token_id": 1,
+  "eos_token_id": 2,
   "hidden_act": "silu",
   "hidden_size": 1024,
   "initializer_range": 0.02,
@@ -19,8 +18,8 @@
   "rope_theta": 10000.0,
   "sliding_window": 32,
   "tie_word_embeddings": false,
-  "torch_dtype": "[…]",
+  "torch_dtype": "float32",
   "transformers_version": "4.34.1",
-  "use_cache": […],
+  "use_cache": false,
   "vocab_size": 32003
 }
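As a quick sanity check (again, not part of the commit), the new `bos_token_id`/`eos_token_id` values can be compared against the tokenizer shipped in the same revision; the local path below is a hypothetical checkout of this repository.

```python
# Sketch of a consistency check between the updated config.json and the tokenizer.
from transformers import AutoConfig, AutoTokenizer

path = "./TinyMistral-248M-Evol-Instruct"  # hypothetical local clone of this repo
config = AutoConfig.from_pretrained(path)
tokenizer = AutoTokenizer.from_pretrained(path)

# config.json now declares bos_token_id=1 and eos_token_id=2; these should map to
# the <s>/</s> tokens set in special_tokens_map.json further down in this commit.
assert config.bos_token_id == tokenizer.bos_token_id == 1
assert config.eos_token_id == tokenizer.eos_token_id == 2
print(tokenizer.convert_ids_to_tokens([config.bos_token_id, config.eos_token_id]))  # ['<s>', '</s>']
```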
generation_config.json
CHANGED
@@ -1,7 +1,6 @@
 {
   "_from_model_config": true,
-  "bos_token_id": […],
-  "eos_token_id": […],
-  "pad_token_id": 32002,
+  "bos_token_id": 1,
+  "eos_token_id": 2,
   "transformers_version": "4.34.1"
 }
model.safetensors → pytorch_model.bin
RENAMED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:[…]
-size […]
+oid sha256:1e01fe28eb61f7d317f3236e36c88be43e136ef4ae66a424e8c912a98141aba2
+size 992115782
special_tokens_map.json
CHANGED
@@ -1,13 +1,13 @@
 {
   "bos_token": {
-    "content": "[…]",
+    "content": "<s>",
     "lstrip": false,
     "normalized": false,
     "rstrip": false,
     "single_word": false
   },
   "eos_token": {
-    "content": "[…]",
+    "content": "</s>",
     "lstrip": false,
     "normalized": false,
     "rstrip": false,
tokenizer.json
CHANGED
@@ -80,7 +80,7 @@
     "single": [
       {
         "SpecialToken": {
-          "id": "[…]",
+          "id": "<s>",
           "type_id": 0
         }
       },
@@ -94,7 +94,7 @@
     "pair": [
       {
         "SpecialToken": {
-          "id": "[…]",
+          "id": "<s>",
           "type_id": 0
         }
       },
@@ -106,7 +106,7 @@
       },
       {
         "SpecialToken": {
-          "id": "[…]",
+          "id": "<s>",
           "type_id": 1
         }
       },
@@ -118,13 +118,13 @@
       }
     ],
     "special_tokens": {
-      "[…]": {
-        "id": "[…]",
+      "<s>": {
+        "id": "<s>",
         "ids": [
-          […]
+          1
         ],
         "tokens": [
-          "[…]"
+          "<s>"
         ]
       }
     }
tokenizer_config.json
CHANGED
@@ -1,6 +1,4 @@
 {
-  "add_bos_token": true,
-  "add_eos_token": false,
   "added_tokens_decoder": {
     "0": {
       "content": "<unk>",
@@ -52,15 +50,19 @@
     }
   },
   "additional_special_tokens": [],
-  "bos_token": "[…]",
+  "bos_token": "<s>",
   "clean_up_tokenization_spaces": false,
-  "eos_token": "[…]",
+  "eos_token": "</s>",
   "legacy": true,
-  […]
+  "max_length": 1536,
+  "model_max_length": 4096,
   "pad_token": "[PAD]",
   "sp_model_kwargs": {},
   "spaces_between_special_tokens": false,
+  "stride": 0,
   "tokenizer_class": "LlamaTokenizer",
+  "truncation_side": "right",
+  "truncation_strategy": "longest_first",
   "unk_token": "<unk>",
   "use_default_system_prompt": true
 }
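One behavioural note on this file: `add_bos_token`/`add_eos_token` were dropped, so `LlamaTokenizer` falls back to its defaults (prepend `<s>`, do not append `</s>`), which matches the values the old file set explicitly. A small sketch of how that could be verified, assuming the same repository id as above:

```python
# Sketch: check that encodings still start with the <s>/bos token (id 1)
# and do not end with </s>, now that add_bos_token/add_eos_token rely on defaults.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("Felladrin/TinyMistral-248M-Evol-Instruct")  # assumed repo id
ids = tokenizer("Hello there").input_ids

print(ids[0] == tokenizer.bos_token_id)   # expected: True (bos is prepended)
print(ids[-1] == tokenizer.eos_token_id)  # expected: False (no </s> appended)
```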
training_params.json
DELETED
@@ -1,47 +0,0 @@
-{
-  "model": "Felladrin/TinyMistral-248M-Evol-Instruct",
-  "data_path": "data/",
-  "project_name": "TinyMistral-248M-Evol-Instruct",
-  "train_split": "train",
-  "valid_split": null,
-  "text_column": "text",
-  "rejected_text_column": "rejected",
-  "token": null,
-  "lr": 0.0002,
-  "epochs": 1,
-  "batch_size": 12,
-  "warmup_ratio": 0.1,
-  "gradient_accumulation": 4,
-  "optimizer": "adamw_torch",
-  "scheduler": "linear",
-  "weight_decay": 0.01,
-  "max_grad_norm": 1.0,
-  "seed": 42,
-  "add_eos_token": false,
-  "block_size": 1024,
-  "use_peft": false,
-  "lora_r": 16,
-  "lora_alpha": 32,
-  "lora_dropout": 0.05,
-  "logging_steps": -1,
-  "evaluation_strategy": "epoch",
-  "save_total_limit": 1,
-  "save_strategy": "epoch",
-  "auto_find_batch_size": false,
-  "fp16": false,
-  "push_to_hub": false,
-  "use_int8": true,
-  "model_max_length": 1024,
-  "repo_id": null,
-  "use_int4": false,
-  "trainer": "sft",
-  "target_modules": null,
-  "merge_adapter": true,
-  "username": null,
-  "use_flash_attention_2": false,
-  "log": "none",
-  "disable_gradient_checkpointing": false,
-  "model_ref": null,
-  "dpo_beta": 0.1,
-  "prompt_text_column": "prompt"
-}