Uploaded version 2024/4/17 with new prompt format including additional special tokens.
Browse files
- README.md +9 -5
- added_tokens.json +4 -0
- config.json +1 -1
- model-00001-of-00010.safetensors +2 -2
- model-00002-of-00010.safetensors +1 -1
- model-00003-of-00010.safetensors +1 -1
- model-00004-of-00010.safetensors +1 -1
- model-00005-of-00010.safetensors +1 -1
- model-00006-of-00010.safetensors +1 -1
- model-00007-of-00010.safetensors +1 -1
- model-00008-of-00010.safetensors +1 -1
- model-00009-of-00010.safetensors +1 -1
- model-00010-of-00010.safetensors +2 -2
- model.safetensors.index.json +1 -1
- special_tokens_map.json +11 -0
- tokenizer.json +18 -0
- tokenizer_config.json +23 -2
README.md
CHANGED
@@ -13,11 +13,15 @@ Fine tuned on r/ChatGPT Discord #general dump. \
|
|
13 |
Merged into bfloat16 using [mistralai/Mixtral-8x7B-v0.1](https://huggingface.co/mistralai/Mixtral-8x7B-v0.1) as base and [v2ray/SchizoGPT-8x7B-QLoRA](https://huggingface.co/v2ray/SchizoGPT-8x7B-QLoRA) as QLoRA.
|
14 |
## Prompt Template
|
15 |
```
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
|
|
|
20 |
```
|
21 |
Use `@username` to ping a user and `#channel name` to mention a channel. \
|
22 |
Prepend `<Re: username>` before a message to respond to a user. \
|
23 |
-
Use `<filename.ext>` to mention a file in a link, for example, if you have `https://example.com/image.jpg`, use `<image.jpg>`.
|
|
|
|
|
|
|
|
13 |
Merged into bfloat16 using [mistralai/Mixtral-8x7B-v0.1](https://huggingface.co/mistralai/Mixtral-8x7B-v0.1) as base and [v2ray/SchizoGPT-8x7B-QLoRA](https://huggingface.co/v2ray/SchizoGPT-8x7B-QLoRA) as QLoRA.
|
14 |
## Prompt Template
|
15 |
```
|
16 |
+
Date: 2024/4<username>username1<message>message 1<message>message 2<username>username2<message>message 1<message>message 2<username>username3<message>
|
17 |
+
```
|
18 |
+
Date prefix is optional:
|
19 |
+
```
|
20 |
+
<username>username1<message>message 1<message>message 2<username>username2<message>message 1<message>message 2<username>username3<message>
|
21 |
```
|
22 |
Use `@username` to ping a user and `#channel name` to mention a channel. \
|
23 |
Prepend `<Re: username>` before a message to respond to a user. \
|
24 |
+
Use `<filename.ext>` to mention a file in a link, for example, if you have `https://example.com/image.jpg`, use `<image.jpg>`:
|
25 |
+
```
|
26 |
+
Date: 2023/12<username>example#0001<message>Hello!<username>example#0002<message><Re: example#0001> Hi, look at this image of a cat! <cat.png><username>example#0001<message><Re: example#0002>
|
27 |
+
```
|
added_tokens.json
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"<message>": 32001,
|
3 |
+
"<username>": 32000
|
4 |
+
}
|
config.json
CHANGED
@@ -26,5 +26,5 @@
|
|
26 |
"torch_dtype": "bfloat16",
|
27 |
"transformers_version": "4.39.3",
|
28 |
"use_cache": true,
|
29 |
-
"vocab_size":
|
30 |
}
|
|
|
26 |
"torch_dtype": "bfloat16",
|
27 |
"transformers_version": "4.39.3",
|
28 |
"use_cache": true,
|
29 |
+
"vocab_size": 32002
|
30 |
}
|
model-00001-of-00010.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:169768a67eec792b9df7780dbaa87bc4be7ffa29d9cd63a4de84a981674d6286
|
3 |
+
size 9993270608
|
model-00002-of-00010.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 9999479840
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cc4c9e0588ed00b76ab6adb97e1598e2b412e2e43e9b263c98729020b6a543c9
|
3 |
size 9999479840
|
model-00003-of-00010.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 9966008016
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:92e988401c12e7e6a8f90d51d2a7b608c645bd589dd7525b91cbe9984c615d0e
|
3 |
size 9966008016
|
model-00004-of-00010.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 9999479944
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cdff4626ee7ca4851b068f63ddc987c77e26294f157e09acba112e40b32d60ce
|
3 |
size 9999479944
|
model-00005-of-00010.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 9966008120
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8c423e668c5eeb73848ac42216f225d857f4a6578c209f79f0fe75dba81b64e2
|
3 |
size 9966008120
|
model-00006-of-00010.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 9999479944
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0a5aaf26a91be244a0ab7c0f718d43e6eb9f0564218067ea55e4b7ee99af5842
|
3 |
size 9999479944
|
model-00007-of-00010.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 9999496560
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9d85b85b153d3fa2e5af69ca17655b1a3e59f429bb336f1cc9c5d57c78e6d93e
|
3 |
size 9999496560
|
model-00008-of-00010.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 9965991512
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1b03f1aa2485608a60e96cfa451d1156c4623e6b9e25420a3f6439a8f5f6a94d
|
3 |
size 9965991512
|
model-00009-of-00010.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 9999479944
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fa37c7c2cf490e005ae2ad2d4c4fe5cda86b9d09fffb922328d658c5481e4b77
|
3 |
size 9999479944
|
model-00010-of-00010.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:614d387dcf77c8e2b9338a0028a82c11d4850a1750a7d2a4a6f5a40fe786e830
|
3 |
+
size 3517051608
|
model.safetensors.index.json
CHANGED
@@ -1,6 +1,6 @@
|
|
1 |
{
|
2 |
"metadata": {
|
3 |
-
"total_size":
|
4 |
},
|
5 |
"weight_map": {
|
6 |
"lm_head.weight": "model-00010-of-00010.safetensors",
|
|
|
1 |
{
|
2 |
"metadata": {
|
3 |
+
"total_size": 93405618176
|
4 |
},
|
5 |
"weight_map": {
|
6 |
"lm_head.weight": "model-00010-of-00010.safetensors",
|
special_tokens_map.json
CHANGED
@@ -1,4 +1,8 @@
|
|
1 |
{
|
|
|
|
|
|
|
|
|
2 |
"bos_token": {
|
3 |
"content": "<s>",
|
4 |
"lstrip": false,
|
@@ -13,6 +17,13 @@
|
|
13 |
"rstrip": false,
|
14 |
"single_word": false
|
15 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
16 |
"unk_token": {
|
17 |
"content": "<unk>",
|
18 |
"lstrip": false,
|
|
|
1 |
{
|
2 |
+
"additional_special_tokens": [
|
3 |
+
"<username>",
|
4 |
+
"<message>"
|
5 |
+
],
|
6 |
"bos_token": {
|
7 |
"content": "<s>",
|
8 |
"lstrip": false,
|
|
|
17 |
"rstrip": false,
|
18 |
"single_word": false
|
19 |
},
|
20 |
+
"pad_token": {
|
21 |
+
"content": "<unk>",
|
22 |
+
"lstrip": false,
|
23 |
+
"normalized": false,
|
24 |
+
"rstrip": false,
|
25 |
+
"single_word": false
|
26 |
+
},
|
27 |
"unk_token": {
|
28 |
"content": "<unk>",
|
29 |
"lstrip": false,
|
tokenizer.json
CHANGED
@@ -29,6 +29,24 @@
|
|
29 |
"rstrip": false,
|
30 |
"normalized": false,
|
31 |
"special": true
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
32 |
}
|
33 |
],
|
34 |
"normalizer": {
|
|
|
29 |
"rstrip": false,
|
30 |
"normalized": false,
|
31 |
"special": true
|
32 |
+
},
|
33 |
+
{
|
34 |
+
"id": 32000,
|
35 |
+
"content": "<username>",
|
36 |
+
"single_word": false,
|
37 |
+
"lstrip": false,
|
38 |
+
"rstrip": false,
|
39 |
+
"normalized": false,
|
40 |
+
"special": true
|
41 |
+
},
|
42 |
+
{
|
43 |
+
"id": 32001,
|
44 |
+
"content": "<message>",
|
45 |
+
"single_word": false,
|
46 |
+
"lstrip": false,
|
47 |
+
"rstrip": false,
|
48 |
+
"normalized": false,
|
49 |
+
"special": true
|
50 |
}
|
51 |
],
|
52 |
"normalizer": {
|
tokenizer_config.json
CHANGED
@@ -1,6 +1,7 @@
|
|
1 |
{
|
2 |
"add_bos_token": true,
|
3 |
"add_eos_token": false,
|
|
|
4 |
"added_tokens_decoder": {
|
5 |
"0": {
|
6 |
"content": "<unk>",
|
@@ -25,15 +26,35 @@
|
|
25 |
"rstrip": false,
|
26 |
"single_word": false,
|
27 |
"special": true
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
28 |
}
|
29 |
},
|
30 |
-
"additional_special_tokens": [
|
|
|
|
|
|
|
31 |
"bos_token": "<s>",
|
32 |
"clean_up_tokenization_spaces": false,
|
33 |
"eos_token": "</s>",
|
34 |
"legacy": true,
|
35 |
"model_max_length": 1000000000000000019884624838656,
|
36 |
-
"pad_token":
|
|
|
37 |
"sp_model_kwargs": {},
|
38 |
"spaces_between_special_tokens": false,
|
39 |
"tokenizer_class": "LlamaTokenizer",
|
|
|
1 |
{
|
2 |
"add_bos_token": true,
|
3 |
"add_eos_token": false,
|
4 |
+
"add_prefix_space": true,
|
5 |
"added_tokens_decoder": {
|
6 |
"0": {
|
7 |
"content": "<unk>",
|
|
|
26 |
"rstrip": false,
|
27 |
"single_word": false,
|
28 |
"special": true
|
29 |
+
},
|
30 |
+
"32000": {
|
31 |
+
"content": "<username>",
|
32 |
+
"lstrip": false,
|
33 |
+
"normalized": false,
|
34 |
+
"rstrip": false,
|
35 |
+
"single_word": false,
|
36 |
+
"special": true
|
37 |
+
},
|
38 |
+
"32001": {
|
39 |
+
"content": "<message>",
|
40 |
+
"lstrip": false,
|
41 |
+
"normalized": false,
|
42 |
+
"rstrip": false,
|
43 |
+
"single_word": false,
|
44 |
+
"special": true
|
45 |
}
|
46 |
},
|
47 |
+
"additional_special_tokens": [
|
48 |
+
"<username>",
|
49 |
+
"<message>"
|
50 |
+
],
|
51 |
"bos_token": "<s>",
|
52 |
"clean_up_tokenization_spaces": false,
|
53 |
"eos_token": "</s>",
|
54 |
"legacy": true,
|
55 |
"model_max_length": 1000000000000000019884624838656,
|
56 |
+
"pad_token": "<unk>",
|
57 |
+
"padding_side": "right",
|
58 |
"sp_model_kwargs": {},
|
59 |
"spaces_between_special_tokens": false,
|
60 |
"tokenizer_class": "LlamaTokenizer",
|