vvsotnikov committed on
Commit
117a240
·
1 Parent(s): 5bacb10
README.md CHANGED
@@ -1,3 +1,54 @@
1
  ---
2
- license: cc-by-sa-4.0
 
 
 
 
 
 
 
 
 
 
 
3
  ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
+ language:
3
+ - en
4
+ tags:
5
+ - causal-lm
6
+ license: cc-by-nc-sa-4.0
7
+ datasets:
8
+ - dmayhem93/ChatCombined
9
+ - tatsu-lab/alpaca
10
+ - nomic-ai/gpt4all_prompt_generations
11
+ - Dahoas/full-hh-rlhf
12
+ - jeffwan/sharegpt_vicuna
13
+ - HuggingFaceH4/databricks_dolly_15k
14
  ---
15
+
16
+ # StableLM-Tuned-Alpha 16-bit
17
+
18
+ ## Model Description
19
+
20
+ This is a 16-bit (float16) version of `StableLM-Tuned-Alpha` 3B, converted to half precision to reduce memory usage and improve speed. No other changes were made. Original model: https://huggingface.co/stabilityai/stablelm-tuned-alpha-3b
21
+
22
+ ## Usage
23
+
24
+ Get started chatting with `StableLM-Tuned-Alpha 16-bit` by using the following code snippet:
25
+
26
+ ```python
27
# Minimal chat example for the float16 StableLM-Tuned-Alpha 3B checkpoint.
# Requires `torch`, `transformers`, and a CUDA-capable GPU.
import torch  # needed at definition time: the annotations below reference torch.*
from transformers import AutoModelForCausalLM, AutoTokenizer, StoppingCriteria, StoppingCriteriaList

MODEL_ID = "vvsotnikov/stablelm-tuned-alpha-3b-16bit"

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
# Ask for float16 explicitly. Without `torch_dtype`, `from_pretrained`
# instantiates the weights in float32, which would undo the memory savings
# this 16-bit checkpoint is meant to provide.
model = AutoModelForCausalLM.from_pretrained(MODEL_ID, torch_dtype=torch.float16)
model.cuda()


class StopOnTokens(StoppingCriteria):
    """Stopping criterion that halts generation on any of a fixed set of token ids."""

    # 0 is the model's bos/eos id (see config.json). The remaining ids are
    # presumably the chat-role special tokens -- verify against the tokenizer.
    STOP_IDS = frozenset({50278, 50279, 50277, 1, 0})

    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:
        """Return True when the most recent token of the first sequence is a stop id.

        Only ``input_ids[0]`` is inspected, so this is intended for a batch
        containing a single prompt.
        """
        return int(input_ids[0][-1]) in self.STOP_IDS


system_prompt = """<|SYSTEM|># StableLM Tuned (Alpha version)
- StableLM is a helpful and harmless open-source AI language model developed by StabilityAI.
- StableLM is excited to be able to help the user, but will refuse to do anything that could be considered harmful to the user.
- StableLM is more than just an information source, StableLM is also able to write poetry, short stories, and make jokes.
- StableLM will refuse to participate in anything that could harm a human.
"""

# The prompt is the system block followed by one user turn and an open assistant turn.
prompt = f"{system_prompt}<|USER|>What's your mood today?<|ASSISTANT|>"

# Inputs must live on the same device as the model.
inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
tokens = model.generate(
    **inputs,
    max_new_tokens=64,
    temperature=0.7,
    do_sample=True,
    stopping_criteria=StoppingCriteriaList([StopOnTokens()]),
)
print(tokenizer.decode(tokens[0], skip_special_tokens=True))
54
+ ```
config.json ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "stabilityai/stablelm-tuned-alpha-3b",
3
+ "architectures": [
4
+ "GPTNeoXForCausalLM"
5
+ ],
6
+ "bos_token_id": 0,
7
+ "eos_token_id": 0,
8
+ "hidden_act": "gelu",
9
+ "hidden_size": 4096,
10
+ "initializer_range": 0.02,
11
+ "intermediate_size": 16384,
12
+ "layer_norm_eps": 1e-05,
13
+ "max_position_embeddings": 4096,
14
+ "model_type": "gpt_neox",
15
+ "num_attention_heads": 32,
16
+ "num_hidden_layers": 16,
17
+ "rotary_emb_base": 10000,
18
+ "rotary_pct": 0.25,
19
+ "tie_word_embeddings": false,
20
+ "torch_dtype": "float16",
21
+ "transformers_version": "4.28.1",
22
+ "use_cache": true,
23
+ "use_parallel_residual": true,
24
+ "vocab_size": 50688
25
+ }
generation_config.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 0,
4
+ "eos_token_id": 0,
5
+ "transformers_version": "4.28.1"
6
+ }
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:61d771cb399d04e2cdbd436235dec9b780475f91f6c0b936c84889563d12639b
3
+ size 7543162373
special_tokens_map.json ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "<|endoftext|>",
3
+ "eos_token": "<|endoftext|>",
4
+ "unk_token": "<|endoftext|>"
5
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "bos_token": "<|endoftext|>",
4
+ "clean_up_tokenization_spaces": true,
5
+ "eos_token": "<|endoftext|>",
6
+ "model_max_length": 1000000000000000019884624838656,
7
+ "tokenizer_class": "GPTNeoXTokenizer",
8
+ "unk_token": "<|endoftext|>"
9
+ }