Kukedlc committed
Commit 67f12c2 · verified · 1 Parent(s): 6417f9f

Update README.md

Files changed (1)
  1. README.md +28 -7
README.md CHANGED
@@ -53,27 +53,48 @@ parameters:
  dtype: bfloat16
  ```

- ## 💻 Usage
+ ## 💻 Usage - Stream

  ```python
- !pip install -qU transformers accelerate
+ # Requirements
+ !pip install -qU transformers accelerate bitsandbytes
+
+ # Imports
+ from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer
+
+ # Model & Tokenizer
+ MODEL_NAME = model = "{{ username }}/{{ model_name }}"
+ model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, device_map='cuda:1', load_in_4bit=True)
+ tok = AutoTokenizer.from_pretrained(MODEL_NAME)
+
+ # Inference
+ inputs = tok(["I want you to generate a theory that unites quantum mechanics with the theory of relativity and cosmic consciousness"], return_tensors="pt").to('cuda')
+ streamer = TextStreamer(tok)
+
+ # Despite returning the usual output, the streamer will also print the generated text to stdout.
+ _ = model.generate(**inputs, streamer=streamer, max_new_tokens=512)
+
+ ```
+ ## 💻 Usage - Clasic
+
+ ```python
+ !pip install -qU transformers bitsandbytes accelerate

  from transformers import AutoTokenizer
  import transformers
  import torch

- model = "Kukedlc/NeuralTopBench-7B-ties"
- messages = [{"role": "user", "content": "What is a large language model?"}]
+ model = "{{ username }}/{{ model_name }}"

  tokenizer = AutoTokenizer.from_pretrained(model)
- prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
  pipeline = transformers.pipeline(
      "text-generation",
      model=model,
-     torch_dtype=torch.float16,
-     device_map="auto",
+     model_kwargs={"torch_dtype": torch.float16, "load_in_4bit": True},
  )

+ messages = [{"role": "user", "content": "Explain what a Mixture of Experts is in less than 100 words."}]
+ prompt = pipeline.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
  outputs = pipeline(prompt, max_new_tokens=256, do_sample=True, temperature=0.7, top_k=50, top_p=0.95)
  print(outputs[0]["generated_text"])
  ```
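
Note that the snippets added by this commit keep the `{{ username }}/{{ model_name }}` placeholder from the README template, and the stream example loads the weights on `cuda:1` while moving the inputs to `cuda` (i.e. `cuda:0`), so neither snippet runs as written. Below is a minimal runnable sketch of the stream example, assuming the intended repo id is `Kukedlc/NeuralTopBench-7B-ties` (the name used in the removed lines) and a single visible GPU.

```python
# Sketch only: fills the template placeholder with the repo id from the removed lines
# and keeps the inputs on the same device as the quantized model.
from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer

MODEL_NAME = "Kukedlc/NeuralTopBench-7B-ties"  # assumption: the placeholder resolves to this repo

# 4-bit loading requires bitsandbytes; device_map="auto" lets accelerate place the layers.
model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, device_map="auto", load_in_4bit=True)
tok = AutoTokenizer.from_pretrained(MODEL_NAME)

prompt = "I want you to generate a theory that unites quantum mechanics with the theory of relativity and cosmic consciousness"
inputs = tok([prompt], return_tensors="pt").to(model.device)

# The streamer prints tokens to stdout as they are generated, in addition to returning the ids.
streamer = TextStreamer(tok)
_ = model.generate(**inputs, streamer=streamer, max_new_tokens=512)
```

The "Clasic" pipeline snippet needs the same repo-id substitution; beyond that it follows the usual `transformers.pipeline` text-generation pattern, with the 4-bit load requested through `model_kwargs`.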