Spaces:
Paused
Paused
nroggendorff
committed on
Update app.py
Browse files
app.py
CHANGED
@@ -5,37 +5,16 @@ from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStream
|
|
5 |
|
6 |
torch.set_default_device("cuda")
|
7 |
|
8 |
-
|
9 |
-
"cognitivecomputations/dolphin-2.9.1-mixtral-1x22b",
|
10 |
-
trust_remote_code=True
|
11 |
-
)
|
12 |
-
|
13 |
-
model = AutoModelForCausalLM.from_pretrained(
|
14 |
-
"cognitivecomputations/dolphin-2.9.1-mixtral-1x22b",
|
15 |
-
torch_dtype="auto",
|
16 |
-
load_in_4bit=True,
|
17 |
-
trust_remote_code=True
|
18 |
-
)
|
19 |
-
|
20 |
-
system_prompt = "<|im_start|>system\nYou are Dolphin, a helpful AI assistant.<|im_end|>"
|
21 |
|
22 |
@spaces.GPU(duration=120)
|
23 |
def predict(message, history):
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
'max_new_tokens': 10000,
|
32 |
-
'do_sample': True,
|
33 |
-
'top_p': 0.95,
|
34 |
-
'top_k': 50,
|
35 |
-
'temperature': 0.7,
|
36 |
-
'num_beams': 1
|
37 |
-
}
|
38 |
-
output = model.generate(**generate_kwargs)
|
39 |
-
partial_message = streamer.decode(output[0], skip_special_tokens=True)
|
40 |
|
41 |
gr.ChatInterface(predict).launch()
|
|
|
5 |
|
6 |
# Run all torch allocations on the GPU by default.
torch.set_default_device("cuda")

# Chat text-generation pipeline for the Dolphin Mixtral model;
# used by predict() below on every request.
pipe = pipeline(
    task="text-generation",
    model="cognitivecomputations/dolphin-2.9.1-mixtral-1x22b",
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@spaces.GPU(duration=120)
def predict(message, history):
    """Produce the assistant's reply for one Gradio chat turn.

    Rebuilds the whole conversation — the system prompt, each prior
    (user, assistant) pair from `history`, then the new `message` —
    and runs it through the module-level `pipe`. Returns the text of
    the newest assistant message.
    """
    conversation = [
        {"role": "system", "content": "You are Dolphin, a helpful AI assistant."}
    ]
    for turn in history:
        # Each history entry holds the user text first, the assistant text second.
        conversation.append({"role": "user", "content": turn[0]})
        conversation.append({"role": "assistant", "content": turn[1]})
    conversation.append({"role": "user", "content": message})
    # The pipeline echoes the full conversation back; the reply is its last entry.
    result = pipe(conversation, max_new_tokens=1024)
    return result[0]["generated_text"][-1]["content"]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
19 |
|
20 |
# Expose predict() as a chat UI and start the Gradio server (blocks here).
gr.ChatInterface(predict).launch()
|