nroggendorff committed
Commit b2dcefa · verified · Parent: 48307dc

Update app.py

Files changed (1)
  1. app.py  +8 -29
app.py CHANGED
@@ -5,37 +5,16 @@ from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
 
 torch.set_default_device("cuda")
 
-tokenizer = AutoTokenizer.from_pretrained(
-    "cognitivecomputations/dolphin-2.9.1-mixtral-1x22b",
-    trust_remote_code=True
-)
-
-model = AutoModelForCausalLM.from_pretrained(
-    "cognitivecomputations/dolphin-2.9.1-mixtral-1x22b",
-    torch_dtype="auto",
-    load_in_4bit=True,
-    trust_remote_code=True
-)
-
-system_prompt = "<|im_start|>system\nYou are Dolphin, a helpful AI assistant.<|im_end|>"
+pipe = pipeline("text-generation", model="cognitivecomputations/dolphin-2.9.1-mixtral-1x22b")
 
 @spaces.GPU(duration=120)
 def predict(message, history):
-    history_transformer_format = history + [[message, ""]]
-    messages = system_prompt + "".join(["".join(["\n<|im_start|>user\n" + item[0], "<|im_end|>\n<|im_start|>assistant\n" + item[1]]) for item in history_transformer_format])
-    input_ids = tokenizer([messages], return_tensors="pt").input_ids
-    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
-    generate_kwargs = {
-        'input_ids': input_ids,
-        'streamer': streamer,
-        'max_new_tokens': 10000,
-        'do_sample': True,
-        'top_p': 0.95,
-        'top_k': 50,
-        'temperature': 0.7,
-        'num_beams': 1
-    }
-    output = model.generate(**generate_kwargs)
-    partial_message = streamer.decode(output[0], skip_special_tokens=True)
+    conv = [{"role": "system", "content": "You are Dolphin, a helpful AI assistant."}]
+    for item in history:
+        conv.append({"role": "user", "content": item[0]})
+        conv.append({"role": "assistant", "content": item[1]})
+    conv.append({"role": "user", "content": message})
+    generated_text = pipe(conv, max_new_tokens=1024)[0]['generated_text'][-1]['content']
+    return generated_text
 
 gr.ChatInterface(predict).launch()
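
For reference, below is a minimal sketch of how app.py plausibly reads after this commit. Only the hunk starting at line 5 is shown in the diff, so the import block is an assumption: pipeline must come from transformers for the new code to run, and the gradio/spaces/torch imports are inferred from the identifiers the hunk uses. The sketch also assumes Gradio's tuple-style chat history (pairs of user/assistant strings), which is what the loop indexes into.

# Sketch of app.py after this commit; imports are assumed, not shown in the diff.
import gradio as gr
import spaces
import torch
from transformers import pipeline

torch.set_default_device("cuda")

# Build the pipeline once at startup; it is reused for every request.
pipe = pipeline("text-generation", model="cognitivecomputations/dolphin-2.9.1-mixtral-1x22b")

@spaces.GPU(duration=120)
def predict(message, history):
    # Rebuild the conversation in the chat-messages format the pipeline
    # accepts: a list of {"role": ..., "content": ...} dicts.
    conv = [{"role": "system", "content": "You are Dolphin, a helpful AI assistant."}]
    for item in history:
        conv.append({"role": "user", "content": item[0]})
        conv.append({"role": "assistant", "content": item[1]})
    conv.append({"role": "user", "content": message})
    # In chat mode the pipeline returns the whole conversation under
    # 'generated_text'; the last message is the new assistant reply.
    generated_text = pipe(conv, max_new_tokens=1024)[0]['generated_text'][-1]['content']
    return generated_text

gr.ChatInterface(predict).launch()

The trade-off in the change itself: swapping the hand-built <|im_start|> prompt string, TextIteratorStreamer, and manual model.generate() call for the pipeline's chat-messages format drops the prompt-template code and fixes the old predict(), which never returned its result, but it gives up token streaming: the reply now arrives only after generation completes.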