Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -20,15 +20,19 @@ model = AutoModelForCausalLM.from_pretrained(model_id, quantization_config=bnb_c
|
|
20 |
|
21 |
@spaces.GPU(duration=120)
|
22 |
def predict(input_text, history):
|
23 |
-
|
24 |
for item in history:
|
25 |
-
|
26 |
-
|
|
|
|
|
|
|
27 |
|
28 |
inputs = tokenizer(conv, return_tensors="pt").to("cuda")
|
29 |
outputs = model.generate(**inputs, max_new_tokens=512)
|
30 |
|
31 |
generated_text = tokenizer.batch_decode(outputs)[0]
|
32 |
-
|
|
|
33 |
|
34 |
gr.ChatInterface(predict, theme="soft").launch()
|
|
|
@spaces.GPU(duration=120)
def predict(input_text, history):
    """Generate one assistant reply for *input_text* given the chat *history*.

    Args:
        input_text: The user's newest message (str).
        history: List of (user, assistant) pairs supplied by gr.ChatInterface;
            the assistant entry may be None for an unanswered turn.

    Returns:
        The generated assistant reply as a string.
    """
    # Rebuild the conversation in the role/content format expected by
    # tokenizer.apply_chat_template.
    chat = []
    for user_msg, assistant_msg in history:
        chat.append({"role": "user", "content": user_msg})
        if assistant_msg is not None:
            chat.append({"role": "assistant", "content": assistant_msg})
    # BUG FIX: the original appended undefined name `message` here, raising
    # NameError on every call (the Space's "Runtime error"). The current
    # user turn is the `input_text` parameter.
    chat.append({"role": "user", "content": input_text})
    conv = tokenizer.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)

    # NOTE(review): assumes the model was loaded onto a CUDA device — confirm.
    inputs = tokenizer(conv, return_tensors="pt").to("cuda")
    outputs = model.generate(**inputs, max_new_tokens=512)

    generated_text = tokenizer.batch_decode(outputs)[0]
    # Keep only the text after the final assistant marker emitted by the
    # chat template; everything before it is the prompt echoed back.
    generated_text = generated_text.split("<|assistant|>")[-1]
    return generated_text
# Build the chat UI around predict, then start the Gradio server.
demo = gr.ChatInterface(predict, theme="soft")
demo.launch()