Files changed (1)
  1. app.py +8 -4
app.py CHANGED
@@ -20,15 +20,19 @@ model = AutoModelForCausalLM.from_pretrained(model_id, quantization_config=bnb_c
 
 @spaces.GPU(duration=120)
 def predict(input_text, history):
-    conv = ""
+    chat = []
     for item in history:
-        conv += f"<|user|>\n{item[0]}</s>\n<|assistant|>\n{item[1]}</s>\n"
-    conv += f"<|user|>\n{input_text}</s>\n"
+        chat.append({"role": "user", "content": item[0]})
+        if item[1] is not None:
+            chat.append({"role": "assistant", "content": item[1]})
+    chat.append({"role": "user", "content": input_text})
+    conv = tokenizer.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)
 
     inputs = tokenizer(conv, return_tensors="pt").to("cuda")
     outputs = model.generate(**inputs, max_new_tokens=512)
 
     generated_text = tokenizer.batch_decode(outputs)[0]
-    return generated_text.split("<|assistant|>")[-1].strip()
+    generated_text = generated_text.split("<|assistant|>")[-1]
+    return generated_text
 
 gr.ChatInterface(predict, theme="soft").launch()
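
For context, here is a minimal, self-contained sketch of what the new tokenizer.apply_chat_template call produces in place of the hand-built f-string. The model id, the history pairs, and the final user message are assumptions for illustration; any chat model whose tokenizer ships a Zephyr/TinyLlama-style template (the <|user|>/<|assistant|>/</s> markers removed above) renders an equivalent prompt.

# Minimal sketch (not part of this PR): what tokenizer.apply_chat_template builds.
# The model id and the example messages below are assumptions for illustration.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0")

history = [("Hi there", "Hello! How can I help?")]   # Gradio-style (user, assistant) pairs
chat = []
for user_msg, assistant_msg in history:
    chat.append({"role": "user", "content": user_msg})
    if assistant_msg is not None:                    # last pair may not have a reply yet
        chat.append({"role": "assistant", "content": assistant_msg})
chat.append({"role": "user", "content": "What is Gradio?"})

prompt = tokenizer.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)
print(prompt)
# With a Zephyr/TinyLlama-style template this prints roughly:
# <|user|>
# Hi there</s>
# <|assistant|>
# Hello! How can I help?</s>
# <|user|>
# What is Gradio?</s>
# <|assistant|>

Because add_generation_prompt=True appends the trailing <|assistant|> marker, the existing split("<|assistant|>")[-1] post-processing still isolates the newly generated reply in the decoded output.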