CMLL committed on
Commit
a777a95
1 Parent(s): 92b045a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +3 -3
app.py CHANGED
@@ -7,7 +7,8 @@ import gradio as gr
7
  peft_model_id = "CMLM/ZhongJing-2-1_8b"
8
  base_model_id = "Qwen/Qwen1.5-1.8B-Chat"
9
 
10
- device = "cuda"
 
11
  model = AutoModelForCausalLM.from_pretrained(base_model_id, device_map={"": device}).to(device)
12
  model.load_adapter(peft_model_id)
13
  tokenizer = AutoTokenizer.from_pretrained(
@@ -35,7 +36,7 @@ def multi_turn_chat(question, chat_history=None):
35
  try:
36
  # Generate the response from the model
37
  outputs = model.generate(model_inputs.input_ids, max_new_tokens=512)
38
- generated_ids = outputs[:, model_inputs.input_ids.shape[-1]:]
39
  response = tokenizer.decode(generated_ids[0], skip_special_tokens=True)
40
  except Exception as e:
41
  raise RuntimeError("Error in model generation: " + str(e))
@@ -73,4 +74,3 @@ with gr.Blocks() as multi_turn_interface:
73
  clear_button.click(clear_history, [], [chatbot, state])
74
 
75
  multi_turn_interface.launch()
76
-
 
7
  peft_model_id = "CMLM/ZhongJing-2-1_8b"
8
  base_model_id = "Qwen/Qwen1.5-1.8B-Chat"
9
 
10
+ device = torch.device("cuda")
11
+
12
  model = AutoModelForCausalLM.from_pretrained(base_model_id, device_map={"": device}).to(device)
13
  model.load_adapter(peft_model_id)
14
  tokenizer = AutoTokenizer.from_pretrained(
 
36
  try:
37
  # Generate the response from the model
38
  outputs = model.generate(model_inputs.input_ids, max_new_tokens=512)
39
+ generated_ids = outputs[:, model_inputs.input_ids.shape[-1]:].to(device)
40
  response = tokenizer.decode(generated_ids[0], skip_special_tokens=True)
41
  except Exception as e:
42
  raise RuntimeError("Error in model generation: " + str(e))
 
74
  clear_button.click(clear_history, [], [chatbot, state])
75
 
76
  multi_turn_interface.launch()