Spaces: Running on Zero
Update app.py
app.py CHANGED
@@ -7,7 +7,8 @@ import gradio as gr
 peft_model_id = "CMLM/ZhongJing-2-1_8b"
 base_model_id = "Qwen/Qwen1.5-1.8B-Chat"
 
-device = "cuda"
+device = torch.device("cuda")
+
 model = AutoModelForCausalLM.from_pretrained(base_model_id, device_map={"": device}).to(device)
 model.load_adapter(peft_model_id)
 tokenizer = AutoTokenizer.from_pretrained(
@@ -35,7 +36,7 @@ def multi_turn_chat(question, chat_history=None):
     try:
         # Generate the response from the model
         outputs = model.generate(model_inputs.input_ids, max_new_tokens=512)
-        generated_ids = outputs[:, model_inputs.input_ids.shape[-1]:]
+        generated_ids = outputs[:, model_inputs.input_ids.shape[-1]:].to(device)
         response = tokenizer.decode(generated_ids[0], skip_special_tokens=True)
     except Exception as e:
         raise RuntimeError("Error in model generation: " + str(e))
@@ -73,4 +74,3 @@ with gr.Blocks() as multi_turn_interface:
     clear_button.click(clear_history, [], [chatbot, state])
 
 multi_turn_interface.launch()
-
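For anyone trying the change locally, a minimal sketch of how the updated lines fit together follows. The diff truncates the tokenizer call and does not show how model_inputs is built, so loading the tokenizer from base_model_id and constructing the prompt with tokenizer.apply_chat_template are assumptions here, not the app's exact code.

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

peft_model_id = "CMLM/ZhongJing-2-1_8b"
base_model_id = "Qwen/Qwen1.5-1.8B-Chat"

# Changed line: wrap the device string in torch.device.
device = torch.device("cuda")

model = AutoModelForCausalLM.from_pretrained(base_model_id, device_map={"": device}).to(device)
model.load_adapter(peft_model_id)  # attach the PEFT adapter on top of the base model
tokenizer = AutoTokenizer.from_pretrained(base_model_id)  # assumption: tokenizer comes from the base repo

# Assumption: the app builds model_inputs roughly like this via the chat template.
messages = [{"role": "user", "content": "Hello"}]
prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([prompt], return_tensors="pt").to(device)

outputs = model.generate(model_inputs.input_ids, max_new_tokens=512)
# Changed line: slice off the prompt tokens, keeping the result on the device.
generated_ids = outputs[:, model_inputs.input_ids.shape[-1]:].to(device)
response = tokenizer.decode(generated_ids[0], skip_special_tokens=True)
print(response)

Note that tensors returned by model.generate already live on the model's device, so the added .to(device) on generated_ids acts as a safeguard rather than a required move.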