Spaces: Running on Zero
Update app.py
app.py CHANGED
@@ -7,7 +7,8 @@ import gradio as gr
 peft_model_id = "CMLM/ZhongJing-2-1_8b"
 base_model_id = "Qwen/Qwen1.5-1.8B-Chat"
 
-device = "cuda"
+device = torch.device("cuda")
+
 model = AutoModelForCausalLM.from_pretrained(base_model_id, device_map={"": device}).to(device)
 model.load_adapter(peft_model_id)
 tokenizer = AutoTokenizer.from_pretrained(
@@ -35,7 +36,7 @@ def multi_turn_chat(question, chat_history=None):
     try:
         # Generate the response from the model
         outputs = model.generate(model_inputs.input_ids, max_new_tokens=512)
-        generated_ids = outputs[:, model_inputs.input_ids.shape[-1]:]
+        generated_ids = outputs[:, model_inputs.input_ids.shape[-1]:].to(device)
         response = tokenizer.decode(generated_ids[0], skip_special_tokens=True)
     except Exception as e:
         raise RuntimeError("Error in model generation: " + str(e))
@@ -73,4 +74,3 @@ with gr.Blocks() as multi_turn_interface:
     clear_button.click(clear_history, [], [chatbot, state])
 
 multi_turn_interface.launch()
-
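For anyone trying the change locally, a minimal sketch of how the updated lines fit together follows. The diff truncates the tokenizer call and does not show how model_inputs is built, so loading the tokenizer from base_model_id and constructing the prompt with tokenizer.apply_chat_template are assumptions here, not the app's exact code.

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

peft_model_id = "CMLM/ZhongJing-2-1_8b"
base_model_id = "Qwen/Qwen1.5-1.8B-Chat"

# Changed line: wrap the device string in torch.device.
device = torch.device("cuda")

model = AutoModelForCausalLM.from_pretrained(base_model_id, device_map={"": device}).to(device)
model.load_adapter(peft_model_id)  # attach the PEFT adapter on top of the base model
tokenizer = AutoTokenizer.from_pretrained(base_model_id)  # assumption: tokenizer comes from the base repo

# Assumption: the app builds model_inputs roughly like this via the chat template.
messages = [{"role": "user", "content": "Hello"}]
prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([prompt], return_tensors="pt").to(device)

outputs = model.generate(model_inputs.input_ids, max_new_tokens=512)
# Changed line: slice off the prompt tokens, keeping the result on the device.
generated_ids = outputs[:, model_inputs.input_ids.shape[-1]:].to(device)
response = tokenizer.decode(generated_ids[0], skip_special_tokens=True)
print(response)

Note that tensors returned by model.generate already live on the model's device, so the added .to(device) on generated_ids acts as a safeguard rather than a required move.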