Update app.py
app.py CHANGED

@@ -1,25 +1,19 @@
-import spaces
+import spaces  # must be imported at the very top
 import gradio as gr
-from transformers import AutoTokenizer
-from peft import PeftModel
-import torch
 import os
 
-
 # Get the Hugging Face access token
 hf_token = os.getenv("HF_API_TOKEN")
 
 # Define the base model name
-base_model_name = "larry1129/meta-llama-3.1-8b-bnb-4bit"
+base_model_name = "larry1129/meta-llama-3.1-8b-bnb-4bit"
 
 # Define the adapter model name
-adapter_model_name = "larry1129/WooWoof_AI"
-
-# Load the tokenizer (no GPU needed; can be loaded globally)
-tokenizer = AutoTokenizer.from_pretrained(base_model_name, use_auth_token=hf_token)
+adapter_model_name = "larry1129/WooWoof_AI"
 
-#
+# Global variables that cache the model and tokenizer
 model = None
+tokenizer = None
 
 # Define the prompt-generation function
 def generate_prompt(instruction, input_text=""):
@@ -38,18 +32,24 @@ def generate_prompt(instruction, input_text=""):
     return prompt
 
 # Define the response-generation function, decorated with @spaces.GPU
-@spaces.GPU(duration=120)
+@spaces.GPU(duration=120)
 def generate_response(instruction, input_text):
-    global model
+    global model, tokenizer
 
     if model is None:
-        #
-        import
-
+        # Check whether bitsandbytes is installed
+        import importlib.util
+        if importlib.util.find_spec("bitsandbytes") is None:
+            import subprocess
+            subprocess.call(["pip", "install", "--upgrade", "bitsandbytes"])
 
         # Import GPU-dependent libraries inside the function
-        import
-        from transformers import AutoModelForCausalLM
+        import torch
+        from transformers import AutoTokenizer, AutoModelForCausalLM
+        from peft import PeftModel
+
+        # Load the tokenizer
+        tokenizer = AutoTokenizer.from_pretrained(base_model_name, use_auth_token=hf_token)
 
         # Load the base model
         base_model = AutoModelForCausalLM.from_pretrained(
@@ -57,7 +57,7 @@ def generate_response(instruction, input_text):
             device_map="auto",
             torch_dtype=torch.float16,
             use_auth_token=hf_token,
-            trust_remote_code=True
+            trust_remote_code=True
         )
 
         # Load the adapter and apply it to the base model
@@ -74,6 +74,9 @@ def generate_response(instruction, input_text):
 
         # Switch to evaluation mode
         model.eval()
+    else:
+        # Import the needed libraries inside the function
+        import torch
 
     # Generate the prompt
     prompt = generate_prompt(instruction, input_text)
@@ -82,7 +85,7 @@ def generate_response(instruction, input_text):
     with torch.no_grad():
         outputs = model.generate(
             input_ids=inputs["input_ids"],
-            attention_mask=inputs
+            attention_mask=inputs.get("attention_mask"),
            max_new_tokens=128,
            temperature=0.7,
            top_p=0.95,
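
A note on the bitsandbytes check added in the second hunk: calling subprocess.call(["pip", ...]) inside the GPU-decorated function can target the wrong interpreter and silently ignores a failed install. A minimal alternative sketch of the same check, not what app.py actually does; on Spaces the usual fix is simply pinning bitsandbytes in requirements.txt:

# Alternative sketch: same availability check as the diff, but invoking pip
# through the current interpreter and failing loudly on a bad install.
import importlib.util
import subprocess
import sys

if importlib.util.find_spec("bitsandbytes") is None:
    # check_call raises CalledProcessError instead of returning a status code
    subprocess.check_call(
        [sys.executable, "-m", "pip", "install", "--upgrade", "bitsandbytes"]
    )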
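The lines where the adapter is actually applied (new-file lines 63-73) are elided from this view. For readers unfamiliar with the step that the comment "Load the adapter and apply it to the base model" refers to, the conventional PEFT pattern looks like the sketch below; the actual call in app.py may differ, and note that recent transformers releases accept token= in place of the deprecated use_auth_token= used throughout this file.

# Conventional PEFT adapter application (illustrative sketch only; the elided
# lines of app.py may differ).
import torch
from transformers import AutoModelForCausalLM
from peft import PeftModel

base_model = AutoModelForCausalLM.from_pretrained(
    "larry1129/meta-llama-3.1-8b-bnb-4bit",  # base model name from the diff
    device_map="auto",
    torch_dtype=torch.float16,
)
# PeftModel.from_pretrained wraps the base model with the adapter weights
model = PeftModel.from_pretrained(base_model, "larry1129/WooWoof_AI")
model.eval()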
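The fix in the last hunk deserves a gloss: passing attention_mask explicitly stops generate() from inferring the mask from pad tokens, which avoids wrong results and the warning transformers emits when the pad token equals the EOS token. The lines that build inputs and decode outputs fall outside the shown hunks; below is a self-contained sketch of the conventional tokenize-generate-decode round trip, using a hypothetical prompt and a small public model rather than the elided app.py lines:

# Sketch of the tokenize -> generate -> decode round trip, with a small
# public model so it runs without auth. Hypothetical example, not app.py.
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

tokenizer = AutoTokenizer.from_pretrained("gpt2")
model = AutoModelForCausalLM.from_pretrained("gpt2")

prompt = "### Instruction:\nSay hello.\n\n### Response:\n"
# return_tensors="pt" yields a dict-like BatchEncoding holding input_ids
# and attention_mask tensors
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

with torch.no_grad():
    outputs = model.generate(
        input_ids=inputs["input_ids"],
        attention_mask=inputs.get("attention_mask"),
        max_new_tokens=16,
    )
# Decode only the tokens generated after the prompt
response = tokenizer.decode(
    outputs[0][inputs["input_ids"].shape[-1]:], skip_special_tokens=True
)
print(response)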