Spaces: Runtime error

Update app.py
app.py CHANGED
@@ -10,20 +10,23 @@ from llama_cpp import Llama
 from llama_cpp_agent import LlamaCppAgent, MessagesFormatterType
 from llama_cpp_agent.providers import LlamaCppPythonProvider
 from llama_cpp_agent.chat_history import BasicChatHistory
-from llama_cpp_agent.chat_history.messages import Roles
+from llama_cpp_agent.chat_history.messages import Roles, ChatMessage
 import gradio as gr
 from huggingface_hub import hf_hub_download

 llm = None
 llm_model = None

-# Download the
+# Download the model (ensure the directory exists)
+model_dir = "./models"
+os.makedirs(model_dir, exist_ok=True)
 hf_hub_download(
     repo_id="Cran-May/openbuddy-llama3.2-3b-v23.2-131k-Q5_K_M-GGUF",
     filename="openbuddy-llama3.2-3b-v23.2-131k-q5_k_m-imat.gguf",
-    local_dir=
+    local_dir=model_dir
 )

+
 def get_messages_formatter_type(model_name):
     return MessagesFormatterType.LLAMA_3

@@ -31,36 +34,29 @@ def chat_fn(message, history, model, system_message, max_tokens, temperature, to
     history_list = history or []
     response_generator = respond(message, history_list, model, system_message, max_tokens, temperature, top_p, top_k, repeat_penalty)

-    full_response = ""
     for current_history in response_generator:
-        [old lines 36-44: content lost in the page extraction]
-        max_tokens,
-        temperature,
-        top_p,
-        top_k,
-        repeat_penalty,
-):
+        messages = []
+        for user_msg, bot_msg in current_history:
+            messages.append(ChatMessage(role="user", content=user_msg))
+            messages.append(ChatMessage(role="assistant", content=bot_msg))
+
+        yield messages, history
+
+
+def respond(message, history, model, system_message, max_tokens, temperature, top_p, top_k, repeat_penalty):
     global llm
     global llm_model
-
+
     chat_template = get_messages_formatter_type(model)

     if llm is None or llm_model != model:
         llm = Llama(
-            model_path=f"
+            model_path=f"{model_dir}/{model}",
             n_gpu_layers=0,
-            n_batch=4096,
-            n_ctx=8192,
-            n_threads=
-            f16_kv=True,
+            n_batch=4096,
+            n_ctx=8192,
+            n_threads=os.cpu_count(),
+            f16_kv=True,
         )
         llm_model = model

@@ -77,7 +73,7 @@ def respond(
     settings.temperature = temperature
     settings.top_k = top_k
     settings.top_p = top_p
-    settings.max_tokens = min(max_tokens, 8192)
+    settings.max_tokens = min(max_tokens, 8192)
     settings.repeat_penalty = repeat_penalty
     settings.stream = True

@@ -121,6 +117,7 @@ def respond(
     print(f"Latency: {latency} seconds")
     print(f"Speed: {speed} tokens/second")

+
 description = """<p><center>
 <a href="https://huggingface.co/hugging-quants/Llama-3.2-1B-Instruct-Q4_K_M-GGUF" target="_blank">[Meta Llama 3.2 (1B)]</a>
 Meta Llama 3.2 (1B) is a multilingual large language model (LLM) optimized for conversational dialogue use cases, including agentic retrieval and summarization tasks. It outperforms many open-source and closed chat models on industry benchmarks, and is intended for commercial and research use in multiple languages.

@@ -141,7 +138,7 @@ with gr.Blocks(theme=gr.themes.Soft(primary_hue="violet", secondary_hue="violet"
     code_background_fill_dark="#292733",
 )) as demo:

-    chatbot = gr.Chatbot(scale=1, show_copy_button=True, type='messages')
+    chatbot = gr.Chatbot(scale=1, show_copy_button=True, type='messages')
     message = gr.Textbox(label="Your message")
     model_dropdown = gr.Dropdown(
         ["openbuddy-llama3.2-3b-v23.2-131k-q5_k_m-imat.gguf"],
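For reference, the download step the diff introduces can be exercised on its own. The sketch below uses only values taken from the diff; note that hf_hub_download returns the local path of the fetched file, and that the new code calls os.makedirs and os.cpu_count() without adding "import os" in this hunk, so os must already be imported above line 10.

    import os
    from huggingface_hub import hf_hub_download

    # Create the target directory first, as the diff does.
    model_dir = "./models"
    os.makedirs(model_dir, exist_ok=True)

    # hf_hub_download returns the local path of the downloaded file;
    # capturing it avoids hand-building the path passed to Llama() later.
    model_path = hf_hub_download(
        repo_id="Cran-May/openbuddy-llama3.2-3b-v23.2-131k-Q5_K_M-GGUF",
        filename="openbuddy-llama3.2-3b-v23.2-131k-q5_k_m-imat.gguf",
        local_dir=model_dir,
    )
    print(model_path)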
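The new chat_fn body converts (user, bot) history pairs into role/content messages. A framework-free sketch of that conversion using plain dicts (pairs_to_messages is a hypothetical helper, not part of app.py) — with gr.Chatbot(type='messages'), entries shaped like {"role": ..., "content": ...} are what the component renders:

    # Hypothetical helper: turn (user, bot) pairs into role/content messages.
    def pairs_to_messages(history_pairs):
        messages = []
        for user_msg, bot_msg in history_pairs:
            messages.append({"role": "user", "content": user_msg})
            if bot_msg is not None:  # the final pair may still be streaming
                messages.append({"role": "assistant", "content": bot_msg})
        return messages

    print(pairs_to_messages([("Hi", "Hello!"), ("Still there?", None)]))
    # [{'role': 'user', 'content': 'Hi'},
    #  {'role': 'assistant', 'content': 'Hello!'},
    #  {'role': 'user', 'content': 'Still there?'}]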
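The Llama() construction sits behind an "is the cached model still the selected one?" check so the GGUF is loaded once per model choice. A minimal sketch of that pattern under the diff's settings (get_llm is a hypothetical wrapper name, not from app.py):

    import os
    from llama_cpp import Llama

    llm = None
    llm_model = None

    def get_llm(model: str, model_dir: str = "./models") -> Llama:
        """Load the GGUF once; reload only when the selected model changes."""
        global llm, llm_model
        if llm is None or llm_model != model:
            llm = Llama(
                model_path=f"{model_dir}/{model}",
                n_gpu_layers=0,           # CPU-only inference on the Space
                n_batch=4096,             # prompt-processing batch size
                n_ctx=8192,               # context window (prompt + completion)
                n_threads=os.cpu_count(),
                f16_kv=True,              # fp16 KV cache to cut memory use
            )
            llm_model = model
        return llm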
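settings.max_tokens = min(max_tokens, 8192) caps the generation budget at the configured context size. Strictly, the completion has to share the 8192-token window with the prompt, so a tighter clamp would subtract the prompt length (a sketch; clamp_max_tokens is hypothetical):

    def clamp_max_tokens(requested: int, n_ctx: int = 8192, prompt_tokens: int = 0) -> int:
        """Keep the completion budget inside the context window."""
        return max(1, min(requested, n_ctx - prompt_tokens))

    print(clamp_max_tokens(10_000))                       # 8192
    print(clamp_max_tokens(10_000, prompt_tokens=3000))   # 5192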