Update app.py
app.py
CHANGED
@@ -98,7 +98,7 @@ def respond(
     for output in stream:
         outputs += output
         token_count += len(output.split())
-        yield outputs
+        yield outputs, history + [(message, outputs)]  # Update chatbot history
 
     end_time = time.time()
     latency = end_time - start_time
@@ -108,57 +108,11 @@ def respond(
 
 description = """<p><center>
 <a href="https://huggingface.co/hugging-quants/Llama-3.2-1B-Instruct-Q4_K_M-GGUF" target="_blank">[Meta Llama 3.2 (1B)]</a>
-
 Meta Llama 3.2 (1B) is a multilingual large language model (LLM) optimized for conversational dialogue use cases, including agentic retrieval and summarization tasks. It outperforms many open-source and closed chat models on industry benchmarks, and is intended for commercial and research use in multiple languages.
-
 </center></p>
 """
 
-
-    respond,
-    additional_inputs=[
-        gr.Dropdown([
-            "llama-3.2-1b-instruct-q4_k_m.gguf"
-        ],
-        value="llama-3.2-1b-instruct-q4_k_m.gguf",
-        label="Model"
-        ),
-        gr.TextArea(value="""You are Meta Llama 3.2 (1B), an advanced AI assistant created by Meta. Your capabilities include:
-
-1. Complex reasoning and problem-solving
-2. Multilingual understanding and generation
-3. Creative and analytical writing
-4. Code understanding and generation
-5. Task decomposition and step-by-step guidance
-6. Summarization and information extraction
-
-Always strive for accuracy, clarity, and helpfulness in your responses. If you're unsure about something, express your uncertainty. Use the following format for your responses:
-""", label="System message"),
-        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max tokens"),
-        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
-        gr.Slider(
-            minimum=0.1,
-            maximum=2.0,
-            value=0.9,
-            step=0.05,
-            label="Top-p",
-        ),
-        gr.Slider(
-            minimum=0,
-            maximum=100,
-            value=1,
-            step=1,
-            label="Top-k",
-        ),
-        gr.Slider(
-            minimum=0.0,
-            maximum=2.0,
-            value=1.1,
-            step=0.1,
-            label="Repetition penalty",
-        ),
-    ],
-    theme=gr.themes.Soft(primary_hue="violet", secondary_hue="violet", neutral_hue="gray", font=[gr.themes.GoogleFont("Exo"), "ui-sans-serif", "system-ui", "sans-serif"]).set(
+with gr.Blocks(theme=gr.themes.Soft(primary_hue="violet", secondary_hue="violet", neutral_hue="gray", font=[gr.themes.GoogleFont("Exo"), "ui-sans-serif", "system-ui", "sans-serif"]).set(
     body_background_fill_dark="#16141c",
     block_background_fill_dark="#16141c",
     block_border_width="1px",
@@ -170,31 +124,42 @@ Always strive for accuracy, clarity, and helpfulness in your responses. If you'r
     background_fill_secondary_dark="#16141c",
     color_accent_soft_dark="transparent",
     code_background_fill_dark="#292733",
-)
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-)
+)) as demo:
+
+    chatbot = gr.Chatbot(scale=1, show_copy_button=True)  # removed likeable=True
+    message = gr.Textbox(label="Your message")
+    model_dropdown = gr.Dropdown(
+        ["llama-3.2-1b-instruct-q4_k_m.gguf"],
+        value="llama-3.2-1b-instruct-q4_k_m.gguf",
+        label="Model"
+    )
+    system_message = gr.TextArea(value="""You are Meta Llama 3.2 (1B), an advanced AI assistant created by Meta. Your capabilities include:
+1. Complex reasoning and problem-solving
+2. Multilingual understanding and generation
+3. Creative and analytical writing
+4. Code understanding and generation
+5. Task decomposition and step-by-step guidance
+6. Summarization and information extraction
+Always strive for accuracy, clarity, and helpfulness in your responses. If you're unsure about something, express your uncertainty. Use the following format for your responses:
+""", label="System message")
+    max_tokens = gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max tokens")
+    temperature = gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature")
+    top_p = gr.Slider(minimum=0.1, maximum=2.0, value=0.9, step=0.05, label="Top-p")
+    top_k = gr.Slider(minimum=0, maximum=100, value=1, step=1, label="Top-k")
+    repeat_penalty = gr.Slider(minimum=0.0, maximum=2.0, value=1.1, step=0.1, label="Repetition penalty")
+
+    history = gr.State([])
+
+    def chat_fn(message, history, model, system_message, max_tokens, temperature, top_p, top_k, repeat_penalty):
+        return respond(message, history, model, system_message, max_tokens, temperature, top_p, top_k, repeat_penalty)
+
+    message.submit(chat_fn, [message, history, model_dropdown, system_message, max_tokens, temperature, top_p, top_k, repeat_penalty], [chatbot, history])
+
+    gr.Markdown(description)
 
 if __name__ == "__main__":
     demo.launch()
+
 # Legacy code --------------------------------
 # import gradio as gr
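In short, this commit rewires the UI from a `gr.ChatInterface(respond, additional_inputs=[...])` call to an explicit `gr.Blocks` layout: chat history now lives in a `gr.State`, `chat_fn` forwards every control value to `respond`, and `respond` streams by yielding once per chunk. Gradio routes the first yielded element to `chatbot` and the second to the `history` state, matching the `outputs=[chatbot, history]` wiring of `message.submit`. A minimal self-contained sketch of this streaming pattern, with a stand-in token stream instead of a real model call (component names below are illustrative, not the Space's):

```python
import gradio as gr

def stream_reply(message, history):
    """Stream a reply one chunk at a time, updating chat history as we go."""
    partial = ""
    for token in ["Hel", "lo", ", ", "world", "!"]:  # stand-in token stream
        partial += token
        updated = history + [(message, partial)]
        # One yielded value per output component: the first updates the
        # Chatbot display, the second updates the history State.
        yield updated, updated

with gr.Blocks() as demo:
    chatbot = gr.Chatbot()
    message = gr.Textbox(label="Your message")
    history = gr.State([])
    message.submit(stream_reply, [message, history], [chatbot, history])

if __name__ == "__main__":
    demo.launch()
```

One difference from the committed code: `respond` yields the plain `outputs` string as its first element, while `gr.Chatbot` renders a list of (user, assistant) pairs, so the sketch yields the updated pair list to both outputs.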
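None of the hunks show where `stream` comes from. Given the `llama-3.2-1b-instruct-q4_k_m.gguf` file in the dropdown and the model card linked in `description`, it is presumably a llama-cpp-python chat-completion stream. A hedged sketch of one way to produce it (the `Llama.from_pretrained` setup, the helper name `make_stream`, and the single-turn `messages` list are assumptions, not code from this Space):

```python
from llama_cpp import Llama

# Assumed setup -- the model-loading code is outside this diff's hunks.
# The repo_id matches the link in `description`; everything else is a guess.
llm = Llama.from_pretrained(
    repo_id="hugging-quants/Llama-3.2-1B-Instruct-Q4_K_M-GGUF",
    filename="llama-3.2-1b-instruct-q4_k_m.gguf",
)

def make_stream(message, system_message, max_tokens, temperature,
                top_p, top_k, repeat_penalty):
    """Yield text chunks, one per streamed completion delta."""
    completion = llm.create_chat_completion(
        messages=[
            {"role": "system", "content": system_message},
            {"role": "user", "content": message},
        ],
        max_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
        top_k=top_k,
        repeat_penalty=repeat_penalty,
        stream=True,
    )
    for chunk in completion:
        delta = chunk["choices"][0]["delta"]
        if "content" in delta:
            yield delta["content"]
```

The five sliders map one-to-one onto `create_chat_completion`'s sampling arguments (`max_tokens`, `temperature`, `top_p`, `top_k`, `repeat_penalty`), which is presumably why `chat_fn` threads all of them through to `respond`.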