Cran-May committed on
Commit
cef1b0c
1 Parent(s): dd432ec

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +35 -70
app.py CHANGED
@@ -98,7 +98,7 @@ def respond(
98
  for output in stream:
99
  outputs += output
100
  token_count += len(output.split())
101
- yield outputs
102
 
103
  end_time = time.time()
104
  latency = end_time - start_time
@@ -108,57 +108,11 @@ def respond(
108
 
109
  description = """<p><center>
110
  <a href="https://huggingface.co/hugging-quants/Llama-3.2-1B-Instruct-Q4_K_M-GGUF" target="_blank">[Meta Llama 3.2 (1B)]</a>
111
-
112
  Meta Llama 3.2 (1B) is a multilingual large language model (LLM) optimized for conversational dialogue use cases, including agentic retrieval and summarization tasks. It outperforms many open-source and closed chat models on industry benchmarks, and is intended for commercial and research use in multiple languages.
113
-
114
  </center></p>
115
  """
116
 
117
- demo = gr.ChatInterface(
118
- respond,
119
- additional_inputs=[
120
- gr.Dropdown([
121
- "llama-3.2-1b-instruct-q4_k_m.gguf"
122
- ],
123
- value="llama-3.2-1b-instruct-q4_k_m.gguf",
124
- label="Model"
125
- ),
126
- gr.TextArea(value="""You are Meta Llama 3.2 (1B), an advanced AI assistant created by Meta. Your capabilities include:
127
-
128
- 1. Complex reasoning and problem-solving
129
- 2. Multilingual understanding and generation
130
- 3. Creative and analytical writing
131
- 4. Code understanding and generation
132
- 5. Task decomposition and step-by-step guidance
133
- 6. Summarization and information extraction
134
-
135
- Always strive for accuracy, clarity, and helpfulness in your responses. If you're unsure about something, express your uncertainty. Use the following format for your responses:
136
- """, label="System message"),
137
- gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max tokens"),
138
- gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
139
- gr.Slider(
140
- minimum=0.1,
141
- maximum=2.0,
142
- value=0.9,
143
- step=0.05,
144
- label="Top-p",
145
- ),
146
- gr.Slider(
147
- minimum=0,
148
- maximum=100,
149
- value=1,
150
- step=1,
151
- label="Top-k",
152
- ),
153
- gr.Slider(
154
- minimum=0.0,
155
- maximum=2.0,
156
- value=1.1,
157
- step=0.1,
158
- label="Repetition penalty",
159
- ),
160
- ],
161
- theme=gr.themes.Soft(primary_hue="violet", secondary_hue="violet", neutral_hue="gray",font=[gr.themes.GoogleFont("Exo"), "ui-sans-serif", "system-ui", "sans-serif"]).set(
162
  body_background_fill_dark="#16141c",
163
  block_background_fill_dark="#16141c",
164
  block_border_width="1px",
@@ -170,31 +124,42 @@ Always strive for accuracy, clarity, and helpfulness in your responses. If you'r
170
  background_fill_secondary_dark="#16141c",
171
  color_accent_soft_dark="transparent",
172
  code_background_fill_dark="#292733",
173
- ),
174
- title="Meta Llama 3.2 (1B)",
175
- description=description,
176
- chatbot=gr.Chatbot(
177
- scale=1,
178
- likeable=True,
179
- show_copy_button=True
180
- ),
181
- examples=[
182
- ["Hello! Can you introduce yourself?"],
183
- ["What's the capital of France?"],
184
- ["Can you explain the concept of photosynthesis?"],
185
- ["Write a short story about a robot learning to paint."],
186
- ["Explain the difference between machine learning and deep learning."],
187
- ["Summarize the key points of climate change and its global impact."],
188
- ["Explain quantum computing to a 10-year-old."],
189
- ["Design a step-by-step meal plan for someone trying to lose weight and build muscle."]
190
- ],
191
- cache_examples=False,
192
- autofocus=False,
193
- concurrency_limit=None
194
- )
 
 
 
 
 
 
 
 
 
 
195
 
196
  if __name__ == "__main__":
197
  demo.launch()
 
198
  # 旧版代码--------------------------------
199
  # import gradio as gr
200
 
 
98
  for output in stream:
99
  outputs += output
100
  token_count += len(output.split())
101
+ yield outputs, history + [(message, outputs)] # Update chatbot history
102
 
103
  end_time = time.time()
104
  latency = end_time - start_time
 
108
 
109
  description = """<p><center>
110
  <a href="https://huggingface.co/hugging-quants/Llama-3.2-1B-Instruct-Q4_K_M-GGUF" target="_blank">[Meta Llama 3.2 (1B)]</a>
 
111
  Meta Llama 3.2 (1B) is a multilingual large language model (LLM) optimized for conversational dialogue use cases, including agentic retrieval and summarization tasks. It outperforms many open-source and closed chat models on industry benchmarks, and is intended for commercial and research use in multiple languages.
 
112
  </center></p>
113
  """
114
 
115
+ with gr.Blocks(theme=gr.themes.Soft(primary_hue="violet", secondary_hue="violet", neutral_hue="gray",font=[gr.themes.GoogleFont("Exo"), "ui-sans-serif", "system-ui", "sans-serif"]).set(
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
116
  body_background_fill_dark="#16141c",
117
  block_background_fill_dark="#16141c",
118
  block_border_width="1px",
 
124
  background_fill_secondary_dark="#16141c",
125
  color_accent_soft_dark="transparent",
126
  code_background_fill_dark="#292733",
127
+ )) as demo:
128
+
129
+ chatbot = gr.Chatbot(scale=1, show_copy_button=True) # 移除 likeable=True
130
+ message = gr.Textbox(label="Your message")
131
+ model_dropdown = gr.Dropdown(
132
+ ["llama-3.2-1b-instruct-q4_k_m.gguf"],
133
+ value="llama-3.2-1b-instruct-q4_k_m.gguf",
134
+ label="Model"
135
+ )
136
+ system_message = gr.TextArea(value="""You are Meta Llama 3.2 (1B), an advanced AI assistant created by Meta. Your capabilities include:
137
+ 1. Complex reasoning and problem-solving
138
+ 2. Multilingual understanding and generation
139
+ 3. Creative and analytical writing
140
+ 4. Code understanding and generation
141
+ 5. Task decomposition and step-by-step guidance
142
+ 6. Summarization and information extraction
143
+ Always strive for accuracy, clarity, and helpfulness in your responses. If you're unsure about something, express your uncertainty. Use the following format for your responses:
144
+ """, label="System message")
145
+ max_tokens = gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max tokens")
146
+ temperature = gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature")
147
+ top_p = gr.Slider(minimum=0.1, maximum=2.0, value=0.9, step=0.05, label="Top-p")
148
+ top_k = gr.Slider(minimum=0, maximum=100, value=1, step=1, label="Top-k")
149
+ repeat_penalty = gr.Slider(minimum=0.0, maximum=2.0, value=1.1, step=0.1, label="Repetition penalty")
150
+
151
+ history = gr.State([])
152
+
153
+ def chat_fn(message, history, model, system_message, max_tokens, temperature, top_p, top_k, repeat_penalty):
154
+ return respond(message, history, model, system_message, max_tokens, temperature, top_p, top_k, repeat_penalty)
155
+
156
+ message.submit(chat_fn, [message, history, model_dropdown, system_message, max_tokens, temperature, top_p, top_k, repeat_penalty], [chatbot, history])
157
+
158
+ gr.Markdown(description)
159
 
160
  if __name__ == "__main__":
161
  demo.launch()
162
+
163
  # 旧版代码--------------------------------
164
  # import gradio as gr
165