import chatglm_cpp
import gradio as gr
from pathlib import Path
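
# The quantized weights are expected to sit next to this script. If they are
# missing, one way to fetch them is via huggingface_hub; a minimal sketch,
# assuming a hypothetical Hub repo ("your-username/chatglm2-ggml") that hosts
# the file:
#
#     from huggingface_hub import hf_hub_download
#     model_file_path = hf_hub_download(
#         repo_id="your-username/chatglm2-ggml",  # hypothetical repo id
#         filename="chatglm2-ggml-q4_0.bin",
#     )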
# Load the 4-bit quantized (q4_0) GGML weights into a chatglm.cpp pipeline.
model_file_path = "chatglm2-ggml-q4_0.bin"
chatglm_llm = chatglm_cpp.Pipeline(Path(model_file_path))
examples = [
    "苹果设备购买apple care有什么好处?",  # What are the benefits of buying AppleCare for Apple devices?
    "如何学好历史?",  # How can I study history well?
    "如何弘扬中华传统文化?",  # How can Chinese traditional culture be promoted?
    "How to promote Chinese traditional culture ?",
    "如何进行经济建设?",  # How should economic development be carried out?
    "写一段孔子与马克思的对话录。",  # Write a dialogue between Confucius and Marx.
]
def process_stream(instruction, temperature, top_p, top_k, max_new_tokens, seed):
    # `seed` is wired in from the UI but never forwarded: the chatglm_cpp calls
    # below take no seed argument, so it currently has no effect on generation.
    if "[SEP]" not in instruction:
        # Single-turn: run a plain completion on the raw prompt.
        streamer = chatglm_llm.generate(
            prompt=instruction,
            temperature=temperature,
            top_p=top_p,
            top_k=top_k,
            max_length=max_new_tokens,
            stream=True,
        )
    else:
        # Multi-turn: "[SEP]" separates alternating user/assistant turns.
        history = instruction.split("[SEP]")
        streamer = chatglm_llm.chat(
            history=history,
            temperature=temperature,
            top_p=top_p,
            top_k=top_k,
            max_length=max_new_tokens,
            stream=True,
        )
    # Stream partial output back to the UI as tokens arrive.
    response = ""
    for new_text in streamer:
        response += new_text
        yield response
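
# process_stream treats "[SEP]" as a turn separator: an input such as
# "q1[SEP]a1[SEP]q2" becomes the history list ["q1", "a1", "q2"] passed to
# chat(). A small helper illustrating that convention (a convention of this
# demo, not of chatglm_cpp; it is not wired into the UI):
def build_history_prompt(turns):
    """Join alternating user/assistant turns into the [SEP]-delimited format."""
    return "[SEP]".join(turns)
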
with gr.Blocks(
    theme=gr.themes.Soft(),
    css=".disclaimer {font-variant-caps: all-small-caps;}",
) as demo:
    gr.Markdown(
        """<h1><center> ChatGLM2 on CPU in CPP 🐒</center></h1>
This demo uses the [chatglm.cpp](https://github.com/li-plus/chatglm.cpp) library on 2 CPU cores.
"""
    )
    with gr.Row():
        with gr.Column():
            with gr.Row():
                instruction = gr.Textbox(
                    placeholder="Enter your question or instruction here",
                    label="Question/Instruction",
                    elem_id="q-input",
                )
            with gr.Accordion("Advanced Options:", open=False):
                with gr.Row():
                    with gr.Column():
                        with gr.Row():
                            temperature = gr.Slider(
                                label="Temperature",
                                value=0.8,
                                minimum=0.1,
                                maximum=1.0,
                                step=0.1,
                                interactive=True,
                                info="Higher values produce more diverse outputs",
                            )
                    with gr.Column():
                        with gr.Row():
                            top_p = gr.Slider(
                                label="Top-p (nucleus sampling)",
                                value=0.95,
                                minimum=0.0,
                                maximum=1.0,
                                step=0.01,
                                interactive=True,
                                info=(
                                    "Sample from the smallest possible set of tokens whose cumulative probability "
                                    "exceeds top_p. Set to 1 to disable and sample from all tokens."
                                ),
                            )
                    with gr.Column():
                        with gr.Row():
                            top_k = gr.Slider(
                                label="Top-k",
                                value=40,
                                minimum=5,
                                maximum=80,
                                step=1,
                                interactive=True,
                                info="Sample from a shortlist of the top-k tokens. Set to 0 to disable and sample from all tokens.",
                            )
                    with gr.Column():
                        with gr.Row():
                            max_new_tokens = gr.Slider(
                                label="Maximum new tokens",
                                value=256,
                                minimum=0,
                                maximum=1024,
                                step=5,
                                interactive=True,
                                info="The maximum number of new tokens to generate",
                            )
                    with gr.Column():
                        with gr.Row():
                            seed = gr.Number(
                                label="Seed",
                                value=42,
                                interactive=True,
                                info="The seed to use for the generation",
                                precision=0,
                            )
            with gr.Row():
                submit = gr.Button("Submit")
    with gr.Row():
        with gr.Box():
            gr.Markdown("**ChatGLM2-6B**")
            output_6b = gr.Markdown()
    with gr.Row():
        gr.Examples(
            examples=examples,
            inputs=[instruction],
            cache_examples=False,
            fn=process_stream,
            outputs=output_6b,
        )
    submit.click(
        process_stream,
        inputs=[instruction, temperature, top_p, top_k, max_new_tokens, seed],
        outputs=output_6b,
    )
    instruction.submit(
        process_stream,
        inputs=[instruction, temperature, top_p, top_k, max_new_tokens, seed],
        outputs=output_6b,
    )
# concurrency_count=1 runs one generation at a time so the 2 CPU cores are not
# oversubscribed; up to 4 requests wait in the queue.
demo.queue(max_size=4, concurrency_count=1).launch(debug=True)