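"""Gradio demo serving a 4-bit-quantized ChatGLM3-6B GGML model on CPU via chatglm.cpp."""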
import chatglm_cpp
import gradio as gr
from pathlib import Path
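
# Load the q4_0-quantized GGML weights through chatglm.cpp's Python bindings;
# the .bin file is expected to sit next to this script.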
model_file_path = "chatglm3-ggml_q4_0.bin"
chatglm_llm = chatglm_cpp.Pipeline(Path(model_file_path))
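
# Quick smoke test of the pipeline (a minimal sketch; run in a REPL, not here --
# it reuses only the generate() arguments already used below):
#   for piece in chatglm_llm.generate(prompt="你好", max_length=64, stream=True):
#       print(piece, end="", flush=True)

# Prompt suggestions surfaced in the UI through gr.Examples below.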
examples = [
"哈利波特和赫敏是什么关系?",
"如何学好历史?",
"明朝内阁制度的特点是什么?",
"如何进行经济建设?",
"How to promote Chinese traditional culture ?",
"你听说过马克思吗?",
]
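
# Streaming generator shared by the Submit button and the textbox's Enter event.
# It yields the accumulated response so Gradio re-renders the output incrementally.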
def process_stream(instruction, temperature, top_p, top_k, max_new_tokens, seed):
    # NOTE: `seed` comes from the UI but is not forwarded; the chatglm_cpp
    # calls below do not take a seed argument.
    if "[SEP]" not in instruction:
        # Single-turn: complete the raw prompt.
        streamer = chatglm_llm.generate(
            prompt=instruction,
            temperature=temperature,
            top_p=top_p,
            top_k=top_k,
            max_length=max_new_tokens,
            stream=True,
        )
    else:
        # Multi-turn: a "[SEP]"-delimited string is split into a chat history.
        history = instruction.split("[SEP]")
        streamer = chatglm_llm.chat(
            history=history,
            temperature=temperature,
            top_p=top_p,
            top_k=top_k,
            max_length=max_new_tokens,
            stream=True,
        )
    response = ""
    for new_text in streamer:
        response += new_text
        yield response
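
# Build the Gradio interface: a prompt box, sampling controls in an accordion,
# and a streaming Markdown output panel.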
with gr.Blocks(
    theme=gr.themes.Soft(),
    css='''
    .header img {
        vertical-align: middle;
        width: 33px;
        height: 33px;
    }
    .header h1 {
        top: 18px;
        left: 10px;
    }
    .disclaimer {font-variant-caps: all-small-caps;}
    ''',
) as demo:
    gr.HTML(
        """
        <div class="header">
            <h1><center>
                <img src="https://huggingface.co/spaces/svjack/chatglm3-6b-ggml/resolve/main/hanuman.png">
                ChatGLM3 on CPU in CPP
            </center></h1>
        </div>
        This demo uses the <a href="https://github.com/li-plus/chatglm.cpp">chatglm.cpp</a> library on 2 CPU cores.
        """
    )
    with gr.Row():
        with gr.Column():
            with gr.Row():
                instruction = gr.Textbox(
                    placeholder="Enter your question or instruction here",
                    label="Question/Instruction",
                    elem_id="q-input",
                )
            with gr.Accordion("Advanced Options:", open=False):
                with gr.Row():
                    with gr.Column():
                        with gr.Row():
                            temperature = gr.Slider(
                                label="Temperature",
                                value=0.5,
                                minimum=0.1,
                                maximum=1.0,
                                step=0.1,
                                interactive=True,
                                info="Higher values produce more diverse outputs",
                            )
                    with gr.Column():
                        with gr.Row():
                            top_p = gr.Slider(
                                label="Top-p (nucleus sampling)",
                                value=0.95,
                                minimum=0.0,
                                maximum=1.0,
                                step=0.01,
                                interactive=True,
                                info=(
                                    "Sample from the smallest possible set of tokens whose cumulative probability "
                                    "exceeds top_p. Set to 1 to disable and sample from all tokens."
                                ),
                            )
                    with gr.Column():
                        with gr.Row():
                            top_k = gr.Slider(
                                label="Top-k",
                                value=40,
                                minimum=5,
                                maximum=80,
                                step=1,
                                interactive=True,
                                info="Sample from a shortlist of the top-k most likely tokens.",
                            )
                    with gr.Column():
                        with gr.Row():
                            max_new_tokens = gr.Slider(
                                label="Maximum new tokens",
                                value=256,
                                minimum=0,
                                maximum=1024,
                                step=5,
                                interactive=True,
                                info="The maximum number of new tokens to generate",
                            )
                    with gr.Column():
                        with gr.Row():
                            seed = gr.Number(
                                label="Seed",
                                value=42,
                                interactive=True,
                                info="The seed to use for the generation",
                                precision=0,
                            )
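            # Clicking Submit (or pressing Enter in the textbox) streams tokens
            # into the Markdown panel below; the handlers are wired up after the layout.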
            with gr.Row():
                submit = gr.Button("Submit")
    with gr.Row():
        with gr.Tab(label="ChatGLM3-6b"):
            gr.Markdown("**ChatGLM3-6b**")
            output_7b = gr.Markdown()
    with gr.Row():
        gr.Examples(
            examples=examples,
            inputs=[instruction],
            cache_examples=False,
            fn=process_stream,
            outputs=output_7b,
        )
    # Stream generated tokens into the output panel on button click or Enter.
    submit.click(
        process_stream,
        inputs=[instruction, temperature, top_p, top_k, max_new_tokens, seed],
        outputs=output_7b,
    )
    instruction.submit(
        process_stream,
        inputs=[instruction, temperature, top_p, top_k, max_new_tokens, seed],
        outputs=output_7b,
    )
# Bind to all interfaces so the app is reachable from outside the container.
demo.launch(server_name="0.0.0.0", debug=True)