import chatglm_cpp
import gradio as gr
from pathlib import Path

# Load the 4-bit-quantized ChatGLM3-6B GGML weights into a chatglm.cpp pipeline.
model_file_path = "chatglm3-ggml_q4_0.bin"
chatglm_llm = chatglm_cpp.Pipeline(Path(model_file_path))
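# A GGML file like this can be produced with chatglm.cpp's convert script; the
# exact command below is an assumption based on the filename (see the
# chatglm.cpp README for the authoritative invocation):
#   python3 chatglm_cpp/convert.py -i THUDM/chatglm3-6b -t q4_0 -o chatglm3-ggml_q4_0.bin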
# Example prompts (mostly Chinese, matching the model's primary language).
examples = [
    "哈利波特和赫敏是什么关系?",  # What is the relationship between Harry Potter and Hermione?
    "如何学好历史?",  # How can I study history well?
    "明朝内阁制度的特点是什么?",  # What are the features of the Ming dynasty cabinet system?
    "如何进行经济建设?",  # How should economic development be carried out?
    "How to promote Chinese traditional culture?",
    "你听说过马克思吗?",  # Have you heard of Marx?
]
def process_stream(instruction, temperature, top_p, top_k, max_new_tokens, seed):
    # NOTE: `seed` mirrors the UI input but is not forwarded below; the
    # chatglm_cpp pipeline calls used here do not take a seed argument.
    if "[SEP]" not in instruction:
        # Single-turn input: plain prompt completion.
        streamer = chatglm_llm.generate(
            prompt=instruction,
            temperature=temperature,
            top_p=top_p,
            top_k=top_k,
            max_length=max_new_tokens,
            stream=True,
        )
    else:
        # Multi-turn input: turns arrive joined by "[SEP]"; split them back
        # into a history list for the chat API.
        history = instruction.split("[SEP]")
        streamer = chatglm_llm.chat(
            history=history,
            temperature=temperature,
            top_p=top_p,
            top_k=top_k,
            max_length=max_new_tokens,
            stream=True,
        )
    # Accumulate streamed chunks and yield the running response so Gradio
    # re-renders the output as it grows.
    response = ""
    for new_text in streamer:
        response += new_text
        yield response
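# Usage sketch (assumes the GGML weights above are present): process_stream is a
# generator, so partial responses can be consumed incrementally, e.g.
#   for partial in process_stream("如何学好历史?", 0.5, 0.95, 40, 256, 42):
#       print(partial)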
with gr.Blocks(
    theme=gr.themes.Soft(),
    css='''
    .header img {
        float: none;
        width: 33px;
        height: 33px;
    }
    .header h1 {
        top: 18px;
        left: 10px;
    }
    .disclaimer {font-variant-caps: all-small-caps;}
    ''',
) as demo:
    gr.HTML(
        """
        <div class="header">
            <h1><center><img src="https://huggingface.co/spaces/svjack/chatglm3-6b-ggml/resolve/main/hanuman.png">
            ChatGLM3 on CPU in CPP</center></h1>
        </div>
        This demo uses the <a href="https://github.com/li-plus/chatglm.cpp">chatglm.cpp</a> library on 2 CPU cores.
        """
    )
    with gr.Row():
        with gr.Column():
            with gr.Row():
                instruction = gr.Textbox(
                    placeholder="Enter your question or instruction here",
                    label="Question/Instruction",
                    elem_id="q-input",
                )
            with gr.Accordion("Advanced Options:", open=False):
                with gr.Row():
                    with gr.Column():
                        with gr.Row():
                            temperature = gr.Slider(
                                label="Temperature",
                                value=0.5,
                                minimum=0.1,
                                maximum=1.0,
                                step=0.1,
                                interactive=True,
                                info="Higher values produce more diverse outputs",
                            )
                    with gr.Column():
                        with gr.Row():
                            top_p = gr.Slider(
                                label="Top-p (nucleus sampling)",
                                value=0.95,
                                minimum=0.0,
                                maximum=1.0,
                                step=0.01,
                                interactive=True,
                                info=(
                                    "Sample from the smallest possible set of tokens whose cumulative probability "
                                    "exceeds top_p. Set to 1 to disable and sample from all tokens."
                                ),
                            )
                    with gr.Column():
                        with gr.Row():
                            top_k = gr.Slider(
                                label="Top-k",
                                value=40,
                                minimum=5,
                                maximum=80,
                                step=1,
                                interactive=True,
                                info="Sample from a shortlist of the top-k tokens. Set to 0 to disable and sample from all tokens.",
                            )
                    with gr.Column():
                        with gr.Row():
                            max_new_tokens = gr.Slider(
                                label="Maximum new tokens",
                                value=256,
                                minimum=0,
                                maximum=1024,
                                step=5,
                                interactive=True,
                                info="The maximum number of new tokens to generate",
                            )
                    with gr.Column():
                        with gr.Row():
                            seed = gr.Number(
                                label="Seed",
                                value=42,
                                interactive=True,
                                info="The seed to use for the generation",
                                precision=0,
                            )
            with gr.Row():
                submit = gr.Button("Submit")
    with gr.Row():
        with gr.Tab():
            gr.Markdown("**ChatGLM3-6b**")
            output_7b = gr.Markdown()
    with gr.Row():
        gr.Examples(
            examples=examples,
            inputs=[instruction],
            cache_examples=False,
            fn=process_stream,
            outputs=output_7b,
        )
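    # With cache_examples=False, clicking an example only fills the textbox;
    # process_stream is not pre-run, so generation still goes through Submit.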
    # Wire both the Submit button and pressing Enter in the textbox to the
    # streaming generator.
    submit.click(
        process_stream,
        inputs=[instruction, temperature, top_p, top_k, max_new_tokens, seed],
        outputs=output_7b,
    )
    instruction.submit(
        process_stream,
        inputs=[instruction, temperature, top_p, top_k, max_new_tokens, seed],
        outputs=output_7b,
    )
# Bind to all interfaces so the hosting environment's proxy can reach the app.
demo.launch(server_name="0.0.0.0", debug=True)
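# Local usage sketch (assumption: the GGML weights sit next to this script):
# run `python app.py` and open http://localhost:7860, Gradio's default port.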