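"""Gradio demo serving a 4-bit-quantized ChatGLM3-6B GGML model on CPU via chatglm.cpp."""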
import chatglm_cpp
import gradio as gr
from pathlib import Path
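
# Load the q4_0-quantized GGML weights through chatglm.cpp's Python bindings;
# the .bin file is expected to sit next to this script.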
model_file_path = "chatglm3-ggml_q4_0.bin"
chatglm_llm = chatglm_cpp.Pipeline(Path(model_file_path))
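
# Quick smoke test of the pipeline (a minimal sketch; run in a REPL, not here --
# it reuses only the generate() arguments already used below):
#   for piece in chatglm_llm.generate(prompt="你好", max_length=64, stream=True):
#       print(piece, end="", flush=True)

# Prompt suggestions surfaced in the UI through gr.Examples below.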
examples = [
"哈利波特和赫敏是什么关系?",
"如何学好历史?",
"明朝内阁制度的特点是什么?",
"如何进行经济建设?",
"How to promote Chinese traditional culture ?",
"你听说过马克思吗?",
]
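
# Streaming generator shared by the Submit button and the textbox's Enter event.
# It yields the accumulated response so Gradio re-renders the output incrementally.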
def process_stream(instruction, temperature, top_p, top_k, max_new_tokens, seed):
    # NOTE: `seed` comes from the UI but is not forwarded; the chatglm_cpp
    # calls below do not take a seed argument.
    if "[SEP]" not in instruction:
        # Single-turn: complete the raw prompt.
        streamer = chatglm_llm.generate(
            prompt=instruction,
            temperature=temperature,
            top_p=top_p,
            top_k=top_k,
            max_length=max_new_tokens,
            stream=True,
        )
    else:
        # Multi-turn: a "[SEP]"-delimited string is split into a chat history.
        history = instruction.split("[SEP]")
        streamer = chatglm_llm.chat(
            history=history,
            temperature=temperature,
            top_p=top_p,
            top_k=top_k,
            max_length=max_new_tokens,
            stream=True,
        )
    response = ""
    for new_text in streamer:
        response += new_text
        yield response
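
# Build the Gradio interface: a prompt box, sampling controls in an accordion,
# and a streaming Markdown output panel.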
with gr.Blocks(
    theme=gr.themes.Soft(),
    css='''
    .header img {
        vertical-align: middle;
        width: 33px;
        height: 33px;
    }
    .header h1 {
        top: 18px;
        left: 10px;
    }
    .disclaimer {font-variant-caps: all-small-caps;}
    ''',
) as demo:
    gr.HTML(
        """
        <div class="header">
            <h1><center>
                <img src="https://huggingface.co/spaces/svjack/chatglm3-6b-ggml/resolve/main/hanuman.png">
                ChatGLM3 on CPU in CPP
            </center></h1>
        </div>
        This demo uses the <a href="https://github.com/li-plus/chatglm.cpp">chatglm.cpp</a> library on 2 CPU cores.
        """
    )
    with gr.Row():
        with gr.Column():
            with gr.Row():
                instruction = gr.Textbox(
                    placeholder="Enter your question or instruction here",
                    label="Question/Instruction",
                    elem_id="q-input",
                )
            with gr.Accordion("Advanced Options:", open=False):
                with gr.Row():
                    with gr.Column():
                        with gr.Row():
                            temperature = gr.Slider(
                                label="Temperature",
                                value=0.5,
                                minimum=0.1,
                                maximum=1.0,
                                step=0.1,
                                interactive=True,
                                info="Higher values produce more diverse outputs",
                            )
                    with gr.Column():
                        with gr.Row():
                            top_p = gr.Slider(
                                label="Top-p (nucleus sampling)",
                                value=0.95,
                                minimum=0.0,
                                maximum=1.0,
                                step=0.01,
                                interactive=True,
                                info=(
                                    "Sample from the smallest possible set of tokens whose cumulative probability "
                                    "exceeds top_p. Set to 1 to disable and sample from all tokens."
                                ),
                            )
                    with gr.Column():
                        with gr.Row():
                            top_k = gr.Slider(
                                label="Top-k",
                                value=40,
                                minimum=5,
                                maximum=80,
                                step=1,
                                interactive=True,
                                info="Sample from a shortlist of the top-k most likely tokens.",
                            )
                    with gr.Column():
                        with gr.Row():
                            max_new_tokens = gr.Slider(
                                label="Maximum new tokens",
                                value=256,
                                minimum=0,
                                maximum=1024,
                                step=5,
                                interactive=True,
                                info="The maximum number of new tokens to generate",
                            )
                    with gr.Column():
                        with gr.Row():
                            seed = gr.Number(
                                label="Seed",
                                value=42,
                                interactive=True,
                                info="The seed to use for the generation",
                                precision=0,
                            )
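            # Clicking Submit (or pressing Enter in the textbox) streams tokens
            # into the Markdown panel below; the handlers are wired up after the layout.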
            with gr.Row():
                submit = gr.Button("Submit")
    with gr.Row():
        with gr.Tab(label="ChatGLM3-6b"):
            gr.Markdown("**ChatGLM3-6b**")
            output_7b = gr.Markdown()
    with gr.Row():
        gr.Examples(
            examples=examples,
            inputs=[instruction],
            cache_examples=False,
            fn=process_stream,
            outputs=output_7b,
        )
    # Stream generated tokens into the output panel on button click or Enter.
    submit.click(
        process_stream,
        inputs=[instruction, temperature, top_p, top_k, max_new_tokens, seed],
        outputs=output_7b,
    )
    instruction.submit(
        process_stream,
        inputs=[instruction, temperature, top_p, top_k, max_new_tokens, seed],
        outputs=output_7b,
    )
# Bind to all interfaces so the app is reachable from outside the container.
demo.launch(server_name="0.0.0.0", debug=True)