# HuggingFace Spaces page residue (status banner): "Spaces: Sleeping" — kept as a comment, not code.
# --- Module setup: imports, shared state, and one-time model load ----------
import datetime

import gradio as gr
from llama_cpp import Llama

# Rolling conversation history; read (printed) by combine() below.
convHistory = ''
# GGUF model file and the context window handed to llama.cpp.
modelfile = "TrillaTag-0.0.3_V2.gguf"
contextlength = 128

# Load the model once at import time and report how long it took.
print("loading model...")
load_started = datetime.datetime.now()
llm = Llama(
    model_path=modelfile,
    n_ctx=contextlength,
)
load_elapsed = datetime.datetime.now() - load_started
print(f"Model loaded in {load_elapsed}")
def combine(prompt, temperature, max_new_tokens, top_p, repeat_penalty):
    """Stream a completion from the globally loaded llama.cpp model.

    Wraps *prompt* in ``[INST]...[/INST]`` tags, streams chunks from the
    module-level ``llm``, and yields progressive updates for the Gradio UI.

    Parameters
    ----------
    prompt : str
        Raw user text to complete.
    temperature : float
        Sampling temperature.
    max_new_tokens : int
        Cap on the number of generated tokens.
    top_p : float
        Nucleus-sampling threshold.
    repeat_penalty : float
        Penalty applied to repeated tokens.

    Yields
    ------
    tuple
        ``(generation, delta, prompt_tokens, answer_tokens, total_tokens)``:
        partial output text, elapsed wall-clock time, and three token-count
        status strings for the UI textboxes.
    """
    global convHistory
    prompt = f"[INST]{prompt}[/INST]"
    start = datetime.datetime.now()
    generation = ""
    # Tokenize the (wrapped) prompt once and reuse the count; the original
    # re-tokenized the prompt on every streamed chunk.
    n_prompt = len(llm.tokenize(bytes(prompt, encoding='utf-8')))
    prompt_tokens = f"Prompt Tokens: {n_prompt}"
    # Pre-initialize the streamed stats so an empty stream cannot leave them
    # unbound (the original raised NameError at the final return if llm()
    # yielded nothing).
    delta = datetime.datetime.now() - start
    answer_tokens = "Out Tkns: 0"
    total_tokens = f"Total Tkns: {n_prompt}"
    for chunk in llm(prompt,
                     max_tokens=max_new_tokens,
                     stop=["</s>"],
                     temperature=temperature,
                     repeat_penalty=repeat_penalty,
                     top_p=top_p,
                     echo=False,
                     stream=True):
        generation += chunk["choices"][0]["text"]
        n_out = len(llm.tokenize(bytes(generation, encoding='utf-8')))
        answer_tokens = f"Out Tkns: {n_out}"
        total_tokens = f"Total Tkns: {n_prompt + n_out}"
        delta = datetime.datetime.now() - start
        yield generation, delta, prompt_tokens, answer_tokens, total_tokens
    print(convHistory)
    # NOTE(review): a generator's return value is discarded by Gradio; kept
    # for compatibility with the original shape of the function.
    return generation, delta, prompt_tokens, answer_tokens, total_tokens
# ---------------------------- MAIN GRADIO INTERFACE ------------------------
with gr.Blocks(theme='Medguy/base2') as demo:  # alt themes: gr.themes.Glass(), 'remilia/Ghostly'
    # TITLE / STATUS SECTION: four read-only boxes fed by combine()'s yields.
    with gr.Row(variant='compact'):
        with gr.Column(scale=10):
            with gr.Row():
                with gr.Column(min_width=80):
                    gentime = gr.Textbox(value="", placeholder="Generation Time:", min_width=50, show_label=False)
                with gr.Column(min_width=80):
                    prompttokens = gr.Textbox(value="", placeholder="Prompt Tkn:", min_width=50, show_label=False)
                with gr.Column(min_width=80):
                    outputokens = gr.Textbox(value="", placeholder="Output Tkn:", min_width=50, show_label=False)
                with gr.Column(min_width=80):
                    totaltokens = gr.Textbox(value="", placeholder="Total Tokens:", min_width=50, show_label=False)

    # PLAYGROUND SECTION: sampling controls on the left, prompt/output right.
    with gr.Row():
        with gr.Column(scale=1):
            # Typo fixed: "Tunning" -> "Tuning"; f-prefix dropped (no placeholders).
            gr.Markdown(
                """
                ### Tuning Parameters""")
            temp = gr.Slider(label="Temperature", minimum=0.0, maximum=1.0, step=0.01, value=0.15)
            top_p = gr.Slider(label="Top_P", minimum=0.0, maximum=1.0, step=0.01, value=0.15)
            repPen = gr.Slider(label="Repetition Penalty", minimum=0.0, maximum=4.0, step=0.01, value=1)
            # Output length is capped by the model's context window.
            max_len = gr.Slider(label="Maximum output length", minimum=10, maximum=contextlength, step=2, value=20)
            btn = gr.Button(value="Generate", variant='primary')
        with gr.Column(scale=4):
            prompt = gr.Textbox(label="User Prompt", lines=6, show_copy_button=True)
            output = gr.Textbox(value="", label="Output", lines=12, show_copy_button=True)

    # Wire the button: combine() streams (text, time, token stats) into the UI.
    btn.click(combine,
              inputs=[prompt, temp, max_len, top_p, repPen],
              outputs=[output, gentime, prompttokens, outputokens, totaltokens])

if __name__ == "__main__":
    demo.launch(inbrowser=True)