# HuggingFace Spaces page residue (status banner): "Spaces: Sleeping" — kept as a comment, not code.
# --- Module setup: imports, shared state, and one-time model load ----------
import datetime

import gradio as gr
from llama_cpp import Llama

# Rolling conversation history; read (printed) by combine() below.
convHistory = ''
# GGUF model file and the context window handed to llama.cpp.
modelfile = "TrillaTag-0.0.3_V2.gguf"
contextlength = 128

# Load the model once at import time and report how long it took.
print("loading model...")
load_started = datetime.datetime.now()
llm = Llama(
    model_path=modelfile,
    n_ctx=contextlength,
)
load_elapsed = datetime.datetime.now() - load_started
print(f"Model loaded in {load_elapsed}")
def combine(prompt, temperature, max_new_tokens, top_p, repeat_penalty):
    """Stream a completion from the globally loaded llama.cpp model.

    Wraps *prompt* in ``[INST]...[/INST]`` tags, streams chunks from the
    module-level ``llm``, and yields progressive updates for the Gradio UI.

    Parameters
    ----------
    prompt : str
        Raw user text to complete.
    temperature : float
        Sampling temperature.
    max_new_tokens : int
        Cap on the number of generated tokens.
    top_p : float
        Nucleus-sampling threshold.
    repeat_penalty : float
        Penalty applied to repeated tokens.

    Yields
    ------
    tuple
        ``(generation, delta, prompt_tokens, answer_tokens, total_tokens)``:
        partial output text, elapsed wall-clock time, and three token-count
        status strings for the UI textboxes.
    """
    global convHistory
    prompt = f"[INST]{prompt}[/INST]"
    start = datetime.datetime.now()
    generation = ""
    # Tokenize the (wrapped) prompt once and reuse the count; the original
    # re-tokenized the prompt on every streamed chunk.
    n_prompt = len(llm.tokenize(bytes(prompt, encoding='utf-8')))
    prompt_tokens = f"Prompt Tokens: {n_prompt}"
    # Pre-initialize the streamed stats so an empty stream cannot leave them
    # unbound (the original raised NameError at the final return if llm()
    # yielded nothing).
    delta = datetime.datetime.now() - start
    answer_tokens = "Out Tkns: 0"
    total_tokens = f"Total Tkns: {n_prompt}"
    for chunk in llm(prompt,
                     max_tokens=max_new_tokens,
                     stop=["</s>"],
                     temperature=temperature,
                     repeat_penalty=repeat_penalty,
                     top_p=top_p,
                     echo=False,
                     stream=True):
        generation += chunk["choices"][0]["text"]
        n_out = len(llm.tokenize(bytes(generation, encoding='utf-8')))
        answer_tokens = f"Out Tkns: {n_out}"
        total_tokens = f"Total Tkns: {n_prompt + n_out}"
        delta = datetime.datetime.now() - start
        yield generation, delta, prompt_tokens, answer_tokens, total_tokens
    print(convHistory)
    # NOTE(review): a generator's return value is discarded by Gradio; kept
    # for compatibility with the original shape of the function.
    return generation, delta, prompt_tokens, answer_tokens, total_tokens
# ---------------------------- MAIN GRADIO INTERFACE ------------------------
with gr.Blocks(theme='Medguy/base2') as demo:  # alt themes: gr.themes.Glass(), 'remilia/Ghostly'
    # TITLE / STATUS SECTION: four read-only boxes fed by combine()'s yields.
    with gr.Row(variant='compact'):
        with gr.Column(scale=10):
            with gr.Row():
                with gr.Column(min_width=80):
                    gentime = gr.Textbox(value="", placeholder="Generation Time:", min_width=50, show_label=False)
                with gr.Column(min_width=80):
                    prompttokens = gr.Textbox(value="", placeholder="Prompt Tkn:", min_width=50, show_label=False)
                with gr.Column(min_width=80):
                    outputokens = gr.Textbox(value="", placeholder="Output Tkn:", min_width=50, show_label=False)
                with gr.Column(min_width=80):
                    totaltokens = gr.Textbox(value="", placeholder="Total Tokens:", min_width=50, show_label=False)

    # PLAYGROUND SECTION: sampling controls on the left, prompt/output right.
    with gr.Row():
        with gr.Column(scale=1):
            # Typo fixed: "Tunning" -> "Tuning"; f-prefix dropped (no placeholders).
            gr.Markdown(
                """
                ### Tuning Parameters""")
            temp = gr.Slider(label="Temperature", minimum=0.0, maximum=1.0, step=0.01, value=0.15)
            top_p = gr.Slider(label="Top_P", minimum=0.0, maximum=1.0, step=0.01, value=0.15)
            repPen = gr.Slider(label="Repetition Penalty", minimum=0.0, maximum=4.0, step=0.01, value=1)
            # Output length is capped by the model's context window.
            max_len = gr.Slider(label="Maximum output length", minimum=10, maximum=contextlength, step=2, value=20)
            btn = gr.Button(value="Generate", variant='primary')
        with gr.Column(scale=4):
            prompt = gr.Textbox(label="User Prompt", lines=6, show_copy_button=True)
            output = gr.Textbox(value="", label="Output", lines=12, show_copy_button=True)

    # Wire the button: combine() streams (text, time, token stats) into the UI.
    btn.click(combine,
              inputs=[prompt, temp, max_len, top_p, repPen],
              outputs=[output, gentime, prompttokens, outputokens, totaltokens])

if __name__ == "__main__":
    demo.launch(inbrowser=True)