# llama-uk / app.py — Gradio streaming chat demo for the theodotus/llama-uk model
# (header reconstructed from page-scrape residue; last commit 5d295fa, "Fixed tokenization")
from ctransformers import AutoModelForCausalLM
import gradio as gr
def generate_prompt(history, end_token="</s>", context_turns=1):
    """Build the model prompt from a Gradio chatbot history.

    Parameters
    ----------
    history : list of [user_message, bot_reply] pairs; the final pair is the
        turn being generated (its bot field is ignored here).
    end_token : marker appended after each completed bot turn; the default
        matches the module-level ``end_token = "</s>"`` constant.
    context_turns : how many completed exchanges before the current one to
        include as context. The default of 1 preserves the original
        ``history[-2:-1]`` behaviour (empty context for a one-turn history).

    Returns
    -------
    str : prompt ending in "<bot>:" so the model continues as the bot.
    """
    # Leading space matches the tokenization the model expects.
    prompt = " "
    # history[-(context_turns+1):-1] == history[-2:-1] when context_turns == 1,
    # and yields [] for a single-turn history, as before.
    start = -(context_turns + 1)
    for user_msg, bot_msg in history[start:-1]:
        prompt += f"<human>: {user_msg}\n<bot>: {bot_msg}{end_token}\n"
    prompt += f"<human>: {history[-1][0]}\n<bot>:"
    return prompt
def generate(history):
    """Return a token stream for the latest user turn in *history*.

    Uses deterministic decoding (temperature=0) with a mild repetition
    penalty; stream=True makes the model yield tokens incrementally so the
    UI can render them as they arrive.
    """
    chat_prompt = generate_prompt(history)
    return llm(chat_prompt, stream=True, temperature=0, repetition_penalty=1.2)
# Load the theodotus/llama-uk checkpoint through ctransformers for CPU inference.
# NOTE(review): model_file="model.bin" with model_type='llama' suggests GGML-format
# LLaMA weights — confirm against the files actually published in the repo.
llm = AutoModelForCausalLM.from_pretrained("theodotus/llama-uk", model_file="model.bin", model_type='llama')
# End-of-sequence marker appended after each completed bot turn when building prompts.
end_token = "</s>"
# Chat UI: transcript view, free-text input, and a button to reset the conversation.
with gr.Blocks() as demo:
    chatbot = gr.Chatbot()
    msg = gr.Textbox()
    clear = gr.Button("Clear")
    def user(user_message, history):
        """Append the submitted message as a new [user, ""] pair and clear the textbox."""
        return "", history + [[user_message, ""]]
    def bot(history):
        """Stream model tokens into the last history entry, yielding after each token.

        Each yield pushes the partially-built reply to the Chatbot component,
        producing the typewriter effect.
        """
        streamer = generate(history)
        for token in streamer:
            history[-1][1] += token
            yield history
    # queue=False on the submit step shows the user's message immediately;
    # .then(...) chains the (queued) streaming generation afterwards.
    msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
        bot, chatbot, chatbot
    )
    # Returning None resets the Chatbot's value, clearing the transcript.
    clear.click(lambda: None, None, chatbot, queue=False)
# Queueing is required for generator (streaming) event handlers in Gradio.
demo.queue()
if __name__ == "__main__":
    demo.launch()