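"""Streaming chat demo: a Gradio UI over the theodotus/llama-uk model loaded
with ctransformers. The bot reply is streamed into the chat token by token."""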
from ctransformers import AutoModelForCausalLM
import gradio as gr


def generate_prompt(history):
    # The model expects "<human>:" / "<bot>:" turns; start with a single space.
    prompt = " "
    # Keep only the previous exchange ([-2:-1]) as context; older turns are dropped.
    for chain in history[-2:-1]:
        prompt += f"<human>: {chain[0]}\n<bot>: {chain[1]}{end_token}\n"
    # Append the new user message and leave the bot turn open for the model.
    prompt += f"<human>: {history[-1][0]}\n<bot>:"
    return prompt
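
# Worked example: for history = [["Hi", "Hello!"], ["How are you?", ""]],
# generate_prompt returns:
#   " <human>: Hi\n<bot>: Hello!</s>\n<human>: How are you?\n<bot>:"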


def generate(history):
    prompt = generate_prompt(history)
    # Greedy decoding (temperature=0) with a mild repetition penalty; stream=True
    # returns a generator that yields text fragments as they are produced.
    streamer = llm(prompt, stream=True, temperature=0, repetition_penalty=1.2)
    return streamer
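
# Note: generation stops when the model emits its EOS token. If the model runs
# on into a new "<human>:" turn, passing a stop sequence (ctransformers accepts
# a `stop` list of strings, e.g. stop=["<human>:"]) would be one way to cut it off.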


# Load the model weights from the Hugging Face Hub via ctransformers.
llm = AutoModelForCausalLM.from_pretrained(
    "theodotus/llama-uk", model_file="model.bin", model_type="llama"
)
# LLaMA's end-of-sequence token, used to close each completed bot turn.
end_token = "</s>"


with gr.Blocks() as demo:
    chatbot = gr.Chatbot()
    msg = gr.Textbox()
    clear = gr.Button("Clear")

    def user(user_message, history):
        # Clear the textbox and append the new message with an empty bot slot.
        return "", history + [[user_message, ""]]

    def bot(history):
        streamer = generate(history)
        # Append each streamed token to the pending bot reply; yielding the
        # updated history makes Gradio re-render the chat incrementally.
        for token in streamer:
            history[-1][1] += token
            yield history

    # Show the user turn immediately (queue=False), then stream the bot reply.
    msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
        bot, chatbot, chatbot
    )
    clear.click(lambda: None, None, chatbot, queue=False)


# Queuing is required for generator handlers like bot() to stream updates.
demo.queue()

if __name__ == "__main__":
    demo.launch()
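
# Usage sketch (assuming this file is saved as app.py):
#   pip install ctransformers gradio
#   python app.py
# Gradio prints a local URL where the chat UI is served.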