# MiniChat-3B / app.py
# Author: Samuel L Meyers
# "NOW. We say hello to the future."
# Revision: 9a75ff9
import logging
from typing import cast
from threading import Lock
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
from conversation import get_default_conv_template
import gradio as gr
from llama_cpp import Llama, CompletionChunk
import json
"""
model = Model(model_path='/path/to/model.bin')
while True:
try:
prompt = input("You: ", flush=True)
if prompt == '':
continue
print(f"AI:", end='')
for token in model.generate(prompt):
print(f"{token}", end='', flush=True)
print()
except KeyboardInterrupt:
break
"""
from huggingface_hub import hf_hub_download
# Quantized (q8_0) GGUF weights file name inside the Hub repo.
model_path = "minichat-3b.q8_0.gguf"
# Download the weights from the Hugging Face Hub (cached locally after the
# first run) and load them with llama.cpp for CPU inference.
mdlpath = hf_hub_download(repo_id="afrideva/MiniChat-3B-GGUF", filename=model_path)
lcpp_model = Llama(model_path=mdlpath)
def m3b_talk(text):
    """Send *text* to MiniChat-3B and return the model's reply as a string.

    The user message is wrapped in MiniChat's chat template before being
    passed to the llama.cpp model. Because the completion is requested with
    ``echo=True``, the returned text contains the prompt as a prefix; it is
    stripped back out before returning.

    :param text: The raw user message.
    :return: The assistant's reply with the prompt template removed.
    """
    # MiniChat chat template: close the user turn, leave the assistant
    # turn open so the model generates the reply.
    formatted_query = "<s> [|User|]" + text + "</s> [|Assistant|]"
    # Stop on the next user turn marker or a newline so generation ends
    # after a single assistant reply.
    result = lcpp_model(formatted_query, stop=["[|User|]", "\n"], echo=True)
    # Join every returned choice (normally exactly one) at C speed instead
    # of accumulating with +=.
    resp = "".join(choice["text"] for choice in result["choices"])
    # Log instead of printing debug output to stdout.
    logging.info("m3b_talk raw completion: %s", resp)
    # echo=True prepends the prompt to the completion text; remove it.
    return resp.replace(formatted_query, "")
def main():
    """Assemble the Gradio UI for chatting with MiniChat-3B and launch it."""
    logging.basicConfig(level=logging.INFO)
    with gr.Blocks() as demo:
        # Header row.
        with gr.Row(variant="panel"):
            gr.Markdown("## Talk to MiniChat-3B\n\nTalk to MiniChat-3B.")
        # Input on the left, model reply on the right.
        with gr.Row(variant="panel"):
            with gr.Column(variant="panel"):
                message_box = gr.Textbox(label="Message", placeholder="Type something here...")
            with gr.Column(variant="panel"):
                reply_box = gr.Textbox()
                send_button = gr.Button("Send")
                # Wire the button to the inference function; expose it on the
                # HTTP API under the same name as before.
                send_button.click(m3b_talk, inputs=message_box, outputs=reply_box, api_name="talk_m3b")
    demo.queue().launch()
if __name__ == "__main__":
main()