import logging

import gradio as gr
from huggingface_hub import hf_hub_download
from llama_cpp import Llama
""" | |
model = Model(model_path='/path/to/model.bin') | |
while True: | |
try: | |
prompt = input("You: ", flush=True) | |
if prompt == '': | |
continue | |
print(f"AI:", end='') | |
for token in model.generate(prompt): | |
print(f"{token}", end='', flush=True) | |
print() | |
except KeyboardInterrupt: | |
break | |
""" | |
# Download the quantized MiniChat-3B weights from the Hugging Face Hub
# (cached locally after the first call) and load them with llama.cpp.
model_path = "minichat-3b.q8_0.gguf"
mdlpath = hf_hub_download(repo_id="afrideva/MiniChat-3B-GGUF", filename=model_path)
lcpp_model = Llama(model_path=mdlpath)
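
# A hedged note: Llama() also accepts tuning knobs such as n_ctx (context
# window size) and n_threads; the defaults are used above. For example:
#
#     lcpp_model = Llama(model_path=mdlpath, n_ctx=2048, n_threads=4)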

def m3b_talk(text):
    # Wrap the user message in MiniChat-3B's chat template.
    formattedQuery = "<s> [|User|]" + text + "</s> [|Assistant|]"
    resp = ""
    r = lcpp_model(formattedQuery, stop=["[|User|]", "\n"], echo=True)
    for c in r["choices"]:
        resp += c["text"]
    print(resp)
    # echo=True returns the prompt along with the completion, so strip it out.
    return resp.replace(formattedQuery, "")
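
# A hedged sketch of a streaming variant, assuming llama-cpp-python's
# stream=True interface (each yielded chunk carries an incremental
# chunk["choices"][0]["text"]). Gradio treats generator handlers as
# streaming outputs, so this could be swapped in for m3b_talk below;
# it is not wired up here.
def m3b_talk_stream(text):
    formattedQuery = "<s> [|User|]" + text + "</s> [|Assistant|]"
    resp = ""
    for chunk in lcpp_model(formattedQuery, stop=["[|User|]", "\n"], stream=True):
        resp += chunk["choices"][0]["text"]
        yield resp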

def main():
    logging.basicConfig(level=logging.INFO)

    with gr.Blocks() as demo:
        with gr.Row(variant="panel"):
            gr.Markdown("## Talk to MiniChat-3B\n\nType a message below and press Send.")
        with gr.Row(variant="panel"):
            with gr.Column(variant="panel"):
                m3b_talk_input = gr.Textbox(label="Message", placeholder="Type something here...")
            with gr.Column(variant="panel"):
                m3b_talk_output = gr.Textbox()
                m3b_talk_btn = gr.Button("Send")
        m3b_talk_btn.click(m3b_talk, inputs=m3b_talk_input, outputs=m3b_talk_output, api_name="talk_m3b")

    demo.queue().launch()

if __name__ == "__main__":
    main()
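
# A hedged usage sketch: because the click handler registers
# api_name="talk_m3b", the running app can also be queried with
# gradio_client ("owner/space-name" is a hypothetical Space id):
#
#     from gradio_client import Client
#     client = Client("owner/space-name")
#     print(client.predict("Hello!", api_name="/talk_m3b"))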