Spaces:
Sleeping
Sleeping
File size: 2,041 Bytes
53f8a32 f4fe081 da8a172 d487976 da8a172 9a75ff9 b424c57 fe36794 dc422ae 2a8c299 d487976 2a8c299 fe36794 ecd2bcd 2a8c299 c17b3f7 fe36794 d487976 fe36794 320c4ed 9a75ff9 aed5924 9a75ff9 d487976 53f8a32 d487976 53f8a32 d487976 53f8a32 9a75ff9 53f8a32 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 |
import logging
from typing import cast
from threading import Lock
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
from conversation import get_default_conv_template
import gradio as gr
from llama_cpp import Llama, CompletionChunk
import json
"""
model = Model(model_path='/path/to/model.bin')
while True:
try:
prompt = input("You: ", flush=True)
if prompt == '':
continue
print(f"AI:", end='')
for token in model.generate(prompt):
print(f"{token}", end='', flush=True)
print()
except KeyboardInterrupt:
break
"""
from huggingface_hub import hf_hub_download
# Fetch the quantized MiniChat-3B GGUF weights from the Hugging Face Hub
# (cached locally after the first download) and load them with llama.cpp.
model_path = "minichat-3b.q8_0.gguf"
mdlpath = hf_hub_download(repo_id="afrideva/MiniChat-3B-GGUF", filename=model_path)
lcpp_model = Llama(model_path=mdlpath)
def m3b_talk(text):
    """Send a user message to MiniChat-3B and return the generated reply.

    Args:
        text: The raw user message.

    Returns:
        The model's completion text with the prompt scaffolding stripped out.
    """
    # MiniChat's chat template: a user turn followed by an assistant turn.
    formatted_query = "<s> [|User|]" + text + "</s> [|Assistant|]"
    # echo=True makes llama.cpp return the prompt as part of the completion;
    # it is removed from the reply below via str.replace.
    result = lcpp_model(formatted_query, stop=["[|User|]", "\n"], echo=True)
    resp = "".join(choice["text"] for choice in result["choices"])
    logging.info("m3b_talk response: %s", resp)
    return resp.replace(formatted_query, "")
def main():
    """Build and launch the Gradio UI for chatting with MiniChat-3B."""
    logging.basicConfig(level=logging.INFO)
    with gr.Blocks() as demo:
        with gr.Row(variant="panel"):
            gr.Markdown("## Talk to MiniChat-3B\n\nTalk to MiniChat-3B.")
        with gr.Row(variant="panel"):
            with gr.Column(variant="panel"):
                message_box = gr.Textbox(label="Message", placeholder="Type something here...")
            with gr.Column(variant="panel"):
                reply_box = gr.Textbox()
                send_button = gr.Button("Send")
        # Wire the button to the chat function; exposed via the API as "talk_m3b".
        send_button.click(m3b_talk, inputs=message_box, outputs=reply_box, api_name="talk_m3b")
    demo.queue().launch()


if __name__ == "__main__":
    main()
|