File size: 2,041 Bytes
53f8a32
 
f4fe081
da8a172
d487976
 
 
da8a172
9a75ff9
b424c57
fe36794
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
dc422ae
2a8c299
d487976
2a8c299
fe36794
ecd2bcd
2a8c299
c17b3f7
fe36794
d487976
fe36794
320c4ed
 
9a75ff9
 
 
 
 
aed5924
9a75ff9
 
 
d487976
53f8a32
 
 
 
d487976
 
 
 
 
 
 
 
53f8a32
d487976
53f8a32
9a75ff9
53f8a32
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
import logging
from typing import cast
from threading import Lock
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

from conversation import get_default_conv_template
import gradio as gr
from llama_cpp import Llama, CompletionChunk
import json

"""

model = Model(model_path='/path/to/model.bin')
while True:
    try:
        prompt = input("You: ")
        if prompt == '':
            continue
        print(f"AI:", end='')
        for token in model.generate(prompt):
            print(f"{token}", end='', flush=True)
        print()
    except KeyboardInterrupt:
        break
"""

# Imported here rather than with the other imports at the top of the file.
from huggingface_hub import hf_hub_download

# File name of the GGUF model file requested from the Hub repository below.
model_path = "minichat-3b.q8_0.gguf"

# Runs at import time: requests that file from the "afrideva/MiniChat-3B-GGUF"
# repo. The result is handed to Llama as its model_path, so it is presumably
# the local path of the downloaded file (confirm against huggingface_hub docs).
mdlpath = hf_hub_download(repo_id="afrideva/MiniChat-3B-GGUF", filename=model_path)

# Module-level llama.cpp model instance; m3b_talk() calls it to generate text.
lcpp_model = Llama(model_path=mdlpath)

def m3b_talk(text):
    """Return the model's reply to a single user message.

    The message is wrapped in the ``<s> [|User|]...</s> [|Assistant|]``
    template and passed to the module-level ``lcpp_model``. Generation is
    asked to stop at the next ``[|User|]`` marker or at the first newline.

    Args:
        text: The user's message. ``None``, empty, or whitespace-only input
            short-circuits without calling the model.

    Returns:
        The concatenated ``"text"`` of every returned choice, or ``""`` when
        there was no message to answer.
    """
    # A blank message gives the model nothing to answer; skip inference
    # rather than sampling from a bare template.
    if text is None or not text.strip():
        return ""

    prompt = "<s> [|User|]" + text + "</s> [|Assistant|]"

    # echo=False asks for the continuation only, instead of echoing the prompt
    # and stripping it afterwards (which would also remove any repeat of the
    # prompt that happened to occur inside the reply).
    result = lcpp_model(prompt, stop=["[|User|]", "\n"], echo=False)
    answer = "".join(choice["text"] for choice in result["choices"])

    # Replaces the former debug print(); main() configures logging at INFO.
    logging.getLogger(__name__).info("MiniChat-3B reply: %r", answer)
    return answer

def main():
    """Build the MiniChat-3B Gradio demo and launch it with queuing enabled."""
    logging.basicConfig(level=logging.INFO)

    with gr.Blocks() as demo:
        # Header row.
        with gr.Row(variant="panel"):
            gr.Markdown("## Talk to MiniChat-3B\n\nTalk to MiniChat-3B.")

        # Second row: message entry in one column, reply and button in the other.
        with gr.Row(variant="panel"):
            with gr.Column(variant="panel"):
                message_box = gr.Textbox(
                    label="Message",
                    placeholder="Type something here...",
                )
            with gr.Column(variant="panel"):
                reply_box = gr.Textbox()
                send_button = gr.Button("Send")

        # Clicking the button runs m3b_talk on the message box contents and
        # writes the result to the reply box.
        send_button.click(
            m3b_talk,
            inputs=message_box,
            outputs=reply_box,
            api_name="talk_m3b",
        )

    queued_demo = demo.queue()
    queued_demo.launch()


# Start the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()