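"""Gradio chat demo for Yi-6B-Chat served locally via llama-cpp-python.

On first launch the script builds llama-cpp-python with cuBLAS support and
downloads a Q4_K_M GGUF quantization of the model, then exposes a streaming
chat UI with max-length / top-p / temperature controls.
"""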
import os

# Build llama-cpp-python with CUDA (cuBLAS) support before it is imported; a
# common workaround on hosted runtimes where the prebuilt wheel is CPU-only.
os.environ["CMAKE_ARGS"] = "-DLLAMA_CUBLAS=on"
os.system('CMAKE_ARGS="-DLLAMA_CUBLAS=on" pip install llama-cpp-python[server]')

import gradio as gr

# Download the quantized model once; wget -c resumes an interrupted download.
model_file = "yi-chat-6b.Q4_K_M.gguf"
if not os.path.isfile(model_file):
    os.system("wget -c https://huggingface.co/XeIaso/yi-chat-6B-GGUF/resolve/main/yi-chat-6b.Q4_K_M.gguf")

DEFAULT_MODEL_PATH = model_file

from llama_cpp import Llama

# Llama() takes no model_type argument (that is a ctransformers parameter);
# the GGUF metadata already identifies the architecture. n_ctx matches the
# model's 4K context window.
llm = Llama(model_path=DEFAULT_MODEL_PATH, n_ctx=4096)
# Yi's "<|im_end|>" token has id 7; early Yi GGUF conversions shipped with a
# wrong EOS id, which this line patches (the stop string below also covers it).
llm._token_eos = 7
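# Optional: pass n_gpu_layers=-1 to Llama(...) above to offload all layers to
# the GPU that the cuBLAS build enables; by default inference stays on the CPU.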


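# Streaming generator wired to submitBtn below: each chunk from llm() updates
# the last (user, reply) tuple so tokens render live in the Chatbot component.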
def predict(user_input, chatbot, max_length, top_p, temperature, history):
    chatbot.append((user_input, ""))
    history.append(user_input)
    response = ""

    # Yi-Chat is trained on the ChatML format; wrap the raw text as a user
    # turn so the model completes the assistant turn that follows it.
    prompt = f"<|im_start|>user\n{user_input}<|im_end|>\n<|im_start|>assistant\n"

    for output in llm(prompt, stream=True, temperature=temperature, top_p=top_p,
                      max_tokens=int(max_length), stop=["<|im_end|>"]):
        response += output["choices"][0]["text"]
        chatbot[-1] = (chatbot[-1][0], response)
        yield chatbot, history

    history.append(response)
    yield chatbot, history


def reset_user_input():
    return gr.update(value="")


def reset_state():
    return [], []


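# UI: chat transcript on top; input box on the left, sampling controls right.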
with gr.Blocks() as demo:
    gr.HTML("""<h1 align="center">Yi-6B-Chat via llama-cpp-python</h1>""")

    chatbot = gr.Chatbot()
    with gr.Row():
        with gr.Column(scale=4):
            user_input = gr.Textbox(show_label=False, placeholder="Input...", lines=8)
            submitBtn = gr.Button("Submit", variant="primary")
        with gr.Column(scale=1):
            # Cap generation length at the 4K context configured on the model.
            max_length = gr.Slider(0, 4096, value=2048, step=1.0, label="Maximum Length", interactive=True)
            top_p = gr.Slider(0, 1, value=0.7, step=0.01, label="Top P", interactive=True)
            temperature = gr.Slider(0, 1, value=0.95, step=0.01, label="Temperature", interactive=True)
            emptyBtn = gr.Button("Clear History")

    history = gr.State([])

    submitBtn.click(
        predict, [user_input, chatbot, max_length, top_p, temperature, history], [chatbot, history], show_progress=True
    )
    submitBtn.click(reset_user_input, [], [user_input])

    emptyBtn.click(reset_state, outputs=[chatbot, history], show_progress=True)

demo.queue().launch(share=False, inbrowser=True)
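
# The [server] extra installed above also provides an OpenAI-compatible HTTP
# server; the same GGUF could alternatively be served with, e.g.:
#   python -m llama_cpp.server --model yi-chat-6b.Q4_K_M.gguf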