File size: 2,218 Bytes
e8d28ee
b26b8bd
e8d28ee
bdc7710
 
 
3467c38
 
2a427d7
 
 
88da019
3467c38
 
 
e8d28ee
c7c8ed9
 
 
 
 
 
 
 
b26b8bd
c7c8ed9
 
 
 
 
 
 
3467c38
c7c8ed9
b26b8bd
c7c8ed9
 
 
 
bdc7710
c7c8ed9
 
 
 
bdc7710
c7c8ed9
 
 
bdc7710
 
 
b26b8bd
 
 
51996fb
b26b8bd
80eb9d8
c7c8ed9
 
 
 
 
 
 
b26b8bd
 
e8d28ee
3467c38
e8d28ee
a68a183
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
import gradio as gr
from huggingface_hub import InferenceClient

"""
For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
"""

# Alternative model ids, kept commented out for reference:
# client = InferenceClient("meta-llama/Meta-Llama-3-8B-Instruct")
# client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")

# Module-level inference client used by the chat callback; it targets the
# Mixtral-8x7B-Instruct model.
client = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1")

# More commented-out alternatives (not active):
# client = InferenceClient("meta-llama/Meta-Llama-3-8B-Instruct" , "HPAI-BSC/Llama3-Aloe-8B-Alpha")
# client = InferenceClient("Xenova/gpt-4o")
# client = InferenceClient("mistralai/mamba-codestral-7B-v0.1")
# client = InferenceClient("deepseek-ai/DeepSeek-Coder-V2-Instruct")

def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    """Stream a chat reply to ``message``, yielding the text accumulated so far.

    Args:
        message: The latest user message.
        history: Earlier turns as ``(user_text, assistant_text)`` pairs; a
            falsy half of a pair is left out of the prompt.
        system_message: Content of the leading ``system`` message.
        max_tokens: Forwarded to ``client.chat_completion`` as ``max_tokens``.
        temperature: Forwarded to ``client.chat_completion`` as ``temperature``.
        top_p: Forwarded to ``client.chat_completion`` as ``top_p``.

    Yields:
        The response text built up so far, once for every streamed chunk
        that carries new content.
    """
    messages = [{"role": "system", "content": system_message}]

    for user_text, assistant_text in history:
        if user_text:
            messages.append({"role": "user", "content": user_text})
        if assistant_text:
            messages.append({"role": "assistant", "content": assistant_text})

    messages.append({"role": "user", "content": message})

    response = ""

    # The loop variable is deliberately not called `message`, so the
    # parameter of that name is not shadowed while streaming.
    for chunk in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        # Some chunks carry no choices at all; there is nothing to append.
        if not chunk.choices:
            continue

        token = chunk.choices[0].delta.content

        # `content` may be None (or empty) on role-only / final chunks;
        # concatenating None to a str would raise TypeError mid-stream.
        if not token:
            continue

        response += token
        yield response

"""
For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
"""
# Chat UI wired to `respond`. The extra controls are listed in the same order
# as the trailing parameters of `respond` (system_message, max_tokens,
# temperature, top_p).
demo = gr.ChatInterface(
    fn=respond,
    additional_inputs=[
        gr.Textbox(
            label="System message",
            value="You are a friendly Chatbot.your name is QuizBot , you are a code expert . output everything in .json format . ",
        ),
        gr.Slider(
            label="Max new tokens",
            minimum=1,
            maximum=2048,
            step=1,
            value=512,
        ),
        gr.Slider(
            label="Temperature",
            minimum=0.1,
            maximum=4.0,
            step=0.1,
            value=0.7,
        ),
        gr.Slider(
            label="Top-p (nucleus sampling)",
            minimum=0.1,
            maximum=1.0,
            step=0.05,
            value=0.95,
        ),
    ],
)


# Launch the Gradio app only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    demo.launch(show_error=True)