ofai-kai-backup

Running

File size: 1,874 Bytes

e8bac0f
2db0d53
e8bac0f
2db0d53
e8bac0f
8ce49a6
2db0d53
e8bac0f
2db0d53
83746e4
 
2db0d53
 
 
 
83746e4
 
2db0d53
8ce49a6
2db0d53
 
8ce49a6
2db0d53
 
83746e4
 
 
 
 
2db0d53
cde7a7b
2db0d53
a8032bb
2db0d53
 
 
 
 
 
 
 
 
8ce49a6
2db0d53
a8032bb
 
a6549b1
 
 
 
 
 
 
 
 
2db0d53
a6549b1
2db0d53
8ce49a6
 
2db0d53
a6549b1
2db0d53
 
 
 
 
 
 
 
8ce49a6
 
a6549b1
83746e4
 
2db0d53

import gradio as gr
from huggingface_hub import InferenceClient
import os
import requests

# Shared Inference API client bound to the Mistral-Nemo instruct model.
# The access token comes from the HF_TOKEN environment variable and is
# None when that variable is not set.
hf_client = InferenceClient(
    model="mistralai/Mistral-Nemo-Instruct-2407",
    token=os.environ.get("HF_TOKEN"),
)

def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    """Stream the assistant's reply to ``message`` from the inference client.

    Builds a chat transcript (system prompt, prior turns, then the new user
    message), sends it to the module-level ``hf_client`` with streaming
    enabled, and yields the reply accumulated so far after every chunk.

    Args:
        message: The new user message.
        history: Prior turns as ``(user_text, assistant_text)`` pairs; falsy
            entries in a pair are skipped.
        system_message: Text appended to the built-in language instruction
            to form the system prompt.
        max_tokens: Forwarded as ``max_tokens`` to ``chat_completion``.
        temperature: Forwarded as ``temperature`` to ``chat_completion``.
        top_p: Forwarded as ``top_p`` to ``chat_completion``.

    Yields:
        The response text accumulated so far, once per streamed chunk.
    """
    system_prefix = """
If the input language is Korean, respond in Korean. If it's English, respond in English.
    """

    # The language instruction is always placed ahead of the user-supplied prompt.
    messages = [{"role": "system", "content": f"{system_prefix} {system_message}"}]

    for user_text, assistant_text in history:
        if user_text:
            messages.append({"role": "user", "content": user_text})
        if assistant_text:
            messages.append({"role": "assistant", "content": assistant_text})

    messages.append({"role": "user", "content": message})

    response = ""

    # The loop variable is named `chunk` so it no longer shadows the
    # `message` parameter.
    for chunk in hf_client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        # Some stream chunks carry no choices (e.g. usage-only chunks);
        # indexing them unconditionally would raise IndexError mid-stream.
        choices = chunk.choices
        token = choices[0].delta.content if choices else None
        if token is not None:
            # The previous `token.strip("")` stripped nothing (an empty
            # character set is a no-op), so the token is appended as-is.
            response += token
        yield response


# Theme identifier handed to the chat interface.
theme = "Nymbo/Nymbo_Theme"

# Extra stylesheet for the page: hides the footer element.
css = """
footer {
    visibility: hidden;
}
"""

# Additional controls for the chat UI. They are listed in the same order as
# the trailing parameters of `respond` (system_message, max_tokens,
# temperature, top_p).
_system_prompt_box = gr.Textbox(
    label="System Prompt",
    value="\nYou are an AI assistant.\n",
)
_max_tokens_slider = gr.Slider(
    label="Max new tokens",
    minimum=1,
    maximum=2000,
    step=1,
    value=512,
)
_temperature_slider = gr.Slider(
    label="Temperature",
    minimum=0.1,
    maximum=4.0,
    step=0.1,
    value=0.7,
)
_top_p_slider = gr.Slider(
    label="Top-p (nucleus sampling)",
    minimum=0.1,
    maximum=1.0,
    step=0.05,
    value=0.95,
)

# Chat application wired to the streaming `respond` generator.
demo = gr.ChatInterface(
    fn=respond,
    additional_inputs=[
        _system_prompt_box,
        _max_tokens_slider,
        _temperature_slider,
        _top_p_slider,
    ],
    theme=theme,
    css=css,
)

# Start the web server only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()