professional-milo

Sleeping

File size: 3,290 Bytes

0d80fb4
6498ae3
ea4dc0b
4489ef8
1e3869c
1854dfd
c81a905
 
 
7e5beaf
a50f9b2
c81a905
7cfaf27
96ba47c
c81a905
96ba47c
c81a905
2d89603
7cfaf27
04b933e
c81a905
 
 
 
f9c1e92
f6bc3ca
 
2d89603
7cfaf27
 
 
 
 
 
 
 
 
4489ef8
 
 
2d89603
 
 
96ba47c
2d89603
e423455
96ba47c
e423455
5358a38
c81a905
a50f9b2
fa11edf
c81a905
fa11edf
c81a905
fa11edf
c81a905
fa11edf
 
ea4dc0b
 
c81a905

from huggingface_hub import InferenceClient
import gradio as gr
from pathlib import Path
import datetime

# Shared Hugging Face inference client, bound to the Mixtral instruct model.
# Created once at import time and reused by every call to `generate`.
client = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1")

def format_prompt(message, history, system_prompt):
    """Build the full text prompt sent to the model.

    The result is laid out as::

        {system_prompt}<s>[INST] u1 [/INST] b1</s> ... [INST] {message} [/INST]

    Args:
        message: The new user message to be answered.
        history: Iterable of ``(user_prompt, bot_response)`` pairs from
            earlier turns, oldest first.
        system_prompt: Text placed once at the very start of the prompt,
            ahead of the ``<s>`` marker.

    Returns:
        The assembled prompt string.
    """
    # Every completed turn becomes an [INST] block followed by the reply,
    # closed with "</s> " (note the trailing space).
    past_turns = "".join(
        f"[INST] {user_prompt} [/INST] {bot_response}</s> "
        for user_prompt, bot_response in history
    )
    pending_turn = f"[INST] {message} [/INST]"
    return system_prompt + "<s>" + past_turns + pending_turn

def generate(
    prompt,
    history,
    user_system_prompt,
    temperature=0.9,
    max_new_tokens=9048,
    top_p=0.95,
    repetition_penalty=1.0,
):
    """Stream a model reply for ``prompt`` given the prior chat ``history``.

    This is a generator: it yields the reply text accumulated so far after
    every streamed token, so the chat UI can render the answer progressively
    and a stop request can interrupt it mid-reply.

    Args:
        prompt: The latest user message.
        history: Earlier ``(user_prompt, bot_response)`` pairs.
        user_system_prompt: Value of the "System Prompt" input. It is accepted
            for signature compatibility but is not used; the built-in Milo
            instructions below are always applied.
        temperature: Sampling temperature; values below 0.01 are raised
            to 0.01.
        max_new_tokens: Upper bound on the number of generated tokens.
        top_p: Nucleus-sampling probability mass.
        repetition_penalty: Penalty forwarded to the text-generation call.

    Yields:
        The cumulative reply text. An empty string is yielded once if the
        stream produces no tokens.
    """
    # NOTE(review): user_system_prompt is never read, so the UI textbox has no
    # effect. Possibly deliberate to protect the persona -- confirm.
    system_prompt = '''Instructions: Everything else is from the user. You are Milo, an AI assistant created by ConvoLite in 2024 (he/him). Be friendly and straigt to the point. Have Bible based values and principles. Make sure to follow the user request as best as possible as long as it is within these guidelines. Act as an expert in everything you do, provide advice and suggestions, and ask questions if neccesary to get a better understanding of what the user wants.'''

    # Clamp to a small positive floor so the UI's 0.0 setting never reaches
    # the backend as an exact zero temperature.
    temperature = max(float(temperature), 1e-2)
    top_p = float(top_p)

    generate_kwargs = dict(
        temperature=temperature,
        max_new_tokens=max_new_tokens,
        top_p=top_p,
        repetition_penalty=repetition_penalty,
        do_sample=True,
        # NOTE(review): a fixed seed presumably makes sampling repeatable for
        # identical prompts (e.g. "Retry") -- confirm this is intended.
        seed=42,
    )

    formatted_prompt = format_prompt(prompt, history, system_prompt)

    stream = client.text_generation(
        formatted_prompt,
        **generate_kwargs,
        stream=True,
        details=True,
        return_full_text=False,
    )

    output = ""
    yielded = False
    for response in stream:
        output += response.token.text
        yielded = True
        # Emit the partial reply on every token; yielding only after the loop
        # would hide all text until generation had completely finished.
        yield output

    if not yielded:
        # Keep the "always yields at least once" contract for an empty stream.
        yield output

# Extra controls shown with the chat box. Their order mirrors the parameters
# of `generate` that follow (prompt, history): system prompt, temperature,
# max new tokens, top-p, repetition penalty.
# Note that the slider defaults here differ from the keyword defaults declared
# on `generate` (e.g. top-p 0.90 vs 0.95, repetition penalty 1.2 vs 1.0).
additional_inputs = [
    gr.Textbox(
        label="System Prompt",
        max_lines=4,
        interactive=True,
    ),
    gr.Slider(
        label="Temperature",
        value=0.9,
        minimum=0.0,
        maximum=1.0,
        step=0.05,
        interactive=True,
        info="Higher values produce more diverse outputs",
    ),
    gr.Slider(
        label="Max new tokens",
        value=10480,
        minimum=256,
        maximum=10480,
        step=64,
        interactive=True,
        info="The maximum numbers of new tokens that the AI can generate in a single message",
    ),
    gr.Slider(
        label="Top-p (nucleus sampling)",
        value=0.90,
        minimum=0.0,
        maximum=1,
        step=0.05,
        interactive=True,
        info="Higher values sample more low-probability tokens",
    ),
    gr.Slider(
        label="Repetition penalty",
        value=1.2,
        minimum=1.0,
        maximum=2.0,
        step=0.05,
        interactive=True,
        info="Penalize repeated tokens.",
    ),
]

# (user avatar, assistant avatar) image URLs for the chat panel.
avatar_images = (
    "https://i.postimg.cc/pXjKKVXG/user-circle.png",
    "https://i.postimg.cc/qq04Yz93/CL3.png",
)

# Assemble the chat UI around `generate`, then start serving it.
demo = gr.ChatInterface(
    fn=generate,
    chatbot=gr.Chatbot(
        show_label=True,
        show_share_button=False,
        show_copy_button=True,
        likeable=True,
        layout="panel",
        height="auto",
        avatar_images=avatar_images,
    ),
    additional_inputs=additional_inputs,
    title="ConvoLite",
    # Button captions.
    submit_btn="➢",
    retry_btn="Retry",
    undo_btn="↩ Undo",
    clear_btn="Clear (New chat)",
    stop_btn="Stop ▢",
    concurrency_limit=20,
    theme=gr.themes.Soft(primary_hue=gr.themes.colors.cyan),
)

# show_api=False is passed through to launch() unchanged.
demo.launch(show_api=False)