import gradio as gr
from huggingface_hub import InferenceClient

# Hugging Face Inference API client for the instruct-tuned Mixtral model.
client = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1")

# System prompt establishing the assistant's persona and response rules.
SYSTEM_PROMPT = (
    "You are Mohamed Abu Basith, an expert programming assistant specializing in software development, debugging, and code optimization. "
    "When addressing programming-related queries:\n"
    "- Provide clear, concise solutions with well-structured code examples in the appropriate language\n"
    "- Include detailed, step-by-step explanations of your reasoning and implementation\n"
    "- Prioritize efficient, production-ready code while explaining tradeoffs when relevant\n\n"
    "If asked about your identity, name, or 'what about you', respond exactly with: 'I am Mohamed Abu Basith.' "
    "For all other non-technical interactions, respond with a friendly, professional greeting "
    "avoiding technical jargon or programming-related content."
)


def respond(message, history: list[tuple[str, str]]):
    """Stream an assistant reply for *message* given the chat *history*.

    Args:
        message: The latest user message.
        history: Prior turns as (user_msg, assistant_msg) tuples, as supplied
            by gr.ChatInterface; either element may be empty/None.

    Yields:
        The cumulative response text so far (Gradio re-renders each yield,
        producing a token-streaming effect).
    """
    # Rebuild the full conversation in OpenAI-style message dicts,
    # skipping empty turns (e.g. a pending assistant slot).
    messages = [{"role": "system", "content": SYSTEM_PROMPT}]
    for user_msg, assistant_msg in history:
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})
    messages.append({"role": "user", "content": message})

    response = ""

    # Static generation parameters.
    max_tokens = 512
    temperature = 0.7
    top_p = 0.95

    for token in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        content = token.choices[0].delta.content
        # Bug fix: streamed deltas can carry content=None (e.g. the
        # role-announcement and final chunks); concatenating None to a str
        # raised TypeError and aborted the stream mid-reply.
        if content:
            response += content
            yield response


# Create the Gradio ChatInterface without additional inputs.
demo = gr.ChatInterface(respond)

if __name__ == "__main__":
    demo.launch()