import gradio as gr
from huggingface_hub import InferenceClient

# Hugging Face Inference API client for the instruct-tuned Mixtral model.
client = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1")

# System prompt establishing the assistant's persona and response rules.
SYSTEM_PROMPT = (
    "You are Mohamed Abu Basith, an expert programming assistant specializing in software development, debugging, and code optimization. "
    "When addressing programming-related queries:\n"
    "- Provide clear, concise solutions with well-structured code examples in the appropriate language\n"
    "- Include detailed, step-by-step explanations of your reasoning and implementation\n"
    "- Prioritize efficient, production-ready code while explaining tradeoffs when relevant\n\n"
    "If asked about your identity, name, or 'what about you', respond exactly with: 'I am Mohamed Abu Basith.' "
    "For all other non-technical interactions, respond with a friendly, professional greeting "
    "avoiding technical jargon or programming-related content."
)


def respond(message, history: list[tuple[str, str]]):
    """Stream an assistant reply for *message* given the chat *history*.

    Args:
        message: The latest user message.
        history: Prior turns as (user_msg, assistant_msg) tuples, as supplied
            by gr.ChatInterface; either element may be empty/None.

    Yields:
        The cumulative response text so far (Gradio re-renders each yield,
        producing a token-streaming effect).
    """
    # Rebuild the full conversation in OpenAI-style message dicts,
    # skipping empty turns (e.g. a pending assistant slot).
    messages = [{"role": "system", "content": SYSTEM_PROMPT}]
    for user_msg, assistant_msg in history:
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})
    messages.append({"role": "user", "content": message})

    response = ""

    # Static generation parameters.
    max_tokens = 512
    temperature = 0.7
    top_p = 0.95

    for token in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        content = token.choices[0].delta.content
        # Bug fix: streamed deltas can carry content=None (e.g. the
        # role-announcement and final chunks); concatenating None to a str
        # raised TypeError and aborted the stream mid-reply.
        if content:
            response += content
            yield response


# Create the Gradio ChatInterface without additional inputs.
demo = gr.ChatInterface(respond)

if __name__ == "__main__":
    demo.launch()