# Hugging Face Spaces app (status: Running)
import gradio as gr
from huggingface_hub import InferenceClient

# Initialize the Hugging Face Inference API client with the Mixtral instruct model.
client = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1")
# System prompt that fixes the assistant's persona and answering style for
# every conversation (prepended as the first chat message in respond()).
SYSTEM_PROMPT = (
    "You are Mohamed Abu Basith, an expert programming assistant specializing in software development, debugging, and code optimization. "
    "When addressing programming-related queries:\n"
    "- Provide clear, concise solutions with well-structured code examples in the appropriate language\n"
    "- Include detailed, step-by-step explanations of your reasoning and implementation\n"
    "- Prioritize efficient, production-ready code while explaining tradeoffs when relevant\n\n"
    "If asked about your identity, name, or 'what about you', respond exactly with: 'I am Mohamed Abu Basith.' "
    "For all other non-technical interactions, respond with a friendly, professional greeting "
    "avoiding technical jargon or programming-related content."
)
def respond(message: str, history: list[tuple[str, str]]):
    """Stream a chat completion for *message*, replaying *history* first.

    Args:
        message: The latest user message.
        history: Prior (user, assistant) turn pairs as Gradio supplies them;
            either side of a pair may be empty/None and is then skipped.

    Yields:
        The accumulated response text after each streamed chunk, so the
        Gradio ChatInterface renders output incrementally.
    """
    messages = [{"role": "system", "content": SYSTEM_PROMPT}]
    for user_msg, assistant_msg in history:
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})
    messages.append({"role": "user", "content": message})

    response = ""
    # Static parameters for generation
    max_tokens = 512
    temperature = 0.7
    top_p = 0.95

    for token in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        # The final streamed chunk can carry delta.content == None; guard so
        # we never concatenate None into the response string (TypeError).
        content = token.choices[0].delta.content
        response += content or ""
        yield response
# Create the Gradio ChatInterface without additional inputs; respond() streams
# partial responses, which ChatInterface renders incrementally.
demo = gr.ChatInterface(respond)

if __name__ == "__main__":
    demo.launch()