Spaces:

Canstralian
/

codedllama

Running

File size: 2,632 Bytes

dcaef0c
 
fb8cc05
dcaef0c
d51c33c
 
dcaef0c
d51c33c
701975d
d51c33c
 
dcaef0c
fb8cc05
 
 
 
 
dcaef0c
d51c33c
dcaef0c
 
 
 
 
 
 
d51c33c
dcaef0c
 
d51c33c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
dcaef0c
d51c33c
fb8cc05
 
 
701975d
dcaef0c
 
fb8cc05
 
dcaef0c
fb8cc05
dcaef0c
 
d51c33c
dcaef0c
fb8cc05

import gradio as gr
from huggingface_hub import InferenceClient
from typing import List, Tuple

# Inference client bound to the hosted chat model used by this app.
client = InferenceClient(model="microsoft/phi-4")

# System prompt sent as the first message of every conversation. It is split
# over adjacent string literals purely for readability; the pieces
# concatenate to a single string.
system_message = (
    "You're an advanced AI assistant designed to engage in friendly and "
    "informative conversations. Your role is to respond to user queries "
    "with helpful, clear, and concise answers, while maintaining a "
    "conversational tone. You can provide advice, explanations, and "
    "solutions based on user input."
)

# Define the response function
def respond(
    message: str,
    history: List[Tuple[str, str]],
    max_tokens: int,
    temperature: float,
    top_p: float,
):
    """Stream the model's reply to ``message`` given the prior conversation.

    Args:
        message: The new user message.
        history: Prior turns, oldest first. Each entry is either a
            ``(user, assistant)`` pair or a ``{"role": ..., "content": ...}``
            dict. ``None`` or an empty list means no prior turns.
        max_tokens: Forwarded to ``client.chat_completion`` as ``max_tokens``.
        temperature: Forwarded to ``client.chat_completion``.
        top_p: Forwarded to ``client.chat_completion``.

    Yields:
        The reply accumulated so far, once per streamed token that carries
        content. If the stream ends without any content, or the request
        raises, a single human-readable error string is yielded instead.
    """
    # The system prompt always comes first, followed by the replayed history.
    messages = [{"role": "system", "content": system_message}]
    for turn in history or ():
        if isinstance(turn, dict):
            # Role/content dict entry: forward only these two keys so any
            # extra UI-side fields on the entry are not sent to the model.
            role = turn.get("role")
            content = turn.get("content")
            if role and content:
                messages.append({"role": role, "content": content})
            continue
        # Pair entry: (user, assistant); either side may be empty.
        if turn[0]:
            messages.append({"role": "user", "content": turn[0]})
        if turn[1]:
            messages.append({"role": "assistant", "content": turn[1]})

    # Append the new user message
    messages.append({"role": "user", "content": message})

    try:
        response = ""
        # Stream the response from the model
        for chunk in client.chat_completion(
            messages,
            max_tokens=max_tokens,
            stream=True,
            temperature=temperature,
            top_p=top_p,
        ):
            choices = getattr(chunk, "choices", None)
            if not choices:
                # A chunk without choices carries no text. Skip it rather
                # than overwrite the partial reply with an error message.
                continue
            delta = getattr(choices[0], "delta", None)
            token = getattr(delta, "content", None)
            if token:
                response += token
                yield response

        if not response:
            # The stream finished but never produced any content.
            print("Error: API response did not contain expected data.")
            yield "Error: Could not process the request. Please try again."
    except Exception as e:
        # UI boundary: log the failure and show a friendly message instead of
        # letting the exception propagate into the front end.
        print(f"An error occurred: {e}")
        yield "Error: An unexpected error occurred while processing your request."

# Define the Gradio chat interface.
# gr.ChatInterface calls fn(message, history, *additional_inputs), so the
# three sliders below must stay in the same order as the trailing parameters
# of respond: max_tokens, temperature, top_p.
demo = gr.ChatInterface(
    fn=respond,
    additional_inputs=[
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
    ],
    # The system prompt is fixed; show it read-only instead of as an input.
    description=f"System message: {system_message}",
)

# Launch the Gradio interface
if __name__ == "__main__":
    demo.launch()