"""Zephyr-7B coding assistant: fine-tunes on code datasets, then serves a Gradio chat UI."""

import gradio as gr
from huggingface_hub import InferenceClient
from transformers import AutoTokenizer, AutoModelForCausalLM, Trainer, TrainingArguments
from datasets import load_dataset

# Model id used both for local fine-tuning and for hosted inference.
model_name = "HuggingFaceH4/zephyr-7b-beta"
client = InferenceClient(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

# Causal-LM tokenizers frequently ship without a pad token; the
# padding="max_length" call in preprocess_code_data would raise without one.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token


def load_code_datasets():
    """Load the coding datasets used for fine-tuning.

    Returns:
        dict: dataset name -> loaded dataset object.

    NOTE(review): "stackexchange" and "github" do not look like valid
    Hugging Face Hub dataset ids as written — confirm the intended dataset
    names before running the fine-tuning path.
    """
    return {
        "CodeSearchNet": load_dataset("code_search_net", "python"),
        "StackOverflow": load_dataset("stackexchange", "stack_overflow"),
        "GitHub": load_dataset("github", "python"),
    }


datasets = load_code_datasets()


def preprocess_code_data(examples):
    """Tokenize the ``code`` column with fixed-length padding and truncation.

    NOTE(review): CodeSearchNet's source-code column is named
    ``func_code_string``, not ``code`` — verify against the loaded schema.
    """
    return tokenizer(examples["code"], padding="max_length", truncation=True)


# Tokenize every dataset once, up front.
tokenized_datasets = {
    name: dataset.map(preprocess_code_data, batched=True)
    for name, dataset in datasets.items()
}

training_args = TrainingArguments(
    output_dir="./results",
    per_device_train_batch_size=4,
    num_train_epochs=3,
    logging_dir="./logs",
    evaluation_strategy="epoch",
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets["CodeSearchNet"]["train"],
    eval_dataset=tokenized_datasets["CodeSearchNet"]["test"],
)

# NOTE(review): this fine-tunes a 7B model at import time; consider guarding
# it behind a CLI flag so importing the module does not start training.
trainer.train()

# System prompt shown (and editable) in the UI; also the default "system"
# role message for every conversation.
system_message = """\
You are an advanced AI assistant specialized in coding. Your purpose is to:
1. Provide error-free, optimal code in multiple programming languages (e.g., Python, JavaScript, Java, C++).
2. Ensure your answers are precise, functional, and concise, avoiding redundant explanations.
3. When handling coding problems, break them into smaller, actionable steps, and provide solutions for each step if applicable.
4. Focus on real-world coding practices, including debugging, refactoring, and optimizing code.
5. In case of incorrect code or errors, identify the issue, explain it briefly, and provide a corrected solution.
6. Always prioritize clear, correct syntax, and follow best practices for coding.

Guidelines:
1. If given code with issues, explain the issues and provide the corrected code without excessive verbosity.
2. Ensure code is tested and runnable with minimal dependencies.
3. Use meaningful variable names and comments where necessary for clarity.
4. If asked to explain code, provide a concise but sufficient explanation for the key parts.

Thank you for using this system. Please proceed with your query.
"""


def validate_inputs(max_tokens, temperature, top_p):
    """Validate generation parameters, raising ValueError on bad values.

    BUG FIX: the original code called ``validate_inputs`` without ever
    defining it, so every chat request died with NameError.

    Args:
        max_tokens: maximum tokens to generate; must be >= 1.
        temperature: sampling temperature; must be > 0.
        top_p: nucleus-sampling probability mass; must be in (0, 1].

    Raises:
        ValueError: if any parameter is out of range.
    """
    if max_tokens < 1:
        raise ValueError("max_tokens must be >= 1")
    if temperature <= 0:
        raise ValueError("temperature must be > 0")
    if not 0 < top_p <= 1:
        raise ValueError("top_p must be in (0, 1]")


def respond(message, history, system_message, max_tokens, temperature, top_p,
            task_description=None, language=None):
    """Stream a chat completion for *message* given the conversation *history*.

    Args:
        message: the new user message.
        history: list of (user, assistant) message pairs from prior turns.
        system_message: system prompt prepended to the conversation.
        max_tokens / temperature / top_p: generation parameters.
        task_description, language: extra UI inputs (currently unused by the
            generation call). BUG FIX: the Gradio interface wires six
            additional inputs into this callback; without these two
            parameters every call raised TypeError.

    Yields:
        The accumulated response text after each streamed token.
    """
    validate_inputs(max_tokens, temperature, top_p)

    # Rebuild the full conversation for the model.
    messages = [{"role": "system", "content": system_message}]
    for user_turn, assistant_turn in history:
        if user_turn:
            messages.append({"role": "user", "content": user_turn})
        if assistant_turn:
            messages.append({"role": "assistant", "content": assistant_turn})
    messages.append({"role": "user", "content": message})

    response = ""
    try:
        # Stream tokens; `chunk` deliberately does not shadow the `message`
        # parameter (the original loop variable did).
        for chunk in client.chat_completion(
            messages,
            max_tokens=max_tokens,
            stream=True,
            temperature=temperature,
            top_p=top_p,
        ):
            token = chunk.choices[0].delta.content
            # Some stream chunks carry no content (e.g. role-only deltas);
            # concatenating None would raise TypeError mid-stream.
            if token:
                response += token
            yield response
    except Exception as e:
        yield f"An error occurred while generating the response: {str(e)}"


def multi_step_code_generation(problem_statement):
    """Generate code in multiple stages, breaking down the problem.

    Currently a placeholder: it emits a labelled section per stage rather
    than calling the model.
    """
    stages = [
        "1. Understand the problem: Analyze the requirements.",
        "2. Design the basic structure of the solution.",
        "3. Implement core functions and logic.",
        "4. Optimize and refactor the code.",
    ]
    solution_parts = []
    for stage in stages:
        # Simulate AI providing code in steps
        solution_parts.append(f"Solution for Stage: {stage}\n")
    return "\n".join(solution_parts)


def generate_prompt(language, task):
    """Generate a coding prompt for different programming languages.

    Falls back to a language-agnostic prompt for unknown languages.
    """
    prompts = {
        "python": f"Write a Python program to {task}.",
        "javascript": f"Write a JavaScript function to {task}.",
        "java": f"Write a Java program to {task}.",
        "c++": f"Write a C++ function to {task}.",
    }
    return prompts.get(language.lower(), f"Write a program to {task}.")


# Chat UI; the six additional inputs map positionally onto respond()'s
# parameters after (message, history).
demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Textbox(value=system_message, label="System message"),
        gr.Slider(minimum=1, maximum=32768, value=17012, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
        gr.Textbox(label="Task Description", placeholder="Describe your coding task here..."),
        gr.Textbox(label="Programming Language", placeholder="Python, JavaScript, Java, C++, etc."),
    ],
)

if __name__ == "__main__":
    demo.launch()