from fastapi import FastAPI, HTTPException, Depends, Security
from fastapi.security import APIKeyHeader
from fastapi.responses import StreamingResponse
from pydantic import BaseModel, Field
from typing import Literal
import os
from functools import lru_cache
from openai import OpenAI

app = FastAPI()

# Client-facing authentication: requests must carry this key in the X-API-Key header.
API_KEY_NAME = "X-API-Key"
API_KEY = os.environ.get("API_KEY", "default_secret_key")  # Set this in your environment variables
api_key_header = APIKeyHeader(name=API_KEY_NAME, auto_error=False)

# Models exposed by the endpoint, addressed by their OpenRouter IDs.
ModelID = Literal[
    "meta-llama/llama-3-70b-instruct",
    "anthropic/claude-3.5-sonnet",
    "deepseek/deepseek-coder",
    "anthropic/claude-3-haiku",
    "openai/gpt-3.5-turbo-instruct",
    "qwen/qwen-72b-chat",
    "google/gemma-2-27b-it",
]


class QueryModel(BaseModel):
    user_query: str = Field(..., description="User's coding query")
    model_id: ModelID = Field(
        default="meta-llama/llama-3-70b-instruct",
        description="ID of the model to use for response generation",
    )

    class Config:
        schema_extra = {
            "example": {
                "user_query": "How do I implement a binary search in Python?",
                "model_id": "meta-llama/llama-3-70b-instruct",
            }
        }


@lru_cache()
def get_api_keys():
    # The OPENROUTER_API_KEY env var holds only the key suffix; the standard
    # "sk-or-v1-" prefix is prepended here.
    return {
        "OPENROUTER_API_KEY": f"sk-or-v1-{os.environ['OPENROUTER_API_KEY']}"
    }


api_keys = get_api_keys()

# OpenRouter exposes an OpenAI-compatible API, so the OpenAI client works
# unchanged once pointed at OpenRouter's base URL.
or_client = OpenAI(api_key=api_keys["OPENROUTER_API_KEY"], base_url="https://openrouter.ai/api/v1")


def chat_with_llama_stream(messages, model, max_output_tokens=2500):
    try:
        response = or_client.chat.completions.create(
            model=model,
            messages=messages,
            max_tokens=max_output_tokens,
            stream=True,
        )
        for chunk in response:
            if chunk.choices[0].delta.content is not None:
                yield chunk.choices[0].delta.content
    except Exception as e:
        # Note: if the stream has already started, the response headers have
        # been sent, so this exception terminates the stream rather than
        # producing a clean 500 for the client.
        raise HTTPException(status_code=500, detail=f"Error in model response: {str(e)}")


async def verify_api_key(api_key: str = Security(api_key_header)):
    if api_key != API_KEY:
        raise HTTPException(status_code=403, detail="Could not validate credentials")
    return api_key


@app.post("/coding-assistant")
async def coding_assistant(query: QueryModel, api_key: str = Depends(verify_api_key)):
    """
    Coding assistant endpoint that provides programming help based on user queries.

    Available models:
    - meta-llama/llama-3-70b-instruct (default)
    - anthropic/claude-3.5-sonnet
    - deepseek/deepseek-coder
    - anthropic/claude-3-haiku
    - openai/gpt-3.5-turbo-instruct
    - qwen/qwen-72b-chat
    - google/gemma-2-27b-it

    Requires API key authentication via the X-API-Key header.
    """
    system_prompt = "You are a helpful assistant proficient in coding tasks. Help the user in understanding and writing code."
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": query.user_query},
    ]
    return StreamingResponse(
        chat_with_llama_stream(messages, model=query.model_id),
        media_type="text/event-stream",
    )


if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=7860)
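
# --- Example client (illustrative sketch, not part of the service) ---
# This shows one way to consume the streaming endpoint. It assumes the server
# above is running locally on port 7860 and that the key sent below matches
# the server's API_KEY environment variable; the "requests" package is an
# extra dependency used only for this demonstration.
#
#   import requests
#
#   resp = requests.post(
#       "http://localhost:7860/coding-assistant",
#       headers={"X-API-Key": "default_secret_key"},
#       json={"user_query": "How do I implement a binary search in Python?"},
#       stream=True,  # consume the StreamingResponse incrementally
#   )
#   resp.raise_for_status()
#   for chunk in resp.iter_content(chunk_size=None):
#       print(chunk.decode("utf-8", errors="replace"), end="", flush=True)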