from fastapi import FastAPI, HTTPException, Depends, Security
from fastapi.security import APIKeyHeader
from fastapi.responses import StreamingResponse
from pydantic import BaseModel, Field
from typing import Literal
import os
from functools import lru_cache
from openai import OpenAI

app = FastAPI()

# Client-facing authentication: requests must carry this key in the X-API-Key header.
API_KEY_NAME = "X-API-Key"
API_KEY = os.environ.get("API_KEY", "default_secret_key")  # Set this in your environment variables
api_key_header = APIKeyHeader(name=API_KEY_NAME, auto_error=False)

# Models exposed by the endpoint, addressed by their OpenRouter IDs.
ModelID = Literal[
    "meta-llama/llama-3-70b-instruct",
    "anthropic/claude-3.5-sonnet",
    "deepseek/deepseek-coder",
    "anthropic/claude-3-haiku",
    "openai/gpt-3.5-turbo-instruct",
    "qwen/qwen-72b-chat",
    "google/gemma-2-27b-it",
]


class QueryModel(BaseModel):
    user_query: str = Field(..., description="User's coding query")
    model_id: ModelID = Field(
        default="meta-llama/llama-3-70b-instruct",
        description="ID of the model to use for response generation",
    )

    class Config:
        schema_extra = {
            "example": {
                "user_query": "How do I implement a binary search in Python?",
                "model_id": "meta-llama/llama-3-70b-instruct",
            }
        }


@lru_cache()
def get_api_keys():
    # The OPENROUTER_API_KEY env var holds only the key suffix; the standard
    # "sk-or-v1-" prefix is prepended here.
    return {
        "OPENROUTER_API_KEY": f"sk-or-v1-{os.environ['OPENROUTER_API_KEY']}"
    }


api_keys = get_api_keys()

# OpenRouter exposes an OpenAI-compatible API, so the OpenAI client works
# unchanged once pointed at OpenRouter's base URL.
or_client = OpenAI(api_key=api_keys["OPENROUTER_API_KEY"], base_url="https://openrouter.ai/api/v1")


def chat_with_llama_stream(messages, model, max_output_tokens=2500):
    try:
        response = or_client.chat.completions.create(
            model=model,
            messages=messages,
            max_tokens=max_output_tokens,
            stream=True,
        )
        for chunk in response:
            if chunk.choices[0].delta.content is not None:
                yield chunk.choices[0].delta.content
    except Exception as e:
        # Note: if the stream has already started, the response headers have
        # been sent, so this exception terminates the stream rather than
        # producing a clean 500 for the client.
        raise HTTPException(status_code=500, detail=f"Error in model response: {str(e)}")


async def verify_api_key(api_key: str = Security(api_key_header)):
    if api_key != API_KEY:
        raise HTTPException(status_code=403, detail="Could not validate credentials")
    return api_key


@app.post("/coding-assistant")
async def coding_assistant(query: QueryModel, api_key: str = Depends(verify_api_key)):
    """
    Coding assistant endpoint that provides programming help based on user queries.

    Available models:
    - meta-llama/llama-3-70b-instruct (default)
    - anthropic/claude-3.5-sonnet
    - deepseek/deepseek-coder
    - anthropic/claude-3-haiku
    - openai/gpt-3.5-turbo-instruct
    - qwen/qwen-72b-chat
    - google/gemma-2-27b-it

    Requires API key authentication via the X-API-Key header.
    """
    system_prompt = "You are a helpful assistant proficient in coding tasks. Help the user in understanding and writing code."
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": query.user_query},
    ]
    return StreamingResponse(
        chat_with_llama_stream(messages, model=query.model_id),
        media_type="text/event-stream",
    )


if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=7860)
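
# --- Example client (illustrative sketch, not part of the service) ---
# This shows one way to consume the streaming endpoint. It assumes the server
# above is running locally on port 7860 and that the key sent below matches
# the server's API_KEY environment variable; the "requests" package is an
# extra dependency used only for this demonstration.
#
#   import requests
#
#   resp = requests.post(
#       "http://localhost:7860/coding-assistant",
#       headers={"X-API-Key": "default_secret_key"},
#       json={"user_query": "How do I implement a binary search in Python?"},
#       stream=True,  # consume the StreamingResponse incrementally
#   )
#   resp.raise_for_status()
#   for chunk in resp.iter_content(chunk_size=None):
#       print(chunk.decode("utf-8", errors="replace"), end="", flush=True)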