"""FastAPI service exposing a streaming coding-assistant endpoint.

Requests are forwarded to OpenRouter through the OpenAI-compatible client and
the generated text is streamed back to the caller as it arrives.
"""

import os
from functools import lru_cache
from typing import Literal

from fastapi import FastAPI, HTTPException
from fastapi.concurrency import run_in_threadpool
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import StreamingResponse
from openai import OpenAI
from pydantic import BaseModel, Field

app = FastAPI()

# Closed set of model identifiers accepted by the endpoint; pydantic rejects
# any other value during request validation.
ModelID = Literal[
    "meta-llama/llama-3-70b-instruct",
    "anthropic/claude-3.5-sonnet",
    "deepseek/deepseek-coder",
    "anthropic/claude-3-haiku",
    "openai/gpt-3.5-turbo-instruct",
    "qwen/qwen-72b-chat",
    "google/gemma-2-27b-it",
]

# Default cap on the number of tokens requested from the model.
DEFAULT_MAX_OUTPUT_TOKENS = 4000


class QueryModel(BaseModel):
    """Request body for the ``/coding-assistant`` endpoint."""

    user_query: str = Field(..., description="User's coding query")
    model_id: ModelID = Field(
        default="meta-llama/llama-3-70b-instruct",
        description="ID of the model to use for response generation",
    )

    class Config:
        # NOTE(review): ``schema_extra`` is the pydantic v1 spelling; v2 names
        # it ``json_schema_extra``. Left unchanged -- confirm the pinned
        # pydantic version before renaming.
        schema_extra = {
            "example": {
                "user_query": "How do I implement a binary search in Python?",
                "model_id": "meta-llama/llama-3-70b-instruct",
            }
        }


@lru_cache()
def get_api_keys():
    """Return the API keys used by this service, read once from the environment.

    Raises:
        KeyError: if ``OPENROUTER_API_KEY`` is not set.
    """
    # NOTE(review): the environment value is prefixed with "sk-or-v1-", so it
    # is presumably stored without that prefix -- confirm against deployment.
    return {
        "OPENROUTER_API_KEY": f"sk-or-v1-{os.environ['OPENROUTER_API_KEY']}"
    }


api_keys = get_api_keys()
or_client = OpenAI(
    api_key=api_keys["OPENROUTER_API_KEY"],
    base_url="https://openrouter.ai/api/v1",
)


def _open_completion_stream(messages, model, max_output_tokens):
    """Start a streaming chat completion and return the chunk iterator.

    Raises:
        HTTPException: status 500 if the request to the model fails.
    """
    try:
        return or_client.chat.completions.create(
            model=model,
            messages=messages,
            max_tokens=max_output_tokens,
            stream=True,
        )
    except Exception as e:
        raise HTTPException(
            status_code=500, detail=f"Error in model response: {str(e)}"
        ) from e


def _iter_stream_text(response):
    """Yield the text fragment of each chunk in *response*, skipping empty ones.

    Raises:
        HTTPException: status 500 if reading a chunk fails.
    """
    try:
        for chunk in response:
            # Skip chunks that carry no choices instead of failing on the
            # ``choices[0]`` lookup.
            if not chunk.choices:
                continue
            text = chunk.choices[0].delta.content
            if text is not None:
                yield text
    except Exception as e:
        raise HTTPException(
            status_code=500, detail=f"Error in model response: {str(e)}"
        ) from e


def chat_with_llama_stream(messages, model, max_output_tokens=DEFAULT_MAX_OUTPUT_TOKENS):
    """Stream the model's reply to *messages* as text fragments.

    This is a generator: the request to the model is only sent when iteration
    starts.

    Args:
        messages: chat messages in the OpenAI ``role``/``content`` format.
        model: identifier of the model to query.
        max_output_tokens: upper bound on generated tokens.

    Yields:
        Non-empty ``delta.content`` strings in arrival order.

    Raises:
        HTTPException: status 500 if the request or the stream fails.
    """
    response = _open_completion_stream(messages, model, max_output_tokens)
    yield from _iter_stream_text(response)


@app.post("/coding-assistant")
async def coding_assistant(query: QueryModel):
    """
    Coding assistant endpoint that provides programming help based on user queries.

    Available models:
    - meta-llama/llama-3-70b-instruct (default)
    - anthropic/claude-3.5-sonnet
    - deepseek/deepseek-coder
    - anthropic/claude-3-haiku
    - openai/gpt-3.5-turbo-instruct
    - qwen/qwen-72b-chat
    - google/gemma-2-27b-it
    """
    system_prompt = (
        "You are a helpful assistant proficient in coding tasks. "
        "Help the user in understanding and writing code."
    )
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": query.user_query},
    ]

    # Open the upstream request before building the response: once the
    # StreamingResponse starts, the status line has been sent and a failure
    # can no longer be reported as a 500. The client call is blocking, so it
    # runs in the threadpool rather than on the event loop.
    response = await run_in_threadpool(
        _open_completion_stream,
        messages,
        query.model_id,
        DEFAULT_MAX_OUTPUT_TOKENS,
    )

    # NOTE(review): the body is raw text fragments, not "data: ..." framed
    # events, although the media type says text/event-stream. Left unchanged
    # because existing clients may depend on it.
    return StreamingResponse(
        _iter_stream_text(response),
        media_type="text/event-stream",
    )


# NOTE(review): wildcard origins combined with allow_credentials=True is a very
# permissive CORS policy -- restrict allow_origins for production deployments.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=7860)