File size: 3,252 Bytes
adb504f
 
d3051c0
 
 
b1c8f17
26e0ddc
d3051c0
26e0ddc
b1c8f17
2e76cf7
adb504f
 
 
 
d3051c0
 
 
 
 
 
 
 
 
b1c8f17
1fc729a
d3051c0
 
 
 
 
26e0ddc
 
 
 
d3051c0
 
26e0ddc
 
 
 
 
 
 
 
 
d3051c0
 
26e0ddc
adb504f
26e0ddc
d3051c0
 
 
 
 
 
adb504f
d3051c0
 
 
26e0ddc
d3051c0
 
adb504f
 
 
 
 
d3051c0
adb504f
d3051c0
 
 
 
 
 
 
 
 
 
 
adb504f
 
d3051c0
 
 
 
 
 
 
 
 
 
 
2fc91ef
54210e7
 
adb504f
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
from fastapi import FastAPI, HTTPException, Depends, Security
from fastapi.security import APIKeyHeader
from fastapi.responses import StreamingResponse
from pydantic import BaseModel, Field
from typing import Literal
import os
from functools import lru_cache
from openai import OpenAI

# Application instance; all routes below are registered on it.
app = FastAPI()

# Header clients must present their key in, and the value it is checked against.
# NOTE(review): the "default_secret_key" fallback means authentication is
# effectively a known constant when the API_KEY env var is unset — consider
# failing fast at startup instead; confirm deployment always sets API_KEY.
API_KEY_NAME = "X-API-Key"
API_KEY = os.environ.get("API_KEY", "default_secret_key")  # Set this in your environment variables
# auto_error=False: a missing header reaches verify_api_key as None instead of
# FastAPI short-circuiting with its own 403, so the error message is ours.
api_key_header = APIKeyHeader(name=API_KEY_NAME, auto_error=False)

# Closed set of OpenRouter model slugs the endpoint accepts; used both for
# request validation and for the generated OpenAPI schema.
ModelID = Literal[
    "meta-llama/llama-3-70b-instruct",
    "anthropic/claude-3.5-sonnet",
    "deepseek/deepseek-coder",
    "anthropic/claude-3-haiku",
    "openai/gpt-3.5-turbo-instruct",
    "qwen/qwen-72b-chat",
    "google/gemma-2-27b-it"
]

class QueryModel(BaseModel):
    """Request body for the /coding-assistant endpoint."""

    # Free-form coding question from the user; required.
    user_query: str = Field(..., description="User's coding query")
    # Which OpenRouter model to answer with; constrained to the ModelID Literal.
    model_id: ModelID = Field(
        default="meta-llama/llama-3-70b-instruct",
        description="ID of the model to use for response generation"
    )

    class Config:
        # Example payload surfaced in the interactive OpenAPI docs.
        # NOTE(review): `schema_extra` is the Pydantic v1 spelling; Pydantic v2
        # ignores it (v2 uses `model_config`/`json_schema_extra`) — confirm
        # which major version this project pins.
        schema_extra = {
            "example": {
                "user_query": "How do I implement a binary search in Python?",
                "model_id": "meta-llama/llama-3-70b-instruct"
            }
        }

@lru_cache()
def get_api_keys():
    """Return provider API keys, normalized and cached for the process lifetime.

    Reads OPENROUTER_API_KEY from the environment and guarantees the returned
    value carries the "sk-or-v1-" prefix exactly once, whether or not the
    environment variable already includes it.

    Returns:
        dict: {"OPENROUTER_API_KEY": "sk-or-v1-..."}

    Raises:
        KeyError: if OPENROUTER_API_KEY is not set in the environment.
    """
    raw = os.environ["OPENROUTER_API_KEY"]
    # Bug fix: the previous version unconditionally prepended the prefix,
    # which produced "sk-or-v1-sk-or-v1-..." whenever the env var already
    # held a complete OpenRouter key.
    key = raw if raw.startswith("sk-or-v1-") else f"sk-or-v1-{raw}"
    return {"OPENROUTER_API_KEY": key}

# Resolved once at import time; raises KeyError immediately if
# OPENROUTER_API_KEY is unset (fail-fast, but it means the module cannot even
# be imported without the variable configured).
api_keys = get_api_keys()
# Shared client: the OpenAI SDK pointed at OpenRouter's OpenAI-compatible API.
or_client = OpenAI(api_key=api_keys["OPENROUTER_API_KEY"], base_url="https://openrouter.ai/api/v1")

def chat_with_llama_stream(messages, model, max_output_tokens=2500):
    """Yield the assistant's reply text piece by piece.

    Opens a streaming chat completion against OpenRouter and yields every
    non-empty content delta as it arrives.

    Args:
        messages: Conversation history in OpenAI message-dict format.
        model: OpenRouter model slug to query.
        max_output_tokens: Cap on generated tokens (default 2500).

    Raises:
        HTTPException: 500 wrapping any error from the upstream call.
            NOTE(review): once the first chunk has been yielded the response
            headers are already sent, so a mid-stream failure can no longer
            surface to the client as a 500 — confirm this is acceptable.
    """
    try:
        stream = or_client.chat.completions.create(
            model=model,
            messages=messages,
            max_tokens=max_output_tokens,
            stream=True,
        )
        for event in stream:
            piece = event.choices[0].delta.content
            if piece is not None:
                yield piece
    except Exception as exc:
        raise HTTPException(status_code=500, detail=f"Error in model response: {str(exc)}")

async def verify_api_key(api_key: str = Security(api_key_header)):
    """FastAPI dependency: pass the request through only when the X-API-Key
    header matches the configured API_KEY; otherwise reject with 403."""
    if api_key == API_KEY:
        return api_key
    raise HTTPException(status_code=403, detail="Could not validate credentials")

@app.post("/coding-assistant")
async def coding_assistant(query: QueryModel, api_key: str = Depends(verify_api_key)):
    """
    Coding assistant endpoint that provides programming help based on user queries.

    Available models:
    - meta-llama/llama-3-70b-instruct (default)
    - anthropic/claude-3.5-sonnet
    - deepseek/deepseek-coder
    - anthropic/claude-3-haiku
    - openai/gpt-3.5-turbo-instruct
    - qwen/qwen-72b-chat
    - google/gemma-2-27b-it

    Requires API Key authentication via X-API-Key header.
    """
    # Two-message conversation: fixed system role plus the user's query.
    conversation = [
        {
            "role": "system",
            "content": "You are a helpful assistant proficient in coding tasks. Help the user in understanding and writing code.",
        },
        {"role": "user", "content": query.user_query},
    ]

    # Stream tokens back to the client as server-sent-event-style chunks.
    token_stream = chat_with_llama_stream(conversation, model=query.model_id)
    return StreamingResponse(token_stream, media_type="text/event-stream")

if __name__ == "__main__":
    # Local/dev entry point: serve on all interfaces, port 7860
    # (7860 is the conventional Hugging Face Spaces port).
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=7860)