Spaces:
Running
Running
File size: 3,252 Bytes
adb504f d3051c0 b1c8f17 26e0ddc d3051c0 26e0ddc b1c8f17 2e76cf7 adb504f d3051c0 b1c8f17 1fc729a d3051c0 26e0ddc d3051c0 26e0ddc d3051c0 26e0ddc adb504f 26e0ddc d3051c0 adb504f d3051c0 26e0ddc d3051c0 adb504f d3051c0 adb504f d3051c0 adb504f d3051c0 2fc91ef 54210e7 adb504f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 |
import os
import secrets
from functools import lru_cache
from typing import Literal

from fastapi import FastAPI, HTTPException, Depends, Security
from fastapi.responses import StreamingResponse
from fastapi.security import APIKeyHeader
from openai import OpenAI
from pydantic import BaseModel, Field
# FastAPI application instance; routes are attached via decorators below.
app = FastAPI()
# HTTP header clients must send to authenticate.
API_KEY_NAME = "X-API-Key"
API_KEY = os.environ.get("API_KEY", "default_secret_key") # Set this in your environment variables; falls back to an insecure default
# auto_error=False: a missing header is delivered as None instead of an
# automatic 403, so verify_api_key below can issue a uniform error itself.
api_key_header = APIKeyHeader(name=API_KEY_NAME, auto_error=False)
# Closed set of OpenRouter model IDs accepted by the /coding-assistant
# endpoint; any other model_id fails pydantic validation (HTTP 422).
ModelID = Literal[
"meta-llama/llama-3-70b-instruct",
"anthropic/claude-3.5-sonnet",
"deepseek/deepseek-coder",
"anthropic/claude-3-haiku",
"openai/gpt-3.5-turbo-instruct",
"qwen/qwen-72b-chat",
"google/gemma-2-27b-it"
]
class QueryModel(BaseModel):
    """Request body for the /coding-assistant endpoint."""
    # Free-form coding question from the client; required (no default).
    user_query: str = Field(..., description="User's coding query")
    # Must be one of the IDs in ModelID; anything else is rejected with 422.
    model_id: ModelID = Field(
        default="meta-llama/llama-3-70b-instruct",
        description="ID of the model to use for response generation"
    )
    class Config:
        # Example payload shown in the OpenAPI docs.
        # NOTE(review): `schema_extra` is the pydantic v1 spelling; under
        # pydantic v2 this key is silently ignored (renamed to
        # json_schema_extra) — confirm which major version is pinned.
        schema_extra = {
            "example": {
                "user_query": "How do I implement a binary search in Python?",
                "model_id": "meta-llama/llama-3-70b-instruct"
            }
        }
@lru_cache()
def get_api_keys():
    """Build the provider credential mapping from the environment (memoized).

    Returns a dict with the single key ``"OPENROUTER_API_KEY"`` whose value
    is the raw environment value prefixed with ``sk-or-v1-``.

    Raises KeyError when the OPENROUTER_API_KEY variable is not set.
    """
    raw_key = os.environ["OPENROUTER_API_KEY"]
    return {"OPENROUTER_API_KEY": "sk-or-v1-" + raw_key}
# Resolved once at import time; a missing OPENROUTER_API_KEY environment
# variable makes the whole module fail to import with a KeyError.
api_keys = get_api_keys()
# OpenAI-SDK client pointed at OpenRouter's OpenAI-compatible gateway;
# shared by every request handled by this process.
or_client = OpenAI(api_key=api_keys["OPENROUTER_API_KEY"], base_url="https://openrouter.ai/api/v1")
def chat_with_llama_stream(messages, model, max_output_tokens=2500):
    """Yield response text incrementally from an OpenRouter chat completion.

    Args:
        messages: chat history in OpenAI message-dict format.
        model: OpenRouter model ID to query.
        max_output_tokens: cap on generated tokens (default 2500).

    Raises:
        HTTPException: 500 wrapping any error from the upstream API.

    NOTE(review): an HTTPException raised after the first chunk has been
    streamed can no longer change the HTTP status seen by the client —
    confirm the desired mid-stream error behavior.
    """
    try:
        stream = or_client.chat.completions.create(
            model=model,
            messages=messages,
            max_tokens=max_output_tokens,
            stream=True,
        )
        for event in stream:
            piece = event.choices[0].delta.content
            if piece is not None:
                yield piece
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Error in model response: {str(e)}")
async def verify_api_key(api_key: str = Security(api_key_header)):
    """FastAPI dependency that authenticates the X-API-Key header.

    Returns the validated key on success; raises HTTPException 403 when the
    header is missing or does not match the configured API_KEY.
    """
    # api_key is None when the header is absent (APIKeyHeader was built with
    # auto_error=False), so guard before comparing.  compare_digest runs in
    # constant time, closing the timing side channel a plain `!=` leaves open.
    if api_key is None or not secrets.compare_digest(api_key, API_KEY):
        raise HTTPException(status_code=403, detail="Could not validate credentials")
    return api_key
@app.post("/coding-assistant")
async def coding_assistant(query: QueryModel, api_key: str = Depends(verify_api_key)):
    """
    Stream programming help for the user's query as an event stream.

    The model is selected via ``model_id`` — any member of ``ModelID``:
    - meta-llama/llama-3-70b-instruct (default)
    - anthropic/claude-3.5-sonnet
    - deepseek/deepseek-coder
    - anthropic/claude-3-haiku
    - openai/gpt-3.5-turbo-instruct
    - qwen/qwen-72b-chat
    - google/gemma-2-27b-it
    Requires API Key authentication via X-API-Key header.
    """
    conversation = [
        {
            "role": "system",
            "content": "You are a helpful assistant proficient in coding tasks. Help the user in understanding and writing code.",
        },
        {"role": "user", "content": query.user_query},
    ]
    token_stream = chat_with_llama_stream(conversation, model=query.model_id)
    return StreamingResponse(token_stream, media_type="text/event-stream")
if __name__ == "__main__":
    # Development entry point: run the ASGI app directly with uvicorn.
    # 0.0.0.0 binds all interfaces; 7860 is the customary Hugging Face
    # Spaces port (the page header above suggests this file came from a
    # Space).  Fix: removed a stray trailing "|" extraction artifact that
    # made this line a syntax error.
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=7860)