File size: 2,513 Bytes
f2139e9
2681e2a
f2139e9
 
 
 
 
62ad6d3
f2139e9
 
 
 
2681e2a
13c15b7
f2139e9
 
 
 
 
 
 
 
 
d5e886a
f2139e9
2681e2a
 
 
 
 
 
 
62ad6d3
2681e2a
 
 
 
 
 
 
f2139e9
 
 
2681e2a
f2139e9
d6b2e8d
f2139e9
 
2681e2a
 
 
 
d5e886a
f2139e9
 
2681e2a
 
 
f2139e9
d5e886a
f2139e9
d6b2e8d
13c15b7
f2139e9
 
 
2681e2a
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
from fastapi import FastAPI, HTTPException, Request
from fastapi.responses import HTMLResponse
from fastapi.templating import Jinja2Templates
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
import httpx
import logging
from typing import Optional, Dict, Any

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

app = FastAPI()
app.add_middleware(CORSMiddleware, allow_origins=["*"], allow_credentials=True, allow_methods=["*"], allow_headers=["*"])
templates = Jinja2Templates(directory="templates")

class QueryRequest(BaseModel):
    prompt: str
    temperature: Optional[float] = 0.7
    max_tokens: Optional[int] = 500

class QueryResponse(BaseModel):
    response: str
    model: str = "llama3.2:1b-papalia"

async def generate_with_retries(client: httpx.AsyncClient, data: dict, max_retries: int = 3) -> dict:
    for attempt in range(max_retries):
        try:
            response = await client.post(
                "http://localhost:11434/api/generate",
                json=data,
                timeout=90.0
            )
            response.raise_for_status()
            return response.json()
        except Exception as e:
            if attempt == max_retries - 1:
                raise
            await httpx.AsyncClient().aclose()
            continue

@app.get("/", response_class=HTMLResponse)
async def read_root(request: Request):
    return templates.TemplateResponse("index.html", {"request": request})

@app.post("/generate")
async def generate_response(query: QueryRequest):
    try:
        async with httpx.AsyncClient(timeout=90.0) as client:
            result = await generate_with_retries(
                client,
                {
                    "model": "llama3.2:1b-papalia",
                    "prompt": query.prompt,
                    "temperature": query.temperature,
                    "max_tokens": query.max_tokens,
                    "stream": False
                }
            )
            return {"response": result.get("response", ""), "model": "llama3.2:1b-papalia"}
    except Exception as e:
        logger.error(f"Error: {str(e)}")
        raise HTTPException(status_code=500, detail=str(e))

@app.get("/health")
async def health_check():
    try:
        async with httpx.AsyncClient(timeout=5.0) as client:
            await client.get("http://localhost:11434")
            return {"status": "healthy"}
    except Exception as e:
        return {"status": "unhealthy", "error": str(e)}