papalia3 / app.py
andresdegante's picture
Initial commit: Add Llama3-Papalia inference UI and API
f2139e9
raw
history blame
3.72 kB
from fastapi import FastAPI, HTTPException, Request
from fastapi.responses import HTMLResponse, JSONResponse
from fastapi.staticfiles import StaticFiles
from fastapi.templating import Jinja2Templates
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
import httpx
import os
import logging
from typing import Optional
# Configure application-wide logging; module-level logger per stdlib convention.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# FastAPI application exposing both an HTML UI and a JSON inference API.
app = FastAPI(
    title="Llama3-Papalia Inference API & UI",
    description="API y UI para interactuar con el modelo Llama3-Papalia especializado en Desarrollo Humano",
    version="1.0.0"
)

# Enable CORS, wide open (any origin/method/header).
# NOTE(review): fine for a demo UI; consider restricting allow_origins in production.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Jinja2 HTML templates served from the local "templates/" directory.
templates = Jinja2Templates(directory="templates")
class QueryRequest(BaseModel):
    """Request body for POST /generate: the prompt plus optional sampling knobs."""

    prompt: str  # user prompt, forwarded verbatim to Ollama
    temperature: Optional[float] = 0.7  # sampling temperature passed through to Ollama
    max_tokens: Optional[int] = 500  # generation length cap passed through to Ollama
class QueryResponse(BaseModel):
    """Response body for POST /generate: generated text plus the model identifier."""

    response: str  # text produced by the model
    model: str = "llama3-papalia-nuevo"  # fixed model name echoed back to the client
# Ollama generation endpoint. Overridable via the OLLAMA_API_URL environment
# variable so the app can target a non-local Ollama instance; defaults to the
# standard local install (this also gives the already-imported `os` a purpose).
OLLAMA_API_URL = os.getenv("OLLAMA_API_URL", "http://localhost:11434/api/generate")
@app.get("/", response_class=HTMLResponse)
async def read_root(request: Request):
    """Serve the main HTML page of the inference UI."""
    context = {"request": request, "title": "Llama3-Papalia Inference"}
    return templates.TemplateResponse("index.html", context)
@app.post("/generate", response_model=QueryResponse)
async def generate_response(query: QueryRequest):
    """Proxy a generation request to the Ollama server.

    Forwards the prompt and sampling parameters to Ollama and returns the
    generated text. Failure mapping: non-200 Ollama reply -> HTTP 500,
    timeout -> HTTP 504, anything else -> HTTP 500.
    """
    logger.info("Recibida solicitud de generaci贸n con prompt: %s...", query.prompt[:50])
    try:
        async with httpx.AsyncClient(timeout=30.0) as client:
            logger.info("Enviando solicitud a Ollama: %s", OLLAMA_API_URL)
            response = await client.post(
                OLLAMA_API_URL,
                json={
                    "model": "llama3-papalia-nuevo",
                    "prompt": query.prompt,
                    # Bug fix: without stream=False Ollama answers with
                    # newline-delimited JSON chunks and response.json() below
                    # fails to parse the body.
                    "stream": False,
                    "temperature": query.temperature,
                    "max_tokens": query.max_tokens
                }
            )
            logger.info("Respuesta de Ollama recibida con status code: %s", response.status_code)
            if response.status_code != 200:
                logger.error("Error en la respuesta de Ollama: %s", response.text)
                raise HTTPException(
                    status_code=500,
                    detail=f"Error en la generaci贸n con Ollama: {response.text}"
                )
            result = response.json()
            logger.info("Respuesta procesada exitosamente")
            return QueryResponse(response=result["response"])
    except HTTPException:
        # Bug fix: re-raise as-is. Previously the broad `except Exception`
        # caught the deliberate 500 above and re-wrapped its detail.
        raise
    except httpx.TimeoutException:
        logger.error("Timeout al conectar con Ollama")
        raise HTTPException(
            status_code=504,
            detail="Timeout al conectar con el servicio de Ollama"
        )
    except Exception as e:
        # logger.exception records the traceback, unlike logger.error(str(e)).
        logger.exception("Error inesperado")
        raise HTTPException(
            status_code=500,
            detail=f"Error en el servidor: {str(e)}"
        )
@app.get("/health")
async def health_check():
    """Liveness probe: issue a tiny 1-token generation against Ollama.

    Returns {"status": "healthy", ...} when Ollama answers 200, and
    {"status": "unhealthy", "error": ...} on a non-200 reply or any failure.
    """
    try:
        async with httpx.AsyncClient(timeout=5.0) as client:
            response = await client.post(
                OLLAMA_API_URL,
                json={
                    "model": "llama3-papalia-nuevo",
                    "prompt": "test",
                    # Match /generate: ask for a single non-streamed reply.
                    "stream": False,
                    "max_tokens": 1
                }
            )
        if response.status_code == 200:
            return {"status": "healthy", "ollama_status": "connected"}
        # Bug fix: the original fell through on a non-200 reply and implicitly
        # returned None (serialized as HTTP 200 with body "null").
        return {
            "status": "unhealthy",
            "error": f"Ollama returned non-200 status: {response.status_code}",
        }
    except Exception as e:
        logger.error(f"Error en health check: {str(e)}")
        return {"status": "unhealthy", "error": str(e)}