# FastAPI service exposing the Llama3-Papalia Ollama model (JSON API + HTML UI).
# Standard library
import logging
import os
from typing import Optional

# Third-party
import httpx
from fastapi import FastAPI, HTTPException, Request
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import HTMLResponse, JSONResponse
from fastapi.staticfiles import StaticFiles
from fastapi.templating import Jinja2Templates
from pydantic import BaseModel

# Configure logging once at import time (handlers default to stderr).
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
app = FastAPI(
    title="Llama3-Papalia Inference API & UI",
    description="API y UI para interactuar con el modelo Llama3-Papalia especializado en Desarrollo Humano",
    version="1.0.0",
)

# Enable CORS for browser clients.
# NOTE(review): allow_origins=["*"] combined with allow_credentials=True is
# rejected by browsers per the CORS spec — pin concrete origins for production.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# HTML template configuration (expects ./templates/index.html to exist).
templates = Jinja2Templates(directory="templates")
class QueryRequest(BaseModel):
    """Request body for the generation endpoint.

    Attributes:
        prompt: user prompt forwarded verbatim to Ollama.
        temperature: sampling temperature (default 0.7).
        max_tokens: generation cap forwarded to Ollama (default 500).
    """

    prompt: str
    temperature: Optional[float] = 0.7
    max_tokens: Optional[int] = 500
class QueryResponse(BaseModel):
    """Response body: generated text plus the model identifier that produced it."""

    response: str
    model: str = "llama3-papalia-nuevo"
# Ollama generation endpoint — local daemon on its default port.
OLLAMA_API_URL = "http://localhost:11434/api/generate"
# Route decorator restored: the handler was defined but never registered on `app`.
@app.get("/", response_class=HTMLResponse)
async def read_root(request: Request):
    """Serve the chat UI rendered from templates/index.html."""
    return templates.TemplateResponse(
        "index.html",
        {"request": request, "title": "Llama3-Papalia Inference"}
    )
# Route decorator restored: the handler was defined but never registered on `app`.
@app.post("/generate", response_model=QueryResponse)
async def generate_response(query: QueryRequest):
    """Proxy a generation request to the local Ollama daemon.

    Raises:
        HTTPException 500: Ollama answered non-200, or an unexpected error occurred.
        HTTPException 504: the request to Ollama timed out (30 s budget).
    """
    logger.info("Recibida solicitud de generación con prompt: %s...", query.prompt[:50])
    try:
        async with httpx.AsyncClient(timeout=30.0) as client:
            logger.info("Enviando solicitud a Ollama: %s", OLLAMA_API_URL)
            response = await client.post(
                OLLAMA_API_URL,
                json={
                    "model": "llama3-papalia-nuevo",
                    "prompt": query.prompt,
                    "temperature": query.temperature,
                    "max_tokens": query.max_tokens,
                },
            )
            logger.info("Respuesta de Ollama recibida con status code: %s", response.status_code)
            if response.status_code != 200:
                logger.error("Error en la respuesta de Ollama: %s", response.text)
                raise HTTPException(
                    status_code=500,
                    detail=f"Error en la generación con Ollama: {response.text}",
                )
            result = response.json()
            logger.info("Respuesta procesada exitosamente")
            return QueryResponse(response=result["response"])
    except httpx.TimeoutException:
        logger.error("Timeout al conectar con Ollama")
        raise HTTPException(
            status_code=504,
            detail="Timeout al conectar con el servicio de Ollama",
        )
    except HTTPException:
        # Bug fix: re-raise our own HTTPExceptions unchanged — previously the
        # generic handler below caught them and re-wrapped them as a plain 500
        # with a different detail message.
        raise
    except Exception as e:
        logger.exception("Error inesperado")
        raise HTTPException(
            status_code=500,
            detail=f"Error en el servidor: {str(e)}",
        ) from e
# Route decorator restored: the handler was defined but never registered on `app`.
@app.get("/health")
async def health_check():
    """Liveness probe: issue a minimal 1-token generation against Ollama.

    Always returns a status dict; never raises.
    """
    try:
        async with httpx.AsyncClient(timeout=5.0) as client:
            response = await client.post(
                OLLAMA_API_URL,
                json={
                    "model": "llama3-papalia-nuevo",
                    "prompt": "test",
                    "max_tokens": 1,
                },
            )
        if response.status_code == 200:
            return {"status": "healthy", "ollama_status": "connected"}
        # Bug fix: the original fell off the end here and implicitly returned
        # None when Ollama answered with a non-200 status.
        return {
            "status": "unhealthy",
            "error": f"Ollama respondió con status {response.status_code}",
        }
    except Exception as e:
        logger.error("Error en health check: %s", e)
        return {"status": "unhealthy", "error": str(e)}