papalia3 / app.py
andresdegante's picture
Initial commit: Add Llama3-Papalia inference UI and API
f2139e9
raw
history blame
3.72 kB
from fastapi import FastAPI, HTTPException, Request
from fastapi.responses import HTMLResponse, JSONResponse
from fastapi.staticfiles import StaticFiles
from fastapi.templating import Jinja2Templates
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
import httpx
import os
import logging
from typing import Optional
# Configure application-wide logging; module-level logger per stdlib convention.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# FastAPI application exposing both an HTML UI and a JSON inference API.
app = FastAPI(
    title="Llama3-Papalia Inference API & UI",
    description="API y UI para interactuar con el modelo Llama3-Papalia especializado en Desarrollo Humano",
    version="1.0.0"
)

# Enable CORS, wide open (any origin/method/header).
# NOTE(review): fine for a demo UI; consider restricting allow_origins in production.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Jinja2 HTML templates served from the local "templates/" directory.
templates = Jinja2Templates(directory="templates")
class QueryRequest(BaseModel):
    """Request body for POST /generate: the prompt plus optional sampling knobs."""

    prompt: str  # user prompt, forwarded verbatim to Ollama
    temperature: Optional[float] = 0.7  # sampling temperature passed through to Ollama
    max_tokens: Optional[int] = 500  # generation length cap passed through to Ollama
class QueryResponse(BaseModel):
    """Response body for POST /generate: generated text plus the model identifier."""

    response: str  # text produced by the model
    model: str = "llama3-papalia-nuevo"  # fixed model name echoed back to the client
# Ollama generation endpoint. Overridable via the OLLAMA_API_URL environment
# variable so the app can target a non-local Ollama instance; defaults to the
# standard local install (this also gives the already-imported `os` a purpose).
OLLAMA_API_URL = os.getenv("OLLAMA_API_URL", "http://localhost:11434/api/generate")
@app.get("/", response_class=HTMLResponse)
async def read_root(request: Request):
    """Serve the main HTML page of the inference UI."""
    context = {"request": request, "title": "Llama3-Papalia Inference"}
    return templates.TemplateResponse("index.html", context)
@app.post("/generate", response_model=QueryResponse)
async def generate_response(query: QueryRequest):
    """Proxy a generation request to the Ollama server.

    Forwards the prompt and sampling parameters to Ollama and returns the
    generated text. Failure mapping: non-200 Ollama reply -> HTTP 500,
    timeout -> HTTP 504, anything else -> HTTP 500.
    """
    logger.info("Recibida solicitud de generaci贸n con prompt: %s...", query.prompt[:50])
    try:
        async with httpx.AsyncClient(timeout=30.0) as client:
            logger.info("Enviando solicitud a Ollama: %s", OLLAMA_API_URL)
            response = await client.post(
                OLLAMA_API_URL,
                json={
                    "model": "llama3-papalia-nuevo",
                    "prompt": query.prompt,
                    # Bug fix: without stream=False Ollama answers with
                    # newline-delimited JSON chunks and response.json() below
                    # fails to parse the body.
                    "stream": False,
                    "temperature": query.temperature,
                    "max_tokens": query.max_tokens
                }
            )
            logger.info("Respuesta de Ollama recibida con status code: %s", response.status_code)
            if response.status_code != 200:
                logger.error("Error en la respuesta de Ollama: %s", response.text)
                raise HTTPException(
                    status_code=500,
                    detail=f"Error en la generaci贸n con Ollama: {response.text}"
                )
            result = response.json()
            logger.info("Respuesta procesada exitosamente")
            return QueryResponse(response=result["response"])
    except HTTPException:
        # Bug fix: re-raise as-is. Previously the broad `except Exception`
        # caught the deliberate 500 above and re-wrapped its detail.
        raise
    except httpx.TimeoutException:
        logger.error("Timeout al conectar con Ollama")
        raise HTTPException(
            status_code=504,
            detail="Timeout al conectar con el servicio de Ollama"
        )
    except Exception as e:
        # logger.exception records the traceback, unlike logger.error(str(e)).
        logger.exception("Error inesperado")
        raise HTTPException(
            status_code=500,
            detail=f"Error en el servidor: {str(e)}"
        )
@app.get("/health")
async def health_check():
    """Liveness probe: issue a tiny 1-token generation against Ollama.

    Returns {"status": "healthy", ...} when Ollama answers 200, and
    {"status": "unhealthy", "error": ...} on a non-200 reply or any failure.
    """
    try:
        async with httpx.AsyncClient(timeout=5.0) as client:
            response = await client.post(
                OLLAMA_API_URL,
                json={
                    "model": "llama3-papalia-nuevo",
                    "prompt": "test",
                    # Match /generate: ask for a single non-streamed reply.
                    "stream": False,
                    "max_tokens": 1
                }
            )
        if response.status_code == 200:
            return {"status": "healthy", "ollama_status": "connected"}
        # Bug fix: the original fell through on a non-200 reply and implicitly
        # returned None (serialized as HTTP 200 with body "null").
        return {
            "status": "unhealthy",
            "error": f"Ollama returned non-200 status: {response.status_code}",
        }
    except Exception as e:
        logger.error(f"Error en health check: {str(e)}")
        return {"status": "unhealthy", "error": str(e)}