"""FastAPI service that mirrors Hugging Face model files into GCS and serves predictions.

Model files are copied from the Hugging Face Hub into a Google Cloud Storage
bucket the first time a model is requested, then read back from the bucket to
build the pipeline that answers ``POST /predict/``.
"""

import asyncio
import datetime
import io
import json
import logging
import os
import shutil
import tempfile
import threading

import requests
import safetensors.torch
import soundfile as sf  # Used to write the generated audio to disk
import torch
import uvicorn
from audiocraft.models import AudioLM
from diffusers import StableDiffusionPipeline
from dotenv import load_dotenv
from fastapi import FastAPI, HTTPException
from google.auth import exceptions
from google.cloud import storage
from pydantic import BaseModel
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
from transformers import pipeline as tts_pipeline

load_dotenv()

API_KEY = os.getenv("API_KEY")
GCS_BUCKET_NAME = os.getenv("GCS_BUCKET_NAME")
GOOGLE_APPLICATION_CREDENTIALS_JSON = os.getenv("GOOGLE_APPLICATION_CREDENTIALS_JSON")
HF_API_TOKEN = os.getenv("HF_API_TOKEN")

logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

# Logical key -> file name, for every file mirrored between the Hub and GCS.
MODEL_FILE_NAMES = {
    "config": "config.json",
    "tokenizer": "tokenizer.json",
    "model_bin": "pytorch_model.bin",
    "model_safetensors": "model.safetensors",
}
# A model is usable as soon as ONE of these weight files is present.
WEIGHT_KEYS = ("model_bin", "model_safetensors")
SUPPORTED_TASKS = frozenset({
    "text-generation",
    "image-generation",
    "audio-generation",
    "text-to-speech",
    "text-to-audio",
})
# Connect/read inactivity timeout for Hub requests, so a stalled connection
# cannot hang a request (or the startup thread) forever.
HTTP_TIMEOUT_SECONDS = 60

# GCS configuration. Failing here aborts the import on purpose: nothing below
# works without a bucket.
try:
    credentials_info = json.loads(GOOGLE_APPLICATION_CREDENTIALS_JSON)
    storage_client = storage.Client.from_service_account_info(credentials_info)
    bucket = storage_client.bucket(GCS_BUCKET_NAME)
    logger.info(f"Conexión con Google Cloud Storage exitosa. Bucket: {GCS_BUCKET_NAME}")
except (
    exceptions.DefaultCredentialsError,
    json.JSONDecodeError,
    KeyError,
    ValueError,
    TypeError,  # json.loads(None) when the environment variable is unset
) as e:
    logger.error(f"Error al cargar las credenciales o bucket: {e}")
    raise RuntimeError(f"Error al cargar las credenciales o bucket: {e}") from e

app = FastAPI()


class DownloadModelRequest(BaseModel):
    """Request body of ``POST /predict/``."""

    model_name: str  # Hub repository id, also used as the GCS blob prefix
    pipeline_task: str  # one of SUPPORTED_TASKS
    input_text: str  # prompt handed to the pipeline/model


class GCSHandler:
    """Thin wrapper around a bucket of the module-level ``storage_client``."""

    def __init__(self, bucket_name):
        self.bucket = storage_client.bucket(bucket_name)

    def file_exists(self, blob_name):
        """Return whether ``blob_name`` exists in the bucket."""
        exists = self.bucket.blob(blob_name).exists()
        logger.debug(f"Comprobando existencia de archivo '{blob_name}': {exists}")
        return exists

    def download_file_as_stream(self, blob_name):
        """Open ``blob_name`` for binary reading; the caller must close the stream.

        Raises:
            HTTPException: 404 when the blob does not exist.
        """
        blob = self.bucket.blob(blob_name)
        if not blob.exists():
            logger.error(f"Archivo '{blob_name}' no encontrado en GCS.")
            raise HTTPException(status_code=404, detail=f"File '{blob_name}' not found.")
        logger.debug(f"Descargando archivo '{blob_name}' de GCS.")
        return blob.open("rb")  # Binary read stream

    def upload_file(self, blob_name, file_stream):
        """Upload the contents of ``file_stream`` to ``blob_name``.

        Raises:
            HTTPException: 500 when the upload fails.
        """
        blob = self.bucket.blob(blob_name)
        try:
            blob.upload_from_file(file_stream)
            logger.info(f"Archivo '{blob_name}' subido exitosamente a GCS.")
        except Exception as e:
            logger.error(f"Error subiendo el archivo '{blob_name}' a GCS: {e}")
            raise HTTPException(
                status_code=500,
                detail=f"Error subiendo archivo '{blob_name}' a GCS",
            ) from e

    def generate_signed_url(self, blob_name, expiration=3600):
        """Return a signed URL for ``blob_name``.

        ``expiration`` is a lifetime in seconds; ``datetime``/``timedelta``
        values are forwarded unchanged.
        """
        # The default (V2) signer treats a bare int as an absolute epoch
        # timestamp, so 3600 would mean "expired in 1970". Convert to a
        # relative duration.
        if isinstance(expiration, (int, float)):
            expiration = datetime.timedelta(seconds=expiration)
        blob = self.bucket.blob(blob_name)
        url = blob.generate_signed_url(expiration=expiration)
        logger.debug(f"Generada URL firmada para '{blob_name}': {url}")
        return url


def download_model_from_huggingface(model_name):
    """Copy the known model files of ``model_name`` from the Hub into GCS.

    Files the repository does not provide are skipped, since most repositories
    ship only one weight format.

    Raises:
        HTTPException: 404 when the repository page is not reachable,
            500 for any other failure.
    """
    url = f"https://huggingface.co/{model_name}/tree/main"
    # Only send the header when a token is configured: "Bearer None" is rejected.
    headers = {"Authorization": f"Bearer {HF_API_TOKEN}"} if HF_API_TOKEN else {}
    try:
        logger.info(f"Descargando el modelo '{model_name}' desde Hugging Face...")
        response = requests.get(url, headers=headers, timeout=HTTP_TIMEOUT_SECONDS)
        if response.status_code != 200:
            logger.error(f"Error al acceder al árbol de archivos de Hugging Face para '{model_name}'.")
            raise HTTPException(
                status_code=404,
                detail="Error al acceder al árbol de archivos de Hugging Face.",
            )

        for file_name in MODEL_FILE_NAMES.values():
            file_url = f"https://huggingface.co/{model_name}/resolve/main/{file_name}"
            file_response = requests.get(file_url, headers=headers, timeout=HTTP_TIMEOUT_SECONDS)
            if file_response.status_code != 200:
                # Never upload an error page as if it were a model file.
                logger.warning(
                    f"Archivo '{file_name}' no disponible en Hugging Face "
                    f"(HTTP {file_response.status_code}); se omite."
                )
                continue
            # Upload straight from the downloaded bytes.
            blob_name = f"{model_name}/{file_name}"
            bucket.blob(blob_name).upload_from_string(file_response.content)
            logger.info(f"Archivo '{file_name}' subido exitosamente al bucket GCS.")
    except HTTPException:
        # Keep the status code chosen above instead of rewrapping it as a 500.
        raise
    except Exception as e:
        logger.error(f"Error descargando archivos de Hugging Face: {e}")
        raise HTTPException(
            status_code=500,
            detail=f"Error descargando archivos de Hugging Face: {e}",
        ) from e


def _is_usable(keys):
    """Return True when ``keys`` include the config and at least one weight file."""
    return "config" in keys and any(key in keys for key in WEIGHT_KEYS)


def load_model_from_gcs(model_name, gcs_handler):
    """Open read streams for the files of ``model_name`` stored in GCS.

    The Hub download runs at most once, and only when the bucket lacks the
    config or every weight file.

    Returns:
        dict mapping the keys of ``MODEL_FILE_NAMES`` to open binary streams.
        Only files that exist are included; the caller must close the streams.

    Raises:
        HTTPException: 404 when the model is still incomplete after the download.
    """
    blob_names = {
        key: f"{model_name}/{file_name}" for key, file_name in MODEL_FILE_NAMES.items()
    }

    def _present():
        return {key: name for key, name in blob_names.items() if gcs_handler.file_exists(name)}

    available = _present()
    if not _is_usable(available):
        logger.info(f"Modelo '{model_name}' incompleto en GCS, descargando desde Hugging Face...")
        download_model_from_huggingface(model_name)
        available = _present()
    if not _is_usable(available):
        raise HTTPException(status_code=404, detail="No se encontró modelo compatible en el bucket.")

    return {key: gcs_handler.download_file_as_stream(name) for key, name in available.items()}


def _has_weights(model_data):
    """Return True when ``model_data`` holds at least one weight stream."""
    return any(model_data.get(key) is not None for key in WEIGHT_KEYS)


def _write_streams_to_dir(model_data, target_dir):
    """Copy every stream of ``model_data`` into ``target_dir`` and close it.

    ``from_pretrained`` in transformers/diffusers takes a repo id or a local
    directory, not a file-like object, hence the copy to disk.
    """
    for key, stream in model_data.items():
        file_name = MODEL_FILE_NAMES.get(key)
        if stream is None or file_name is None:
            continue
        with stream, open(os.path.join(target_dir, file_name), "wb") as out:
            shutil.copyfileobj(stream, out)


def _close_streams(model_data):
    """Close every stream of ``model_data``; closing twice is harmless."""
    for stream in model_data.values():
        if stream is not None:
            stream.close()


def load_model_from_streams(model_data, model_name):
    """Build a causal language model and its tokenizer from GCS streams.

    Returns:
        ``(model, tokenizer)``.

    Raises:
        HTTPException: 404 when no weight file is available.
    """
    if not _has_weights(model_data):
        raise HTTPException(status_code=404, detail="No se encontró modelo compatible en el bucket.")
    logger.info(f"Cargando modelo Transformers para '{model_name}'...")
    with tempfile.TemporaryDirectory() as model_dir:
        _write_streams_to_dir(model_data, model_dir)
        model = AutoModelForCausalLM.from_pretrained(model_dir)
        tokenizer = AutoTokenizer.from_pretrained(model_dir)
    return model, tokenizer


def load_diffuser_model_from_streams(model_data, model_name):
    """Build a Stable Diffusion pipeline from GCS streams.

    Raises:
        HTTPException: 404 when no weight file is available.
    """
    if not _has_weights(model_data):
        raise HTTPException(status_code=404, detail="No se encontró modelo compatible en el bucket.")
    logger.info(f"Cargando modelo Diffusers para '{model_name}'...")
    # NOTE(review): a diffusers directory normally needs model_index.json and
    # per-component sub-folders, which are not among the mirrored files.
    # Confirm the expected repository layout.
    with tempfile.TemporaryDirectory() as model_dir:
        _write_streams_to_dir(model_data, model_dir)
        pipe = StableDiffusionPipeline.from_pretrained(model_dir)
    return pipe


def load_audiocraft_model_from_streams(model_data, model_name):
    """Build an AudioCraft model from GCS streams.

    Raises:
        HTTPException: 404 when no weight file is available.
    """
    # Prefer the .bin weights as before, but fall back to safetensors instead
    # of dereferencing a missing stream.
    weights_stream = model_data.get("model_bin")
    if weights_stream is None:
        weights_stream = model_data.get("model_safetensors")
    if weights_stream is None:
        raise HTTPException(status_code=404, detail="No se encontró modelo compatible en el bucket.")
    logger.info(f"Cargando modelo Audiocraft para '{model_name}'...")
    # NOTE(review): unverified that audiocraft exposes `AudioLM`, or that its
    # from_pretrained accepts an in-memory buffer. Call kept as written.
    with weights_stream:
        model = AudioLM.from_pretrained(io.BytesIO(weights_stream.read()))
    return model


def _load_tts_pipeline(model_data, model_name):
    """Build a text-to-speech pipeline from GCS streams."""
    if not _has_weights(model_data):
        raise HTTPException(status_code=404, detail="No se encontró modelo compatible en el bucket.")
    logger.info(f"Cargando pipeline TTS para '{model_name}'...")
    with tempfile.TemporaryDirectory() as model_dir:
        _write_streams_to_dir(model_data, model_dir)
        # Given a path, the pipeline resolves model and tokenizer itself.
        return tts_pipeline("text-to-speech", model=model_dir)


@app.post("/predict/")
async def predict(request: DownloadModelRequest):
    """Run ``request.pipeline_task`` on ``request.input_text``.

    Returns:
        ``{"response": ...}``: the first generated item for text generation,
        or the path of the written file for image and audio tasks.

    Raises:
        HTTPException: 400 for an unsupported task, 404 when the model cannot
            be found, 500 for any unexpected failure.
    """
    logger.info(f"Iniciando predicción para el modelo '{request.model_name}' con tarea '{request.pipeline_task}'...")
    model_data = {}
    try:
        # Reject unknown tasks before paying for any download.
        if request.pipeline_task not in SUPPORTED_TASKS:
            raise HTTPException(status_code=400, detail="Tarea no soportada.")

        gcs_handler = GCSHandler(GCS_BUCKET_NAME)
        model_prefix = request.model_name
        model_data = load_model_from_gcs(model_prefix, gcs_handler)

        if request.pipeline_task == "text-generation":
            # Regular Hugging Face model for text tasks.
            model, tokenizer = load_model_from_streams(model_data, model_prefix)
            pipe = pipeline(request.pipeline_task, model=model, tokenizer=tokenizer)
            response = pipe(request.input_text)[0]

        elif request.pipeline_task == "image-generation":
            pipe = load_diffuser_model_from_streams(model_data, model_prefix)
            images = pipe(request.input_text).images
            # A PIL image is not JSON-serialisable; save it and return the
            # path, as the audio tasks do.
            image_path = "output.png"
            images[0].save(image_path)
            response = image_path

        elif request.pipeline_task == "audio-generation":
            model = load_audiocraft_model_from_streams(model_data, model_prefix)
            # NOTE(review): the return type of generate() is not visible here;
            # the first element is returned as before. Confirm it serialises.
            response = model.generate(request.input_text)[0]

        elif request.pipeline_task == "text-to-speech":
            tts_pipe = _load_tts_pipeline(model_data, model_prefix)
            output = tts_pipe(request.input_text)
            if isinstance(output, list):
                output = output[0]
            audio_path = "output.wav"
            # Use the rate reported by the pipeline rather than assuming 16 kHz.
            sf.write(audio_path, output["audio"], output["sampling_rate"])
            response = audio_path

        else:  # "text-to-audio"
            model = load_audiocraft_model_from_streams(model_data, model_prefix)
            audio_output = model.generate(request.input_text)
            audio_path = "output_audio.wav"
            sf.write(audio_path, audio_output, 16000)
            response = audio_path

        logger.info(f"Resultado generado para la tarea '{request.pipeline_task}': {response}")
        return {"response": response}

    except HTTPException as e:
        logger.error(f"HTTPException: {e.detail}")
        raise
    except Exception as e:
        logger.error(f"Error inesperado: {e}")
        raise HTTPException(status_code=500, detail=f"Error: {e}") from e
    finally:
        # Streams a loader did not consume would otherwise stay open.
        _close_streams(model_data)


def download_model_in_background(model_name):
    """Mirror ``model_name`` into GCS, logging failures instead of raising."""
    try:
        logger.info(f"Iniciando descarga en segundo plano del modelo '{model_name}' a GCS...")
        download_model_from_huggingface(model_name)
        logger.info(f"Descarga del modelo '{model_name}' completada.")
    except Exception as e:
        logger.error(f"Error al descargar el modelo '{model_name}' en segundo plano: {e}")


def run_in_background():
    """Start the background download thread."""
    logger.info("Iniciando la descarga de modelos en segundo plano...")
    threading.Thread(target=download_model_in_background, args=("modelo_ejemplo",)).start()


@app.on_event("startup")
async def startup_event():
    """Kick off the background download when the application starts."""
    run_in_background()


if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=7860)