Spaces:

lilmeaty
/

gcs

Sleeping

App Files Files Community

gcs / app.py

Hjgugugjhuhjggg

Update app.py

4bf1bd9 verified 28 days ago

raw

history blame

8.56 kB

	import os
	import logging
	import requests
	import threading
	from io import BytesIO
	from fastapi import FastAPI, HTTPException, Response, Request
	from fastapi.responses import StreamingResponse
	from pydantic import BaseModel
	from transformers import (
	AutoConfig,
	AutoModelForCausalLM,
	AutoTokenizer,
	pipeline,
	GenerationConfig
	)
	import boto3
	from huggingface_hub import hf_hub_download
	import soundfile as sf
	import numpy as np
	import torch
	import uvicorn
	from tqdm import tqdm

	# Process-wide logging: INFO and above, with timestamp and level in each record.
	logging.basicConfig(
	    level=logging.INFO,
	    format="%(asctime)s - %(levelname)s - %(message)s",
	)

	# Deployment settings come from the environment; each one is None when unset.
	AWS_ACCESS_KEY_ID = os.environ.get("AWS_ACCESS_KEY_ID")
	AWS_SECRET_ACCESS_KEY = os.environ.get("AWS_SECRET_ACCESS_KEY")
	AWS_REGION = os.environ.get("AWS_REGION")
	S3_BUCKET_NAME = os.environ.get("S3_BUCKET_NAME")
	HUGGINGFACE_HUB_TOKEN = os.environ.get("HUGGINGFACE_HUB_TOKEN")

	class GenerateRequest(BaseModel):
	    """JSON body accepted by the ``/generate`` endpoint."""

	    # Identifier handed to the model loader to pick the checkpoint.
	    model_name: str
	    # Prompt passed to the tokenizer / pipeline.
	    input_text: str
	    # Branch selector; the endpoint compares it against "text-to-text",
	    # "text-to-image", "text-to-speech" and "text-to-video".
	    task_type: str

	    # Sampling settings, forwarded to GenerationConfig for "text-to-text".
	    temperature: float = 1.0
	    max_new_tokens: int = 200
	    # When true, "text-to-text" answers with a streamed plain-text body
	    # instead of a single JSON object.
	    stream: bool = False
	    top_p: float = 1.0
	    top_k: int = 50
	    repetition_penalty: float = 1.0
	    num_return_sequences: int = 1
	    do_sample: bool = True

	class S3ModelLoader:
	    """Load causal-LM checkpoints, using an S3 bucket as a cache in front of the Hugging Face Hub.

	    Objects for a model live under the key prefix returned by
	    ``_get_s3_prefix`` (the model name with ``/`` replaced by ``-``). The
	    same prefix is used for both upload and download so a model saved once
	    can be found again.
	    """

	    def __init__(self, bucket_name, s3_client):
	        """Remember the bucket name and the boto3 S3 client used for transfers."""
	        self.bucket_name = bucket_name
	        self.s3_client = s3_client

	    def _get_s3_prefix(self, model_name):
	        """Return the key prefix for ``model_name`` ('/' becomes '-')."""
	        return model_name.replace('/', '-')

	    def _get_s3_uri(self, model_name):
	        """Return the ``s3://bucket/prefix`` URI for ``model_name`` (informational)."""
	        return f"s3://{self.bucket_name}/{self._get_s3_prefix(model_name)}"

	    def download_model_from_s3(self, model_name):
	        """Try to load ``model_name`` from the bucket.

	        Returns:
	            ``(model, tokenizer)`` on success, ``(None, None)`` when nothing is
	            stored under the model's prefix or when any step fails (the error
	            is logged, not raised).
	        """
	        import tempfile

	        # Trailing slash so that e.g. "gpt2" does not also match "gpt2-medium".
	        prefix = self._get_s3_prefix(model_name) + "/"
	        try:
	            logging.info(f"Trying to load {model_name} from S3...")
	            # from_pretrained reads local directories (or Hub ids), so the
	            # objects are staged into a temporary directory first.
	            with tempfile.TemporaryDirectory() as local_dir:
	                downloaded_any = False
	                paginator = self.s3_client.get_paginator("list_objects_v2")
	                for page in paginator.paginate(Bucket=self.bucket_name, Prefix=prefix):
	                    for entry in page.get("Contents", []):
	                        key = entry["Key"]
	                        parts = key[len(prefix):].split("/")
	                        # Skip folder markers and anything that would escape local_dir.
	                        if "" in parts or ".." in parts:
	                            continue
	                        target = os.path.join(local_dir, *parts)
	                        os.makedirs(os.path.dirname(target), exist_ok=True)
	                        self.s3_client.download_file(self.bucket_name, key, target)
	                        downloaded_any = True

	                if not downloaded_any:
	                    logging.info(f"{model_name} not found in S3.")
	                    return None, None

	                config = AutoConfig.from_pretrained(local_dir)
	                model = AutoModelForCausalLM.from_pretrained(local_dir, config=config)
	                tokenizer = AutoTokenizer.from_pretrained(local_dir)
	            logging.info(f"Loaded {model_name} from S3 successfully.")
	            return model, tokenizer
	        except Exception as e:
	            # Best effort: the caller falls back to the Hugging Face Hub.
	            logging.error(f"Error loading {model_name} from S3: {e}")
	            return None, None

	    async def load_model_and_tokenizer(self, model_name):
	        """Return ``(model, tokenizer)`` from S3, falling back to the Hugging Face Hub.

	        Raises:
	            HTTPException: status 500 when the model cannot be loaded.
	        """
	        try:
	            model, tokenizer = self.download_model_from_s3(model_name)
	            if model is None or tokenizer is None:
	                model, tokenizer = await self.download_and_save_model_from_huggingface(model_name)
	            return model, tokenizer
	        except HTTPException:
	            # Already carries a precise detail message; do not wrap it again.
	            raise
	        except Exception as e:
	            raise HTTPException(status_code=500, detail=f"Error loading model: {e}") from e

	    async def download_and_save_model_from_huggingface(self, model_name):
	        """Download ``model_name`` from the Hugging Face Hub and store a copy in S3.

	        Returns:
	            ``(model, tokenizer)``.

	        Raises:
	            HTTPException: status 500 when the download or the S3 upload fails.
	        """
	        try:
	            logging.info(f"Downloading {model_name} from Hugging Face...")
	            # No extra keyword arguments: unknown kwargs are forwarded by
	            # from_pretrained to the model/config and make the load fail.
	            model = AutoModelForCausalLM.from_pretrained(model_name, token=HUGGINGFACE_HUB_TOKEN)
	            tokenizer = AutoTokenizer.from_pretrained(model_name, token=HUGGINGFACE_HUB_TOKEN)
	            logging.info(f"Downloaded {model_name} successfully.")
	            self.upload_model_to_s3(model_name, model, tokenizer)
	            return model, tokenizer
	        except HTTPException:
	            # Raised by upload_model_to_s3; keep its own detail message.
	            raise
	        except Exception as e:
	            logging.error(f"Error downloading model from Hugging Face: {e}")
	            raise HTTPException(status_code=500, detail=f"Error downloading model from Hugging Face: {e}") from e

	    def upload_model_to_s3(self, model_name, model, tokenizer):
	        """Serialize ``model`` and ``tokenizer`` and upload the files under the model's prefix.

	        Raises:
	            HTTPException: status 500 when saving or uploading fails.
	        """
	        import tempfile

	        prefix = self._get_s3_prefix(model_name)
	        try:
	            # save_pretrained writes to a local directory; upload each file afterwards.
	            with tempfile.TemporaryDirectory() as local_dir:
	                model.save_pretrained(local_dir)
	                tokenizer.save_pretrained(local_dir)
	                for root, _dirs, filenames in os.walk(local_dir):
	                    for filename in filenames:
	                        path = os.path.join(root, filename)
	                        relative = os.path.relpath(path, local_dir).replace(os.sep, "/")
	                        self.s3_client.upload_file(path, self.bucket_name, f"{prefix}/{relative}")
	            logging.info(f"Saved {model_name} to S3 successfully.")
	        except Exception as e:
	            logging.error(f"Error saving {model_name} to S3: {e}")
	            raise HTTPException(status_code=500, detail=f"Error saving model to S3: {e}") from e

	# ASGI application object; the decorated handlers in this module attach to it.
	app = FastAPI()

	# A single S3 client and model loader are created at import time and shared
	# by every request.
	s3_client = boto3.client(
	    "s3",
	    aws_access_key_id=AWS_ACCESS_KEY_ID,
	    aws_secret_access_key=AWS_SECRET_ACCESS_KEY,
	    region_name=AWS_REGION,
	)
	model_loader = S3ModelLoader(S3_BUCKET_NAME, s3_client)

	@app.post("/generate")
	async def generate(request: Request, body: GenerateRequest):
	    """Run the requested task on ``body.input_text`` with the model named in the request.

	    Args:
	        request: The incoming HTTP request (currently unused).
	        body: Parsed request payload; ``task_type`` selects the branch.

	    Returns:
	        * ``text-to-text``: a streamed ``text/plain`` response when
	          ``body.stream`` is true, otherwise ``{"result": <text>}``. The text
	          starts with the decoded prompt followed by the continuation.
	        * ``text-to-image`` / ``text-to-speech`` / ``text-to-video``: a binary
	          ``Response`` with the matching media type.

	    Raises:
	        HTTPException: 400 for an unknown ``task_type``; 500 for any failure
	            while loading the model or generating.
	    """
	    try:
	        model, tokenizer = await model_loader.load_model_and_tokenizer(body.model_name)
	        device = "cuda" if torch.cuda.is_available() else "cpu"
	        model.to(device)

	        if body.task_type == "text-to-text":
	            generation_config = GenerationConfig(
	                temperature=body.temperature,
	                max_new_tokens=body.max_new_tokens,
	                top_p=body.top_p,
	                top_k=body.top_k,
	                repetition_penalty=body.repetition_penalty,
	                do_sample=body.do_sample,
	                num_return_sequences=body.num_return_sequences,
	            )

	            # A plain (sync) generator: StreamingResponse iterates it in a
	            # worker thread, so the blocking model.generate() calls do not
	            # stall the event loop while streaming.
	            def stream_text():
	                input_text = body.input_text
	                emitted_text = ""  # everything yielded so far
	                max_length = model.config.max_position_embeddings
	                eos_token_id = tokenizer.eos_token_id

	                while True:
	                    inputs = tokenizer(input_text, return_tensors="pt").to(device)
	                    input_length = inputs.input_ids.shape[1]
	                    remaining_tokens = max_length - input_length
	                    if remaining_tokens <= 0:
	                        break
	                    # Never ask for more tokens than the context window has left.
	                    generation_config.max_new_tokens = min(body.max_new_tokens, remaining_tokens)

	                    output = model.generate(**inputs, generation_config=generation_config)
	                    sequence = output[0]
	                    new_token_ids = sequence[input_length:]
	                    if new_token_ids.numel() == 0:
	                        # No progress; looping again would repeat forever.
	                        break

	                    # The returned sequence contains the prompt as well as the
	                    # continuation, so only the text not yet emitted is yielded.
	                    # Diffing decoded text (rather than decoding the new ids
	                    # alone) keeps word-boundary spaces intact.
	                    full_text = tokenizer.decode(sequence, skip_special_tokens=True)
	                    if full_text.startswith(emitted_text):
	                        chunk = full_text[len(emitted_text):]
	                    else:
	                        chunk = tokenizer.decode(new_token_ids, skip_special_tokens=True)
	                    if chunk:
	                        yield chunk

	                    # Stop once the model has signalled end-of-sequence.
	                    if eos_token_id is not None and eos_token_id in new_token_ids.tolist():
	                        break

	                    emitted_text = full_text
	                    input_text = full_text

	            if body.stream:
	                return StreamingResponse(stream_text(), media_type="text/plain")
	            return {"result": "".join(stream_text())}

	        elif body.task_type == "text-to-image":
	            generator = pipeline("text-to-image", model=model, tokenizer=tokenizer, device=device)
	            image = generator(body.input_text)[0]
	            # Encode as real PNG data; raw pixel bytes are not a PNG file.
	            # NOTE(review): assumes the pipeline yields a PIL image — confirm.
	            image_buffer = BytesIO()
	            image.save(image_buffer, format="PNG")
	            return Response(content=image_buffer.getvalue(), media_type="image/png")

	        elif body.task_type == "text-to-speech":
	            generator = pipeline("text-to-speech", model=model, tokenizer=tokenizer, device=device)
	            audio = generator(body.input_text)
	            audio_buffer = BytesIO()
	            # soundfile.write takes (file, data, samplerate); a file-like target
	            # needs an explicit format. Writing with subtype PCM_16 lets
	            # soundfile scale float samples instead of truncating them.
	            # NOTE(review): assumes audio["audio"] is laid out as
	            # (frames,) or (frames, channels) — confirm for the model used.
	            sf.write(
	                audio_buffer,
	                audio["audio"],
	                audio["sampling_rate"],
	                format="WAV",
	                subtype="PCM_16",
	            )
	            return Response(content=audio_buffer.getvalue(), media_type="audio/wav")

	        elif body.task_type == "text-to-video":
	            try:
	                generator = pipeline("text-to-video", model=model, tokenizer=tokenizer, device=device)
	                video = generator(body.input_text)
	                return Response(content=video, media_type="video/mp4")
	            except Exception as e:
	                raise HTTPException(status_code=500, detail=f"Error in text-to-video generation: {e}")

	        else:
	            raise HTTPException(status_code=400, detail="Unsupported task type")

	    except HTTPException:
	        # Keep the status code and detail chosen closer to the failure.
	        raise
	    except Exception as e:
	        raise HTTPException(status_code=500, detail=str(e)) from e

	def download_all_models_in_background():
	    """Fetch the Hugging Face model listing and download every listed model via ``model_loader``.

	    Meant to run on a worker thread (see ``run_in_background``). A failure
	    for one model is logged and the sweep continues with the next one.

	    Raises:
	        HTTPException: status 500 when the model listing itself cannot be
	            fetched or parsed.
	    """
	    import asyncio

	    models_url = "https://huggingface.co/api/models"
	    try:
	        # Bounded wait so a stalled connection cannot hang the worker forever.
	        response = requests.get(models_url, timeout=30)
	        if response.status_code != 200:
	            logging.error("Error al obtener la lista de modelos de Hugging Face.")
	            raise HTTPException(status_code=500, detail="Error al obtener la lista de modelos.")
	        models = response.json()
	    except Exception as e:
	        logging.error(f"Error al descargar modelos en segundo plano: {e}")
	        raise HTTPException(status_code=500, detail="Error al descargar modelos en segundo plano.")

	    for model in models:
	        try:
	            model_name = model["id"]
	            # The loader method is a coroutine function; calling it only
	            # creates a coroutine object. asyncio.run executes it to
	            # completion on this thread's own event loop.
	            asyncio.run(model_loader.download_and_save_model_from_huggingface(model_name))
	        except Exception as e:
	            logging.error(f"Error al descargar modelos en segundo plano: {e}")

	def run_in_background():
	    """Start ``download_all_models_in_background`` on a daemon thread and return at once."""
	    worker = threading.Thread(
	        target=download_all_models_in_background,
	        daemon=True,
	    )
	    worker.start()

	@app.on_event("startup")
	async def startup_event():
	    """Startup hook: launch the background model download."""
	    run_in_background()

	# Script entry point: serve the app with uvicorn on all interfaces, port 8000.
	if __name__ == "__main__":
	    uvicorn.run(
	        app,
	        host="0.0.0.0",
	        port=8000,
	    )