Hjgugugjhuhjggg
committed on
Commit
•
1c3034c
1
Parent(s):
d84cd10
Update app.py
Browse files
app.py
CHANGED
@@ -1,7 +1,7 @@
|
|
1 |
import os
|
2 |
import re
|
3 |
-
import requests
|
4 |
import json
|
|
|
5 |
from fastapi import FastAPI, HTTPException
|
6 |
from pydantic import BaseModel
|
7 |
from google.auth import exceptions
|
@@ -18,28 +18,35 @@ GCS_BUCKET_NAME = os.getenv("GCS_BUCKET_NAME")
|
|
18 |
GOOGLE_APPLICATION_CREDENTIALS_JSON = os.getenv("GOOGLE_APPLICATION_CREDENTIALS_JSON")
|
19 |
HF_API_TOKEN = os.getenv("HF_API_TOKEN")
|
20 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
21 |
def validate_bucket_name(bucket_name):
|
22 |
-
|
23 |
-
raise ValueError("Bucket name must be a string.")
|
24 |
-
if len(bucket_name) < 3 or len(bucket_name) > 63:
|
25 |
-
raise ValueError("Bucket name must be between 3 and 63 characters long.")
|
26 |
if not re.match(r"^[a-z0-9][a-z0-9\-\.]*[a-z0-9]$", bucket_name):
|
27 |
-
raise ValueError(
|
28 |
-
f"Invalid bucket name '{bucket_name}'. Bucket names must:"
|
29 |
-
" - Use only lowercase letters, numbers, hyphens (-), and periods (.)"
|
30 |
-
" - Start and end with a letter or number."
|
31 |
-
)
|
32 |
-
if "--" in bucket_name or ".." in bucket_name or ".-" in bucket_name or "-." in bucket_name:
|
33 |
-
raise ValueError(
|
34 |
-
f"Invalid bucket name '{bucket_name}'. Bucket names cannot contain consecutive periods, hyphens, or use '.-' or '-.'"
|
35 |
-
)
|
36 |
return bucket_name
|
37 |
|
38 |
try:
|
|
|
|
|
39 |
GCS_BUCKET_NAME = validate_bucket_name(GCS_BUCKET_NAME)
|
|
|
|
|
40 |
credentials_info = json.loads(GOOGLE_APPLICATION_CREDENTIALS_JSON)
|
41 |
storage_client = storage.Client.from_service_account_info(credentials_info)
|
42 |
bucket = storage_client.bucket(GCS_BUCKET_NAME)
|
|
|
43 |
except (exceptions.DefaultCredentialsError, json.JSONDecodeError, KeyError, ValueError) as e:
|
44 |
print(f"Error al cargar credenciales o bucket: {e}")
|
45 |
exit(1)
|
@@ -61,7 +68,7 @@ class GCSStreamHandler:
|
|
61 |
def stream_file_from_gcs(self, blob_name):
|
62 |
blob = self.bucket.blob(blob_name)
|
63 |
if not blob.exists():
|
64 |
-
raise HTTPException(status_code=404, detail=f"
|
65 |
return blob.download_as_bytes()
|
66 |
|
67 |
def upload_file_to_gcs(self, blob_name, data_stream):
|
@@ -103,7 +110,7 @@ def download_model_from_huggingface(model_name):
|
|
103 |
blob_name = f"{model_name}/{filename}"
|
104 |
blob = bucket.blob(blob_name)
|
105 |
blob.upload_from_file(BytesIO(response.content))
|
106 |
-
except Exception
|
107 |
pass
|
108 |
|
109 |
@app.post("/predict/")
|
@@ -127,6 +134,8 @@ async def predict(request: DownloadModelRequest):
|
|
127 |
model = AutoModelForCausalLM.from_pretrained(BytesIO(config_stream))
|
128 |
tokenizer = AutoTokenizer.from_pretrained(BytesIO(tokenizer_stream))
|
129 |
pipeline_task = request.pipeline_task
|
|
|
|
|
130 |
pipeline_ = pipeline(pipeline_task, model=model, tokenizer=tokenizer)
|
131 |
input_text = request.input_text
|
132 |
result = pipeline_(input_text)
|
|
|
1 |
import os
|
2 |
import re
|
|
|
3 |
import json
|
4 |
+
import requests
|
5 |
from fastapi import FastAPI, HTTPException
|
6 |
from pydantic import BaseModel
|
7 |
from google.auth import exceptions
|
|
|
18 |
GOOGLE_APPLICATION_CREDENTIALS_JSON = os.getenv("GOOGLE_APPLICATION_CREDENTIALS_JSON")
|
19 |
HF_API_TOKEN = os.getenv("HF_API_TOKEN")
|
20 |
|
21 |
+
def sanitize_bucket_name(bucket_name):
    """Coerce a string into the character set and shape expected of a GCS bucket name.

    The name is lower-cased, every character other than ``a-z``, ``0-9``,
    ``-`` and ``.`` is replaced with a hyphen, leading/trailing hyphens and
    periods are removed, and the result is cut to at most 63 characters.

    This is a best-effort clean-up only: the result can still be too short
    to be a valid bucket name, so it should be validated afterwards.

    Args:
        bucket_name: Raw bucket name, e.g. read from configuration.

    Returns:
        The cleaned-up name, or ``"a"`` when nothing usable is left.

    Raises:
        ValueError: If ``bucket_name`` is not a string (e.g. ``None`` because
            the setting is missing).
    """
    if not isinstance(bucket_name, str):
        raise ValueError("El nombre del bucket debe ser una cadena de texto.")

    cleaned = re.sub(r"[^a-z0-9.-]", "-", bucket_name.lower())
    cleaned = cleaned.strip("-.")
    # Cutting at 63 characters can expose a hyphen or period at the end, so
    # strip the tail again instead of appending a character (which would
    # push the name over the limit).
    cleaned = cleaned[:63].rstrip("-.")

    # After stripping, a non-empty name always starts and ends with a letter
    # or digit; only the empty case needs a placeholder.
    return cleaned or "a"
|
33 |
+
|
34 |
def validate_bucket_name(bucket_name):
    """Check that a bucket name satisfies the Google Cloud Storage naming rules.

    Rules enforced here:
      * the name is a string made only of lowercase letters, digits,
        hyphens and periods, starting and ending with a letter or digit;
      * it is 3-63 characters long, or up to 222 characters when it
        contains periods, with every period-separated component at most
        63 characters.

    Args:
        bucket_name: Candidate bucket name.

    Returns:
        ``bucket_name`` unchanged when it is valid.

    Raises:
        ValueError: If the name breaks any of the rules above.
    """
    if not isinstance(bucket_name, str):
        raise ValueError("El nombre del bucket debe ser una cadena de texto.")

    # fullmatch rather than match + "$": "$" also matches just before a
    # trailing newline, which would let "bucket\n" through.
    if not re.fullmatch(r"[a-z0-9][a-z0-9\-\.]*[a-z0-9]", bucket_name):
        raise ValueError(f"Nombre de bucket inválido: '{bucket_name}'. Debe cumplir con las reglas de GCS.")

    max_length = 222 if "." in bucket_name else 63
    if not 3 <= len(bucket_name) <= max_length:
        raise ValueError(
            f"Nombre de bucket inválido: '{bucket_name}'. "
            f"Debe tener entre 3 y {max_length} caracteres."
        )

    if any(len(component) > 63 for component in bucket_name.split(".")):
        raise ValueError(
            f"Nombre de bucket inválido: '{bucket_name}'. "
            "Cada componente separado por puntos debe tener como máximo 63 caracteres."
        )

    return bucket_name
|
39 |
|
40 |
# Module-level initialisation: build the GCS client and bucket handle once at
# import time, and stop the process with a readable message if the
# configuration is unusable.
try:
    # An unset variable is None; without this guard it would surface as an
    # AttributeError/TypeError that the handler below does not cover.
    if GCS_BUCKET_NAME is None or GOOGLE_APPLICATION_CREDENTIALS_JSON is None:
        raise ValueError(
            "Las variables de entorno GCS_BUCKET_NAME y "
            "GOOGLE_APPLICATION_CREDENTIALS_JSON son obligatorias."
        )

    # Sanitize and validate the bucket name
    GCS_BUCKET_NAME = sanitize_bucket_name(GCS_BUCKET_NAME)
    GCS_BUCKET_NAME = validate_bucket_name(GCS_BUCKET_NAME)

    # Load the Google Cloud Storage credentials
    credentials_info = json.loads(GOOGLE_APPLICATION_CREDENTIALS_JSON)
    storage_client = storage.Client.from_service_account_info(credentials_info)
    bucket = storage_client.bucket(GCS_BUCKET_NAME)

except (exceptions.DefaultCredentialsError, json.JSONDecodeError, KeyError, ValueError) as e:
    print(f"Error al cargar credenciales o bucket: {e}")
    # SystemExit is a builtin exception; the bare exit() helper is injected
    # by the site module and is not guaranteed to exist.
    raise SystemExit(1)
|
|
|
68 |
def stream_file_from_gcs(self, blob_name):
|
69 |
blob = self.bucket.blob(blob_name)
|
70 |
if not blob.exists():
|
71 |
+
raise HTTPException(status_code=404, detail=f"Archivo '{blob_name}' no encontrado en GCS.")
|
72 |
return blob.download_as_bytes()
|
73 |
|
74 |
def upload_file_to_gcs(self, blob_name, data_stream):
|
|
|
110 |
blob_name = f"{model_name}/{filename}"
|
111 |
blob = bucket.blob(blob_name)
|
112 |
blob.upload_from_file(BytesIO(response.content))
|
113 |
+
except Exception:
|
114 |
pass
|
115 |
|
116 |
@app.post("/predict/")
|
|
|
134 |
model = AutoModelForCausalLM.from_pretrained(BytesIO(config_stream))
|
135 |
tokenizer = AutoTokenizer.from_pretrained(BytesIO(tokenizer_stream))
|
136 |
pipeline_task = request.pipeline_task
|
137 |
+
if pipeline_task not in ["text-generation", "sentiment-analysis", "translation", "fill-mask", "question-answering"]:
|
138 |
+
raise HTTPException(status_code=400, detail="Tarea no soportada")
|
139 |
pipeline_ = pipeline(pipeline_task, model=model, tokenizer=tokenizer)
|
140 |
input_text = request.input_text
|
141 |
result = pipeline_(input_text)
|