Hjgugugjhuhjggg committed on
Commit
1c3034c
1 Parent(s): d84cd10

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +25 -16
app.py CHANGED
@@ -1,7 +1,7 @@
1
  import os
2
  import re
3
- import requests
4
  import json
 
5
  from fastapi import FastAPI, HTTPException
6
  from pydantic import BaseModel
7
  from google.auth import exceptions
@@ -18,28 +18,35 @@ GCS_BUCKET_NAME = os.getenv("GCS_BUCKET_NAME")
18
  GOOGLE_APPLICATION_CREDENTIALS_JSON = os.getenv("GOOGLE_APPLICATION_CREDENTIALS_JSON")
19
  HF_API_TOKEN = os.getenv("HF_API_TOKEN")
20
 
 
 
 
 
 
 
 
 
 
 
 
 
 
def validate_bucket_name(bucket_name):
    """Validate a Google Cloud Storage bucket name and return it unchanged.

    The checks run in this order:

    1. the value is a ``str``;
    2. its length is between 3 and 63 characters;
    3. it contains only lowercase letters, digits, hyphens and periods, and
       starts and ends with a letter or digit;
    4. it contains none of the sequences ``--``, ``..``, ``.-`` or ``-.``.

    Args:
        bucket_name: Candidate bucket name.

    Returns:
        ``bucket_name`` itself when every check passes.

    Raises:
        ValueError: If any check fails.
    """
    if not isinstance(bucket_name, str):
        raise ValueError("Bucket name must be a string.")
    if not 3 <= len(bucket_name) <= 63:
        raise ValueError("Bucket name must be between 3 and 63 characters long.")
    # fullmatch instead of match + "$": "$" also matches just before a
    # trailing newline, which would let "bucket\n" through.
    if re.fullmatch(r"[a-z0-9][a-z0-9\-.]*[a-z0-9]", bucket_name) is None:
        raise ValueError(
            f"Invalid bucket name '{bucket_name}'. Bucket names must:"
            " - Use only lowercase letters, numbers, hyphens (-), and periods (.)"
            " - Start and end with a letter or number."
        )
    if any(pair in bucket_name for pair in ("--", "..", ".-", "-.")):
        raise ValueError(
            f"Invalid bucket name '{bucket_name}'. Bucket names cannot contain consecutive periods, hyphens, or use '.-' or '-.'"
        )
    return bucket_name
37
 
# Build the module-level GCS client and bucket handle; any configuration
# problem is reported and the process exits with status 1.
try:
    GCS_BUCKET_NAME = validate_bucket_name(GCS_BUCKET_NAME)

    # os.getenv returns None when the variable is unset; json.loads(None)
    # would raise TypeError, which the handler below does not catch.
    if not GOOGLE_APPLICATION_CREDENTIALS_JSON:
        raise ValueError("GOOGLE_APPLICATION_CREDENTIALS_JSON no está definido.")

    credentials_info = json.loads(GOOGLE_APPLICATION_CREDENTIALS_JSON)
    storage_client = storage.Client.from_service_account_info(credentials_info)
    bucket = storage_client.bucket(GCS_BUCKET_NAME)
except (exceptions.DefaultCredentialsError, json.JSONDecodeError, KeyError, ValueError) as e:
    print(f"Error al cargar credenciales o bucket: {e}")
    # exit() is injected by the site module for interactive use and may be
    # absent; raising SystemExit is the equivalent that always works.
    raise SystemExit(1)
@@ -61,7 +68,7 @@ class GCSStreamHandler:
61
  def stream_file_from_gcs(self, blob_name):
62
  blob = self.bucket.blob(blob_name)
63
  if not blob.exists():
64
- raise HTTPException(status_code=404, detail=f"File '{blob_name}' not found in GCS.")
65
  return blob.download_as_bytes()
66
 
67
  def upload_file_to_gcs(self, blob_name, data_stream):
@@ -103,7 +110,7 @@ def download_model_from_huggingface(model_name):
103
  blob_name = f"{model_name}/{filename}"
104
  blob = bucket.blob(blob_name)
105
  blob.upload_from_file(BytesIO(response.content))
106
- except Exception as e:
107
  pass
108
 
109
  @app.post("/predict/")
@@ -127,6 +134,8 @@ async def predict(request: DownloadModelRequest):
127
  model = AutoModelForCausalLM.from_pretrained(BytesIO(config_stream))
128
  tokenizer = AutoTokenizer.from_pretrained(BytesIO(tokenizer_stream))
129
  pipeline_task = request.pipeline_task
 
 
130
  pipeline_ = pipeline(pipeline_task, model=model, tokenizer=tokenizer)
131
  input_text = request.input_text
132
  result = pipeline_(input_text)
 
1
  import os
2
  import re
 
3
  import json
4
+ import requests
5
  from fastapi import FastAPI, HTTPException
6
  from pydantic import BaseModel
7
  from google.auth import exceptions
 
18
  GOOGLE_APPLICATION_CREDENTIALS_JSON = os.getenv("GOOGLE_APPLICATION_CREDENTIALS_JSON")
19
  HF_API_TOKEN = os.getenv("HF_API_TOKEN")
20
 
def sanitize_bucket_name(bucket_name):
    """Coerce a bucket name toward Google Cloud Storage naming rules.

    The name is lower-cased, every character other than ``a-z``, ``0-9``,
    ``-`` and ``.`` is replaced with ``-``, leading and trailing ``-``/``.``
    are removed, and the result is capped at 63 characters.

    Args:
        bucket_name: Raw bucket name.

    Returns:
        The sanitized name. It never exceeds 63 characters and always starts
        and ends with a lowercase letter or digit. If no usable character
        survives, the placeholder ``"a"`` is returned; the result may still
        be too short to be a valid bucket name.

    Raises:
        ValueError: If ``bucket_name`` is not a string (for example ``None``
            from an unset environment variable).
    """
    if not isinstance(bucket_name, str):
        raise ValueError("El nombre del bucket debe ser una cadena de texto.")

    cleaned = re.sub(r"[^a-z0-9.-]", "-", bucket_name.lower())
    # Trim both ends, cap the length, then trim the tail again: cutting at
    # 63 characters can expose a "-" or "." that was previously interior.
    cleaned = cleaned.strip("-.")[:63].rstrip("-.")
    # After trimming, a non-empty result already starts and ends with an
    # alphanumeric character, so padding is only needed when nothing is left.
    return cleaned or "a"
33
+
def validate_bucket_name(bucket_name):
    """Check a bucket name against basic GCS naming rules and return it.

    A name is accepted when it is a ``str`` of 3 to 63 characters made only
    of lowercase letters, digits, hyphens and periods, and it starts and
    ends with a letter or digit.

    Args:
        bucket_name: Candidate bucket name.

    Returns:
        ``bucket_name`` itself when it is accepted.

    Raises:
        ValueError: If the value is not a string, has an invalid length, or
            contains characters outside the accepted pattern.
    """
    is_valid = (
        isinstance(bucket_name, str)
        and 3 <= len(bucket_name) <= 63
        # fullmatch instead of match + "$": "$" also matches just before a
        # trailing newline, which would let "bucket\n" through.
        and re.fullmatch(r"[a-z0-9][a-z0-9\-.]*[a-z0-9]", bucket_name) is not None
    )
    if not is_valid:
        raise ValueError(f"Nombre de bucket inválido: '{bucket_name}'. Debe cumplir con las reglas de GCS.")
    return bucket_name
39
 
# Build the module-level GCS client and bucket handle; any configuration
# problem is reported and the process exits with status 1.
try:
    # os.getenv returns None for unset variables. Reject that here as a
    # ValueError so it is reported by the handler below instead of escaping
    # as an uncaught AttributeError/TypeError.
    if not GCS_BUCKET_NAME or not GOOGLE_APPLICATION_CREDENTIALS_JSON:
        raise ValueError(
            "GCS_BUCKET_NAME y GOOGLE_APPLICATION_CREDENTIALS_JSON deben estar definidos."
        )

    # Sanitize and validate the bucket name.
    GCS_BUCKET_NAME = sanitize_bucket_name(GCS_BUCKET_NAME)
    GCS_BUCKET_NAME = validate_bucket_name(GCS_BUCKET_NAME)

    # Load the Google Cloud Storage credentials.
    credentials_info = json.loads(GOOGLE_APPLICATION_CREDENTIALS_JSON)
    storage_client = storage.Client.from_service_account_info(credentials_info)
    bucket = storage_client.bucket(GCS_BUCKET_NAME)
except (exceptions.DefaultCredentialsError, json.JSONDecodeError, KeyError, ValueError) as e:
    print(f"Error al cargar credenciales o bucket: {e}")
    # exit() is injected by the site module for interactive use and may be
    # absent; raising SystemExit is the equivalent that always works.
    raise SystemExit(1)
 
def stream_file_from_gcs(self, blob_name):
    """Return the contents of ``blob_name`` from ``self.bucket`` as bytes.

    Args:
        blob_name: Name of the object inside the bucket.

    Returns:
        Whatever ``download_as_bytes()`` returns for the blob.

    Raises:
        HTTPException: With status 404 when ``blob.exists()`` is false.
    """
    target = self.bucket.blob(blob_name)
    if target.exists():
        return target.download_as_bytes()
    raise HTTPException(status_code=404, detail=f"Archivo '{blob_name}' no encontrado en GCS.")
73
 
74
  def upload_file_to_gcs(self, blob_name, data_stream):
 
110
  blob_name = f"{model_name}/{filename}"
111
  blob = bucket.blob(blob_name)
112
  blob.upload_from_file(BytesIO(response.content))
113
+ except Exception:
114
  pass
115
 
116
  @app.post("/predict/")
 
134
  model = AutoModelForCausalLM.from_pretrained(BytesIO(config_stream))
135
  tokenizer = AutoTokenizer.from_pretrained(BytesIO(tokenizer_stream))
136
  pipeline_task = request.pipeline_task
137
+ if pipeline_task not in ["text-generation", "sentiment-analysis", "translation", "fill-mask", "question-answering"]:
138
+ raise HTTPException(status_code=400, detail="Tarea no soportada")
139
  pipeline_ = pipeline(pipeline_task, model=model, tokenizer=tokenizer)
140
  input_text = request.input_text
141
  result = pipeline_(input_text)