Uhhy committed
Commit c2c6b95 (verified)
1 Parent(s): fe63409

Update app.py

Files changed (1): app.py (+7, -16)
app.py CHANGED
@@ -5,7 +5,6 @@ from concurrent.futures import ThreadPoolExecutor, as_completed
 from tqdm import tqdm
 import uvicorn
 from dotenv import load_dotenv
-from difflib import SequenceMatcher
 import re
 import spaces
 
@@ -44,30 +43,27 @@ class ModelManager:
         self.models = []
         self.loaded = False
 
+    @spaces.GPU(duration=0)
     def load_model(self, model_config):
-        print(f"Loading model: {model_config['name']}...")
         return {"model": Llama.from_pretrained(repo_id=model_config['repo_id'], filename=model_config['filename']), "name": model_config['name']}
 
+    @spaces.GPU(duration=0)
     def load_all_models(self):
         if self.loaded:
-            print("Models are already loaded. No need to reload them.")
             return self.models
 
-        print("Starting model loading...")
         with ThreadPoolExecutor() as executor:
             futures = [executor.submit(self.load_model, config) for config in model_configs]
             models = []
-            for future in tqdm(as_completed(futures), total=len(model_configs), desc="Loading models", unit="model"):
+            for future in as_completed(futures):
                 try:
                     model = future.result()
                     models.append(model)
-                    print(f"Model loaded successfully: {model['name']}")
                 except Exception as e:
-                    print(f"Error loading model: {e}")
+                    pass
 
         self.models = models
         self.loaded = True
-        print("All models have been loaded.")
         return self.models
 
 model_manager = ModelManager()
@@ -118,7 +114,6 @@ def remove_repetitive_responses(responses):
     return unique_responses
 
 def select_best_response(responses):
-    print("Filtering responses...")
     responses = remove_repetitive_responses(responses)
     responses = [remove_duplicates(response['response']) for response in responses]
     unique_responses = list(dict.fromkeys(responses))
@@ -130,26 +125,22 @@ async def generate_chat(request: ChatRequest):
     if not request.message.strip():
         raise HTTPException(status_code=400, detail="The message cannot be empty.")
 
-    print(f"Processing request: {request.message}")
-
     responses = []
     num_models = len(global_data['models'])
 
     with ThreadPoolExecutor() as executor:
         futures = [executor.submit(generate_chat_response, request, model_data) for model_data in global_data['models']]
-        for future in tqdm(as_completed(futures), total=num_models, desc="Generating responses", unit="model"):
+        for future in tqdm(as_completed(futures), total=num_models):
             try:
                 response = future.result()
                 responses.append(response)
             except Exception as exc:
-                print(f"Error generating response: {exc}")
+                pass
 
     if not responses:
         raise HTTPException(status_code=500, detail="Error: No responses were generated.")
 
     best_response = select_best_response(responses)
-
-    print(f"Best response selected: {best_response}")
 
     return {
         "best_response": best_response,
@@ -157,4 +148,4 @@ async def generate_chat(request: ChatRequest):
     }
 
 if __name__ == "__main__":
-    uvicorn.run(app, host="0.0.0.0", port=7860)
+    uvicorn.run(app, host="0.0.0.0", port=8000)
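The substantive change here is the new spaces.GPU decorator on both loader methods. For context, a minimal, self-contained sketch of that pattern, assuming the Hugging Face spaces package on a ZeroGPU Space and llama-cpp-python; the function, repo, and file names below are illustrative, not taken from app.py:

    import spaces
    from llama_cpp import Llama

    # On a ZeroGPU Space, the decorator attaches a GPU while the call runs;
    # duration hints the GPU time to reserve, in seconds (this commit passes 0).
    @spaces.GPU(duration=0)
    def load_one(repo_id: str, filename: str) -> Llama:
        # Llama.from_pretrained fetches the GGUF file from the Hub and loads it.
        return Llama.from_pretrained(repo_id=repo_id, filename=filename)

    # Hypothetical repo and filename, for illustration only:
    model = load_one("TheBloke/Llama-2-7B-GGUF", "llama-2-7b.Q4_K_M.gguf")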
 
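Both load_all_models and the updated generate_chat share the same fan-out shape: submit one task per model to a ThreadPoolExecutor, harvest results in completion order with as_completed, and skip tasks that raise. A generic sketch of that pattern, with an illustrative function name:

    from concurrent.futures import ThreadPoolExecutor, as_completed

    def fan_out(tasks):
        # Run every callable in parallel and keep whichever results arrive.
        # As in the updated app.py, a failing task is skipped silently.
        results = []
        with ThreadPoolExecutor() as executor:
            futures = [executor.submit(task) for task in tasks]
            for future in as_completed(futures):
                try:
                    results.append(future.result())
                except Exception:
                    pass  # a production variant would log here rather than discard
        return results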