JaphetHernandez committed: Update app.py

app.py CHANGED
@@ -7,64 +7,42 @@ from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
 from huggingface_hub import login
 import torch
 import json
+import os
 from datetime import datetime
 
+# Set an environment variable to avoid CUDA memory fragmentation
+os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"
+
 # Authentication with Fireworks on Hugging Face
 huggingface_token = st.secrets["FIREWORKS"]
 login(huggingface_token)
 
-# Configure the Fireworks model with int8 quantization
+# Configure the Fireworks model with int8 quantization and CPU offload
 quant_config = BitsAndBytesConfig(
     load_in_8bit=True,  # Enable loading in int8
-    llm_int8_enable_fp32_cpu_offload=
+    llm_int8_enable_fp32_cpu_offload=True,  # Allow offloading to the CPU
     quantization_scheme="gptq"  # Specify the GPTQ scheme
 )
 
 model_id = "fireworks-ai/firefunction-v2"
 tokenizer = AutoTokenizer.from_pretrained(model_id)
 model = AutoModelForCausalLM.from_pretrained(
     model_id,
-    device_map=
+    device_map="auto",  # Allow automatic offload between CPU and GPU
     torch_dtype=torch.float16,
     quantization_config=quant_config
 )
 
-# Make sure to move the model to CUDA
-model.to("cuda")
-
 # Set the padding token
 if tokenizer.pad_token_id is None:
     tokenizer.pad_token_id = tokenizer.eos_token_id
 
-# Define Fireworks-specific functions
-function_spec = [
-    {
-        "name": "calculate_cosine_similarity",
-        "description": "Calculate the cosine similarity between two strings.",
-        "parameters": {
-            "type": "object",
-            "properties": {
-                "query": {
-                    "type": "string",
-                    "description": "The main query string for similarity calculation"
-                },
-                "job_title": {
-                    "type": "string",
-                    "description": "The job title to compare with the query"
-                }
-            },
-            "required": ["query", "job_title"]
-        }
-    }
-]
-functions = json.dumps(function_spec, indent=4)
-
 # Create a text-generation pipeline with Fireworks
 fireworks_pipeline = pipeline(
     "text-generation",
     model=model,
     tokenizer=tokenizer,
-    max_new_tokens=
+    max_new_tokens=20  # Reduce max_new_tokens to minimize memory use
 )
 
 # Adapt the pipeline to LangChain
@@ -84,7 +62,7 @@ if uploaded_file is not None:
     job_titles = df['job_title'].tolist()
 
     # Process in batches for efficiency
-    batch_size =
+    batch_size = 4  # Reduce the batch size to minimize memory use
     job_titles_batches = [job_titles[i:i+batch_size] for i in range(0, len(job_titles), batch_size)]
 
     # Define the prompt for Fireworks
@@ -117,19 +95,22 @@ if uploaded_file is not None:
             truncation=True
         ).to(model.device)
 
-        #
-        with torch.cuda.amp.autocast():  # Mixed precision for more speed
+        with torch.cuda.amp.autocast():  # Use mixed precision
             model_inputs['attention_mask'] = (model_inputs['input_ids'] != tokenizer.pad_token_id).int()
             generated_ids = model.generate(
                 **model_inputs,
-                max_new_tokens=
+                max_new_tokens=20,  # Reduce to minimize memory use
                 num_beams=1  # Disable beam search for more speed
             )
 
         # Decode the result and append it to the list of results
         decoded = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
        all_scores.extend([0.95] * len(batch))  # Simulated for demonstration
 
+        # Free memory after each batch
+        del model_inputs, generated_ids
+        torch.cuda.empty_cache()
+
     # Assign the scores to the DataFrame
     df['Score'] = all_scores
 
@@ -142,7 +123,6 @@ if uploaded_file is not None:
         st.error("The 'job_title' column was not found in the CSV file.")
 
 
-
 '''
 
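A note on the allocator setting added at the top of the file: PYTORCH_CUDA_ALLOC_CONF is only read when the CUDA caching allocator first initializes, so it has to be in the environment before the first CUDA allocation, which is why the commit places it right after the imports. A minimal sketch of that ordering; the tensor and the print are purely illustrative:

    import os

    # Must be set before the first CUDA tensor is allocated; once the caching
    # allocator has initialized, changing this variable has no effect.
    os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"

    import torch

    if torch.cuda.is_available():
        # First allocation: the allocator now uses expandable segments, which
        # reduces fragmentation when batch shapes vary between iterations.
        x = torch.empty(1024, 1024, device="cuda")
        print(f"allocated: {torch.cuda.memory_allocated()} bytes")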
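The loading change itself (int8 weights, fp32 CPU offload for modules that do not fit on the GPU, and an automatic device map) follows the standard bitsandbytes path in transformers. A minimal sketch under those assumptions; it needs a CUDA machine with bitsandbytes installed, facebook/opt-350m is a placeholder checkpoint, and quantization_scheme is omitted here because it is not a documented BitsAndBytesConfig argument:

    import torch
    from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

    model_id = "facebook/opt-350m"  # placeholder; the app uses fireworks-ai/firefunction-v2

    quant_config = BitsAndBytesConfig(
        load_in_8bit=True,                      # store linear-layer weights in int8
        llm_int8_enable_fp32_cpu_offload=True,  # keep overflow modules on the CPU in fp32
    )

    tokenizer = AutoTokenizer.from_pretrained(model_id)
    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        device_map="auto",          # let accelerate place layers across GPU and CPU
        torch_dtype=torch.float16,  # non-quantized parts in fp16
        quantization_config=quant_config,
    )

    # With device_map="auto" the model must not be moved manually, which is
    # why the commit also drops the earlier model.to("cuda") call.
    print(model.hf_device_map)  # inspect where each module ended up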
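The other half of the commit is batch-loop hygiene: generate on a small batch, decode, then drop the tensor references and empty the CUDA cache so the next batch starts from a lower memory watermark. A runnable sketch of the same pattern; gpt2 and dummy_titles are placeholders, and torch.no_grad() stands in for the app's autocast context so the sketch also runs on CPU:

    import torch
    from transformers import AutoModelForCausalLM, AutoTokenizer

    tokenizer = AutoTokenizer.from_pretrained("gpt2")  # placeholder model
    model = AutoModelForCausalLM.from_pretrained("gpt2")
    if tokenizer.pad_token_id is None:
        tokenizer.pad_token_id = tokenizer.eos_token_id  # same fallback the app uses

    device = "cuda" if torch.cuda.is_available() else "cpu"
    model.to(device)

    dummy_titles = ["Data Engineer", "Product Manager", "ML Researcher"]
    batch_size = 2  # small batches keep the peak memory footprint low
    batches = [dummy_titles[i:i + batch_size] for i in range(0, len(dummy_titles), batch_size)]

    for batch in batches:
        inputs = tokenizer(batch, return_tensors="pt", padding=True, truncation=True).to(device)
        with torch.no_grad():
            generated = model.generate(**inputs, max_new_tokens=20, num_beams=1)
        print(tokenizer.batch_decode(generated, skip_special_tokens=True))
        # Drop the references, then release cached blocks; without the del,
        # empty_cache() could not reclaim tensors that are still referenced.
        del inputs, generated
        if device == "cuda":
            torch.cuda.empty_cache()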