Update app.py
app.py CHANGED
@@ -16,42 +16,38 @@ tokenizer.pad_token = tokenizer.eos_token
 
 MAX_INPUT_TOKEN_LENGTH = 10000
 
-
-
+# Make sure the padding token is configured
+if tokenizer.pad_token is None:
+    tokenizer.pad_token = tokenizer.eos_token  # Assign the end-of-sequence token as the padding token
+
+def generate_response(input_text, max_new_tokens=50, temperature=0.5):
+    """
+    Generate a response using the language model with an attention mask.
+    """
+    # Tokenize the input and create the attention mask
+    inputs = tokenizer(
+        input_text,
+        return_tensors='pt',
+        padding=True,
+        truncation=True,
+        max_length=512  # Adjust as needed
+    )
 
-
-
-        st.warning(f"The input was truncated because it exceeded the {MAX_INPUT_TOKEN_LENGTH}-token limit.")
+    input_ids = inputs['input_ids'].to(model.device)
+    attention_mask = inputs['attention_mask'].to(model.device)
 
-
-
-        input_ids
-
+    # Generate text with the attention mask and the padding token
+    outputs = model.generate(
+        input_ids,
+        attention_mask=attention_mask,
         max_new_tokens=max_new_tokens,
-        do_sample=True,
-        top_k=20,
-        top_p=0.9,
         temperature=temperature,
-
-        eos_token_id=[tokenizer.eos_token_id]
+        pad_token_id=tokenizer.pad_token_id  # Use the configured padding token
     )
 
-
-
-
-        t.join()  # Ensure generation has finished
-
-        outputs = []
-        for text in streamer:
-            outputs.append(text)
-        if not outputs:
-            raise ValueError("No response was generated.")
-
-        response = "".join(outputs).strip().split("\n")[0]
-        return response
-    except Exception as e:
-        st.error(f"Error during generation: {e}")
-        return "Error during text generation."
+    # Decode the generated response
+    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
+    return response
 
 def main():
     st.title("Chat with Meta Llama 3.2 1B")
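
For context, here is a minimal end-to-end sketch of how the updated generate_response could be wired into the app. The checkpoint id, the Streamlit input widget, and the main() wiring below are assumptions for illustration; the diff only shows the generation function and the padding-token guard.

import streamlit as st
from transformers import AutoModelForCausalLM, AutoTokenizer

MODEL_ID = "meta-llama/Llama-3.2-1B"  # assumed checkpoint; not named in the diff

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForCausalLM.from_pretrained(MODEL_ID)

# Same guard the commit adds: fall back to EOS as the padding token
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

def generate_response(input_text, max_new_tokens=50, temperature=0.5):
    # Tokenize with padding/truncation so an attention mask is produced
    inputs = tokenizer(input_text, return_tensors='pt', padding=True,
                       truncation=True, max_length=512)
    outputs = model.generate(
        inputs['input_ids'].to(model.device),
        attention_mask=inputs['attention_mask'].to(model.device),
        max_new_tokens=max_new_tokens,
        temperature=temperature,
        pad_token_id=tokenizer.pad_token_id,
    )
    # Decode the full sequence; note this includes the prompt text
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

def main():
    st.title("Chat with Meta Llama 3.2 1B")
    prompt = st.text_input("Your message")  # assumed widget
    if prompt:
        st.write(generate_response(prompt))

if __name__ == "__main__":
    main()

Two things worth noting about the new version: passing temperature without do_sample=True has no effect, since model.generate defaults to greedy decoding (the commit removed do_sample=True, top_k, and top_p); and because the input is now truncated at max_length=512, the MAX_INPUT_TOKEN_LENGTH constant and its truncation warning are no longer referenced in this hunk.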