Diego-0121
committed on
Commit
•
96ba125
1
Parent(s):
2d39e80
Create vectorization.py
Browse files- vectorization.py +15 -0
vectorization.py
ADDED
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from gensim.models import Word2Vec
|
2 |
+
import pandas as pd
|
3 |
+
|
4 |
+
|
5 |
+
import ast  # imported here because this script section needs safe literal parsing

# Load the preprocessed Spotify dataset from the current working directory.
spotify_data = pd.read_csv('spotify_data_processed.csv')

# Assumes spotify_data['cleaned_text'] holds lists of words (tokens) that were
# serialized as Python list literals, e.g. "['great', 'app']", when the CSV was
# written -- TODO confirm against the preprocessing step.
#
# SECURITY NOTE: this column was previously parsed with the builtin eval(),
# which executes any expression found in the CSV. ast.literal_eval() accepts
# only Python literals (lists, strings, numbers, ...), so well-formed token
# lists parse to the same value while anything else raises instead of running.
spotify_data['cleaned_text'] = spotify_data['cleaned_text'].apply(ast.literal_eval)

# Train the Word2Vec model on the token lists: 100-dimensional vectors, a
# context window of 10 tokens, min_count=1 so no token is dropped for being
# rare, and 5 worker threads.
model = Word2Vec(
    sentences=spotify_data['cleaned_text'],
    vector_size=100,
    window=10,
    min_count=1,
    workers=5,
)

# Save the trained model to disk.
model.save("word2vec_model.model")
|