Diego-0121 committed on
Commit
96ba125
1 Parent(s): 2d39e80

Create vectorization.py

Browse files
Files changed (1) hide show
  1. vectorization.py +15 -0
vectorization.py ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from gensim.models import Word2Vec
2
+ import pandas as pd
3
+
4
+
5
+ spotify_data= pd.read_csv('spotify_data_processed.csv')
6
+
7
+
8
+
9
+
10
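+ # Assuming spotify_data['cleaned_text'] holds string representations of word (token) lists; they are parsed back into lists below (ast.literal_eval would be a safer parser than eval)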
11
+ spotify_data['cleaned_text'] = spotify_data['cleaned_text'].apply(eval)
12
+
13
+ model = Word2Vec(sentences=spotify_data['cleaned_text'], vector_size=100, window=10, min_count=1, workers=5)
14
+ # Save the model
15
+ model.save("word2vec_model.model")