yitingliii
committed on
Commit
•
9222ce0
1
Parent(s):
c704520
Update tfidf.py
Browse files
tfidf.py
CHANGED
@@ -7,7 +7,7 @@ X = df['title']
|
|
7 |
y = df['labels']
|
8 |
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
|
9 |
from sklearn.feature_extraction.text import TfidfVectorizer
|
10 |
-
tfidf = TfidfVectorizer(max_features=
|
11 |
X_train_tfidf = tfidf.fit_transform(X_train)
|
12 |
X_test_tfidf = tfidf.transform(X_test)
|
13 |
|
|
|
7 |
y = df['labels']
|
8 |
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
|
9 |
from sklearn.feature_extraction.text import TfidfVectorizer
|
10 |
+
tfidf = TfidfVectorizer(max_features=7000, ngram_range=(1, 3), stop_words='english')
|
11 |
X_train_tfidf = tfidf.fit_transform(X_train)
|
12 |
X_test_tfidf = tfidf.transform(X_test)
|
13 |
|