chelscelis
committed on
Commit
•
77e86cf
1
Parent(s):
263ccb8
Upload 3 files
Browse files
- knn_model.joblib +2 -2
- nca_model.joblib +2 -2
- train_classifier.py +7 -2
knn_model.joblib
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6ceaeb3f040b36abf9aadbd303aeb88f4745d1a41c620b803d6a56c3229e0dd1
|
3 |
+
size 5725078
|
nca_model.joblib
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c5053da7e87086d6ec242c2ae73df5e0a299ab0cb2dc395c81f8cc625f84987a
|
3 |
+
size 57724892
|
train_classifier.py
CHANGED
@@ -55,7 +55,7 @@ word_vectorizer.fit(requiredText)
|
|
55 |
joblib.dump(word_vectorizer, 'tfidf_vectorizer.joblib')
|
56 |
WordFeatures = word_vectorizer.transform(requiredText)
|
57 |
|
58 |
-
nca = NeighborhoodComponentsAnalysis(n_components=
|
59 |
WordFeatures = nca.fit_transform(WordFeatures.toarray(), requiredTarget)
|
60 |
nca_filename = f'nca_model.joblib'
|
61 |
joblib.dump(nca, nca_filename)
|
@@ -72,6 +72,11 @@ print(X_test.shape)
|
|
72 |
# knn = KNeighborsClassifier()
|
73 |
# gs = GridSearchCV(estimator=knn, param_grid=param_grid, scoring="accuracy", verbose=1, cv=10, n_jobs=3)
|
74 |
# grid_search = gs.fit(X_train, y_train)
|
|
|
|
|
|
|
|
|
|
|
75 |
# best_score = grid_search.best_score_
|
76 |
# best_parameters = grid_search.best_params_
|
77 |
# print("Best Score:", best_score)
|
@@ -80,7 +85,7 @@ print(X_test.shape)
|
|
80 |
knn = KNeighborsClassifier(n_neighbors=1,
|
81 |
metric='manhattan',
|
82 |
weights='uniform',
|
83 |
-
algorithm='
|
84 |
)
|
85 |
knn.fit(X_train, y_train)
|
86 |
|
|
|
55 |
joblib.dump(word_vectorizer, 'tfidf_vectorizer.joblib')
|
56 |
WordFeatures = word_vectorizer.transform(requiredText)
|
57 |
|
58 |
+
nca = NeighborhoodComponentsAnalysis(n_components=400, random_state=42)
|
59 |
WordFeatures = nca.fit_transform(WordFeatures.toarray(), requiredTarget)
|
60 |
nca_filename = f'nca_model.joblib'
|
61 |
joblib.dump(nca, nca_filename)
|
|
|
72 |
# knn = KNeighborsClassifier()
|
73 |
# gs = GridSearchCV(estimator=knn, param_grid=param_grid, scoring="accuracy", verbose=1, cv=10, n_jobs=3)
|
74 |
# grid_search = gs.fit(X_train, y_train)
|
75 |
+
# results_df = pd.DataFrame(grid_search.cv_results_)
|
76 |
+
# # results_df.to_excel('grid_search_results_with_nca_500.xlsx', index=False)
|
77 |
+
# # results_df.to_excel('grid_search_results_with_nca_400.xlsx', index=False)
|
78 |
+
# # results_df.to_excel('grid_search_results_with_nca_300.xlsx', index=False)
|
79 |
+
# # results_df.to_excel('grid_search_results_no_nca.xlsx', index=False)
|
80 |
# best_score = grid_search.best_score_
|
81 |
# best_parameters = grid_search.best_params_
|
82 |
# print("Best Score:", best_score)
|
|
|
85 |
knn = KNeighborsClassifier(n_neighbors=1,
|
86 |
metric='manhattan',
|
87 |
weights='uniform',
|
88 |
+
algorithm='kd_tree',
|
89 |
)
|
90 |
knn.fit(X_train, y_train)
|
91 |
|