chelscelis committed on
Commit
77e86cf
1 Parent(s): 263ccb8

Upload 3 files

Browse files
Files changed (3) hide show
  1. knn_model.joblib +2 -2
  2. nca_model.joblib +2 -2
  3. train_classifier.py +7 -2
knn_model.joblib CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:69d9b5be2223b9acb49fe7cf16a6df47daa312ed35b21e2a9341b9ae97575c60
3
- size 4223478
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6ceaeb3f040b36abf9aadbd303aeb88f4745d1a41c620b803d6a56c3229e0dd1
3
+ size 5725078
nca_model.joblib CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4fd5ab8d8adb40ead5421e8d90e36c99004f2af426be6659e7add2f0c58893e7
3
- size 43294492
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c5053da7e87086d6ec242c2ae73df5e0a299ab0cb2dc395c81f8cc625f84987a
3
+ size 57724892
train_classifier.py CHANGED
@@ -55,7 +55,7 @@ word_vectorizer.fit(requiredText)
55
  joblib.dump(word_vectorizer, 'tfidf_vectorizer.joblib')
56
  WordFeatures = word_vectorizer.transform(requiredText)
57
 
58
- nca = NeighborhoodComponentsAnalysis(n_components=300, random_state=42)
59
  WordFeatures = nca.fit_transform(WordFeatures.toarray(), requiredTarget)
60
  nca_filename = f'nca_model.joblib'
61
  joblib.dump(nca, nca_filename)
@@ -72,6 +72,11 @@ print(X_test.shape)
72
  # knn = KNeighborsClassifier()
73
  # gs = GridSearchCV(estimator=knn, param_grid=param_grid, scoring="accuracy", verbose=1, cv=10, n_jobs=3)
74
  # grid_search = gs.fit(X_train, y_train)
 
 
 
 
 
75
  # best_score = grid_search.best_score_
76
  # best_parameters = grid_search.best_params_
77
  # print("Best Score:", best_score)
@@ -80,7 +85,7 @@ print(X_test.shape)
80
  knn = KNeighborsClassifier(n_neighbors=1,
81
  metric='manhattan',
82
  weights='uniform',
83
- algorithm='ball_tree',
84
  )
85
  knn.fit(X_train, y_train)
86
 
 
55
  joblib.dump(word_vectorizer, 'tfidf_vectorizer.joblib')
56
  WordFeatures = word_vectorizer.transform(requiredText)
57
 
58
+ nca = NeighborhoodComponentsAnalysis(n_components=400, random_state=42)
59
  WordFeatures = nca.fit_transform(WordFeatures.toarray(), requiredTarget)
60
  nca_filename = f'nca_model.joblib'
61
  joblib.dump(nca, nca_filename)
 
72
  # knn = KNeighborsClassifier()
73
  # gs = GridSearchCV(estimator=knn, param_grid=param_grid, scoring="accuracy", verbose=1, cv=10, n_jobs=3)
74
  # grid_search = gs.fit(X_train, y_train)
75
+ # results_df = pd.DataFrame(grid_search.cv_results_)
76
+ # # results_df.to_excel('grid_search_results_with_nca_500.xlsx', index=False)
77
+ # # results_df.to_excel('grid_search_results_with_nca_400.xlsx', index=False)
78
+ # # results_df.to_excel('grid_search_results_with_nca_300.xlsx', index=False)
79
+ # # results_df.to_excel('grid_search_results_no_nca.xlsx', index=False)
80
  # best_score = grid_search.best_score_
81
  # best_parameters = grid_search.best_params_
82
  # print("Best Score:", best_score)
 
85
  knn = KNeighborsClassifier(n_neighbors=1,
86
  metric='manhattan',
87
  weights='uniform',
88
+ algorithm='kd_tree',
89
  )
90
  knn.fit(X_train, y_train)
91