|
import numpy as np |
|
import pandas as pd |
|
from sklearn.model_selection import train_test_split |
|
from sklearn.linear_model import LogisticRegression |
|
from sklearn.neighbors import KNeighborsClassifier |
|
from sklearn import svm |
|
from sklearn.tree import DecisionTreeClassifier |
|
from sklearn.ensemble import RandomForestClassifier |
|
from sklearn.ensemble import GradientBoostingClassifier |
|
from xgboost import XGBClassifier |
|
from sklearn.metrics import accuracy_score |
|
import joblib |
|
import pickle |
|
import onnx |
|
import random |
|
|
|
|
|
# Load the raw CSV; the path is relative to the current working directory.
df = pd.read_csv("Placement (2).csv")

# Drop the columns that are not used as features by any of the models below.
df = df.drop(
    columns=["sl_no", "stream", "ssc_p", "ssc_b", "hsc_p", "hsc_b", "etest_p"]
)

# Encode the two text columns as integers.
# The earlier mapping used random.randint(0, 5) for 'Yes'. That call runs once,
# while the dict literal is built, so every 'Yes' row received the same
# arbitrary value -- sometimes 0, i.e. indistinguishable from 'No' -- and the
# value changed from run to run. A fixed 1/0 flag is deterministic and matches
# the 0/1 internship values used by the sample inputs further down the script.
df["internship"] = df["internship"].map({"Yes": 1, "No": 0})
df["status"] = df["status"].map({"Placed": 1, "Not Placed": 0})

# Target shared by all three models.
y = df["status"]

# One feature matrix per model; each keeps a different pair of skill columns
# alongside whatever other columns remain in df.
X_fullstk = df.drop(
    columns=["status", "management", "leadership", "communication", "sales"]
)
X_prodengg = df.drop(columns=["status", "DSA", "java", "communication", "sales"])
X_mkt = df.drop(columns=["status", "management", "leadership", "DSA", "java"])
|
# Hold out 20% of the rows for evaluation.
# All three feature matrices and the target go through a single call so they
# are partitioned on the same shuffled indices. This replaces three separate
# calls that each overwrote y_train / y_test and stayed row-aligned only
# because they happened to share the same random_state.
(
    X_train_fullstk, X_test_fullstk,
    X_train_prodengg, X_test_prodengg,
    X_train_mkt, X_test_mkt,
    y_train, y_test,
) = train_test_split(
    X_fullstk, X_prodengg, X_mkt, y,
    test_size=0.20,
    random_state=42,
)
|
# Fit one random forest per feature set on the training split.
# fit() returns the estimator itself, so construction and fitting are chained.
rf_fullstk = RandomForestClassifier().fit(X_train_fullstk, y_train)
rf_prodengg = RandomForestClassifier().fit(X_train_prodengg, y_train)
rf_mkt = RandomForestClassifier().fit(X_train_mkt, y_train)

# Predict on the held-out rows.
y_pred_full = rf_fullstk.predict(X_test_fullstk)
y_pred_prodengg = rf_prodengg.predict(X_test_prodengg)
y_pred_mkt = rf_mkt.predict(X_test_mkt)

# Hold-out accuracy for each model, in the order fullstk, prodengg, mkt.
score1, score2, score3 = (
    accuracy_score(y_test, predicted)
    for predicted in (y_pred_full, y_pred_prodengg, y_pred_mkt)
)

# Refit every model on the complete dataset; these refitted estimators are
# the ones used for the predictions and pickling later in the script.
for forest, features in (
    (rf_fullstk, X_fullstk),
    (rf_mkt, X_mkt),
    (rf_prodengg, X_prodengg),
):
    forest.fit(features, y)
|
|
|
# Single-row sample inputs, one per model. Each frame has exactly one row
# labelled 0 and four integer columns in the order listed.
new_data_fullstk = pd.DataFrame(
    [[75, 1, 1, 0]],
    columns=['degree_p', 'internship', 'DSA', 'java'],
    index=[0],
)

new_data_prodeng = pd.DataFrame(
    [[75, 0, 1, 0]],
    columns=['degree_p', 'internship', 'management', 'leadership'],
    index=[0],
)

new_data_mkt = pd.DataFrame(
    [[75, 0, 0, 1]],
    columns=['degree_p', 'internship', 'communication', 'sales'],
    index=[0],
)
|
|
|
# Bare attribute reads carried over from the original; in a plain script they
# display nothing and are kept only so the attribute access still happens.
rf_fullstk.feature_importances_
rf_mkt.feature_importances_


def _report_prediction(model, sample):
    """Predict the outcome for a one-row sample and print the verdict.

    Args:
        model: Fitted classifier exposing ``predict`` and ``predict_proba``.
        sample: Feature frame whose first row is the case to report on; its
            columns must be the ones the model was fitted with.

    Returns:
        A ``(prediction, probabilities)`` tuple holding the unmodified
        return values of ``model.predict`` and ``model.predict_proba``.
    """
    prediction = model.predict(sample)
    probabilities = model.predict_proba(sample)
    # Look at the first row explicitly: comparing the whole array to 1 inside
    # an `if` only works when the array has exactly one element.
    if prediction[0] == 1:
        print('Placed')
        # Column 1 is taken as the probability of class 1 -- this assumes the
        # model saw both labels 0 and 1 during fitting.
        print(f"You will be placed with probability of {probabilities[0][1]:.2f}")
    else:
        print("Not-placed")
    return prediction, probabilities


p_fstk, prob_fstk = _report_prediction(rf_fullstk, new_data_fullstk)
p_prodeng, prob_prdeng = _report_prediction(rf_prodengg, new_data_prodeng)
p_mkt, prob_mkt = _report_prediction(rf_mkt, new_data_mkt)
|
|
|
# Serialize each fitted model to its own pickle file in the working directory,
# in the order fullstk, prodengg, mkt.
for pickle_path, fitted_model in (
    ('rf_hacathon_fullstk.pkl', rf_fullstk),
    ('rf_hacathon_prodengg.pkl', rf_prodengg),
    ('rf_hacathon_mkt.pkl', rf_mkt),
):
    with open(pickle_path, 'wb') as handle:
        pickle.dump(fitted_model, handle)
|
|