# Train one random-forest placement classifier per career track
# (full-stack, product engineering, marketing), report hold-out accuracy,
# run one sample prediction per track and pickle the fitted models.

import pickle
import random

import joblib
import numpy as np
import onnx
import pandas as pd
from sklearn import svm
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from xgboost import XGBClassifier

# One seed for the split and the forests so that scores and the pickled
# models are reproducible from run to run.
SEED = 42


def _report_prediction(model, sample):
    """Predict the class of a one-row frame and print the verdict.

    Args:
        model: Fitted classifier exposing ``predict`` and ``predict_proba``.
        sample: Single-row DataFrame holding the features the model was
            fitted on.

    Returns:
        Tuple ``(labels, probabilities)`` exactly as returned by
        ``model.predict`` and ``model.predict_proba``.
    """
    labels = model.predict(sample)
    probabilities = model.predict_proba(sample)
    # Index the single prediction explicitly instead of relying on the
    # truthiness of a one-element array.
    if labels[0] == 1:
        print('Placed')
        print(f"You will be placed with probability of {probabilities[0][1]:.2f}")
    else:
        print("Not-placed")
    return labels, probabilities


# --- Load and encode the data ------------------------------------------------
df = pd.read_csv("Placement (2).csv")
df = df.drop(columns=["sl_no", "stream", "ssc_p", "ssc_b", "hsc_p", "hsc_b", "etest_p"])

# Encode internship as a 0/1 flag. The previous mapping used
# random.randint(0, 5), which is evaluated once while the dict is built: every
# 'Yes' row received the same arbitrary number (0 in one run out of six, which
# made 'Yes' indistinguishable from 'No'). The inference samples below already
# use 0/1 for this column.
df['internship'] = df['internship'].map({'Yes': 1, 'No': 0})
df['status'] = df['status'].map({'Placed': 1, 'Not Placed': 0})

# --- Per-track feature sets --------------------------------------------------
# Each frame drops the label plus the columns that belong to the other two
# tracks' feature sets.
y = df['status']
X_fullstk = df.drop(['status', 'management', 'leadership', 'communication', 'sales'], axis=1)
X_prodengg = df.drop(['status', 'DSA', 'java', 'communication', 'sales'], axis=1)
X_mkt = df.drop(['status', 'management', 'leadership', 'DSA', 'java'], axis=1)

# --- Hold-out evaluation -----------------------------------------------------
# A single call splits all frames with the same row partition; this yields the
# same train/test rows as three separate calls with an identical random_state,
# without repeatedly overwriting y_train / y_test.
(
    X_train_fullstk, X_test_fullstk,
    X_train_prodengg, X_test_prodengg,
    X_train_mkt, X_test_mkt,
    y_train, y_test,
) = train_test_split(
    X_fullstk, X_prodengg, X_mkt, y, test_size=0.20, random_state=SEED
)

rf_fullstk = RandomForestClassifier(random_state=SEED)
rf_fullstk.fit(X_train_fullstk, y_train)
rf_prodengg = RandomForestClassifier(random_state=SEED)
rf_prodengg.fit(X_train_prodengg, y_train)
rf_mkt = RandomForestClassifier(random_state=SEED)
rf_mkt.fit(X_train_mkt, y_train)

y_pred_full = rf_fullstk.predict(X_test_fullstk)
y_pred_prodengg = rf_prodengg.predict(X_test_prodengg)
y_pred_mkt = rf_mkt.predict(X_test_mkt)

score1 = accuracy_score(y_test, y_pred_full)
score2 = accuracy_score(y_test, y_pred_prodengg)
score3 = accuracy_score(y_test, y_pred_mkt)
# The scores used to be computed and silently discarded; show them.
print(f"Hold-out accuracy (full-stack): {score1:.3f}")
print(f"Hold-out accuracy (product engineering): {score2:.3f}")
print(f"Hold-out accuracy (marketing): {score3:.3f}")

# --- Final models: refit on every available row ------------------------------
rf_fullstk.fit(X_fullstk, y)
rf_mkt.fit(X_mkt, y)
rf_prodengg.fit(X_prodengg, y)

# --- Sample predictions ------------------------------------------------------
# One hand-written row per track. NOTE(review): the keys are presumably the
# exact feature columns of the matching X_* frame -- confirm against the CSV.
new_data_fullstk = pd.DataFrame({
    'degree_p': 75,
    'internship': 1,
    'DSA': 1,
    'java': 0,
}, index=[0])
new_data_prodeng = pd.DataFrame({
    'degree_p': 75,
    'internship': 0,
    'management': 1,
    'leadership': 0,
}, index=[0])
new_data_mkt = pd.DataFrame({
    'degree_p': 75,
    'internship': 0,
    'communication': 0,
    'sales': 1,
}, index=[0])

p_fstk, prob_fstk = _report_prediction(rf_fullstk, new_data_fullstk)
p_prodeng, prob_prdeng = _report_prediction(rf_prodengg, new_data_prodeng)
p_mkt, prob_mkt = _report_prediction(rf_mkt, new_data_mkt)

# --- Persist the fitted models -----------------------------------------------
# File names are kept verbatim because downstream loaders may depend on them.
for model_path, fitted_model in (
    ('rf_hacathon_fullstk.pkl', rf_fullstk),
    ('rf_hacathon_prodengg.pkl', rf_prodengg),
    ('rf_hacathon_mkt.pkl', rf_mkt),
):
    with open(model_path, 'wb') as model_file:
        pickle.dump(fitted_model, model_file)