File size: 3,304 Bytes
6706a4b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn import svm
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import GradientBoostingClassifier
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score
import joblib
import pickle
import onnx
import random


# Load the raw placement dataset (path is relative to the working directory).
df = pd.read_csv("Placement (2).csv")

# Drop the columns that none of the three role-specific models use.
df = df.drop(columns=["sl_no", "stream", "ssc_p", "ssc_b", "hsc_p", "hsc_b", "etest_p"])

# Encode the categorical columns as integers.
# 'Yes' is encoded as a fixed 1 rather than a random draw: a value drawn while
# the mapping dict is being built is shared by every 'Yes' row, changes from
# run to run, and can be 0 -- which would make 'Yes' indistinguishable from
# 'No' and shift the tree split thresholds unpredictably.
# Series.map leaves any value that is not a key of the dict as NaN.
df['internship'] = df['internship'].map({'Yes': 1, 'No': 0})
df['status'] = df['status'].map({'Placed': 1, 'Not Placed': 0})

# Target: 1 = placed, 0 = not placed.
y = df['status']

# One feature frame per role profile: every remaining column except the target
# and the skill columns that belong to the other two profiles.
X_fullstk = df.drop(['status', 'management', 'leadership', 'communication', 'sales'], axis=1)
X_prodengg = df.drop(['status', 'DSA', 'java', 'communication', 'sales'], axis=1)
X_mkt = df.drop(['status', 'management', 'leadership', 'DSA', 'java'], axis=1)

# Split the three feature frames and the target in a single call so that all of
# them are guaranteed to share exactly the same train/test rows as
# y_train / y_test (80% train, 20% test, fixed seed).
(X_train_fullstk, X_test_fullstk,
 X_train_prodengg, X_test_prodengg,
 X_train_mkt, X_test_mkt,
 y_train, y_test) = train_test_split(
    X_fullstk, X_prodengg, X_mkt, y, test_size=0.20, random_state=42)
# Train one random forest per role profile on the training split.
# The forests are seeded (same seed as the train/test split) so that the
# hold-out scores and the final fitted models are reproducible between runs.
rf_fullstk = RandomForestClassifier(random_state=42)
rf_fullstk.fit(X_train_fullstk, y_train)

rf_prodengg = RandomForestClassifier(random_state=42)
rf_prodengg.fit(X_train_prodengg, y_train)

rf_mkt = RandomForestClassifier(random_state=42)
rf_mkt.fit(X_train_mkt, y_train)

# Hold-out predictions and accuracy for each profile.
y_pred_full = rf_fullstk.predict(X_test_fullstk)
y_pred_prodengg = rf_prodengg.predict(X_test_prodengg)
y_pred_mkt = rf_mkt.predict(X_test_mkt)

# score1 / score2 / score3: accuracy of the full-stack, product-engineering and
# marketing models respectively. They are kept as variables only; nothing in
# this section prints or stores them.
score1 = accuracy_score(y_test, y_pred_full)
score2 = accuracy_score(y_test, y_pred_prodengg)
score3 = accuracy_score(y_test, y_pred_mkt)

# Refit every model on the complete dataset; these refitted models are the
# ones used for the sample predictions and written to disk further down.
rf_fullstk.fit(X_fullstk, y)
rf_mkt.fit(X_mkt, y)
rf_prodengg.fit(X_prodengg, y)

def _single_row(**features):
    """Wrap scalar feature values into a one-row DataFrame with index label 0.

    Column order follows the order of the keyword arguments.
    """
    return pd.DataFrame(features, index=[0])


# Hand-written sample candidates, one per role profile, used for the demo
# predictions below. Each frame has the four columns its model is given.
new_data_fullstk = _single_row(degree_p=75, internship=1, DSA=1, java=0)

new_data_prodeng = _single_row(degree_p=75, internship=0, management=1, leadership=0)

new_data_mkt = _single_row(degree_p=75, internship=0, communication=0, sales=1)

def _report_placement(model, candidate):
    """Predict placement for a one-row candidate frame and print the verdict.

    Args:
        model: fitted classifier exposing ``predict`` and ``predict_proba``.
        candidate: single-row DataFrame holding the model's feature columns.

    Returns:
        ``(prediction, probabilities)`` exactly as returned by
        ``model.predict`` and ``model.predict_proba``.
    """
    prediction = model.predict(candidate)
    probabilities = model.predict_proba(candidate)
    # Compare the single predicted label as a scalar; testing the whole array
    # in an ``if`` only works while the frame has exactly one row.
    if prediction[0] == 1:
        print('Placed')
        # Column 1 of predict_proba is the second entry of model.classes_,
        # i.e. label 1 when the model was trained on labels 0 and 1.
        print(f"You will be placed with probability of {probabilities[0][1]:.2f}")
    else:
        print("Not-placed")
    return prediction, probabilities


# Demo predictions for the three sample candidates, one per role profile.
p_fstk, prob_fstk = _report_placement(rf_fullstk, new_data_fullstk)

p_prodeng, prob_prdeng = _report_placement(rf_prodengg, new_data_prodeng)

p_mkt, prob_mkt = _report_placement(rf_mkt, new_data_mkt)
    
# Persist each fitted model to its own pickle file in the working directory.
for _pkl_name, _forest in (
    ('rf_hacathon_fullstk.pkl', rf_fullstk),
    ('rf_hacathon_prodengg.pkl', rf_prodengg),
    ('rf_hacathon_mkt.pkl', rf_mkt),
):
    with open(_pkl_name, 'wb') as _sink:
        pickle.dump(_forest, _sink)