Spaces:
Runtime error
Runtime error
import pandas as pd | |
import numpy as np | |
import seaborn as sns | |
import matplotlib as plt | |
import matplotlib.pyplot as plt | |
from sklearn import preprocessing | |
from sklearn.preprocessing import LabelEncoder | |
import gradio as gr | |
from array import * | |
#from google.colab import drive | |
#drive.mount('/content/drive') | |
# Load the training data into a DataFrame.
df_train = pd.read_csv("train_ctrUa4K.csv")

# Quick inspection. These bare expressions are notebook leftovers: when the
# file runs as a script only info() and the print() calls produce output.
df_train.head()
df_train.describe()
df_train.shape
df_train.info()
df_train.isnull().sum()
for column in ('Gender', 'Married', 'Dependents', 'Self_Employed',
               'Credit_History', 'Property_Area'):
    print(df_train[column].value_counts())

# Replace missing values with a fixed default per column.
# NOTE(review): the defaults look like the most frequent value of each
# column as printed above - confirm against the data.
# Assign the result back instead of `df[col].fillna(..., inplace=True)`:
# the chained in-place form operates on a temporary under pandas
# copy-on-write and would leave df_train unchanged.
fill_defaults = {
    'Gender': "Male",
    'Married': "Yes",
    'Dependents': "0",
    'Self_Employed': "No",
    'Credit_History': 1.0,
}
for column, default in fill_defaults.items():
    df_train[column] = df_train[column].fillna(default)
df_train.isnull().sum()

# Report how many fully duplicated rows exist.
duplicate = df_train.duplicated()
print(duplicate.sum())
df_train[duplicate]

# One row per numeric column: box plot on the left, distribution on the right.
# NOTE(review): sns.distplot is deprecated in seaborn; histplot(..., kde=True)
# is the documented replacement once the installed version is confirmed.
fig, ax = plt.subplots(3, 2, figsize=(10, 7))
for row, column in enumerate(('ApplicantIncome', 'CoapplicantIncome',
                              'Loan_Amount_Term')):
    sns.boxplot(x=df_train[column], ax=ax[row, 0])
    sns.distplot(df_train[column], ax=ax[row, 1])
def remove_outlier(col, factor=1.5):
    """Return the IQR-based outlier bounds for a numeric pandas Series.

    Despite its name this function removes nothing; it only computes the
    lower and upper fences that callers use to cap values.

    Args:
        col: Series of numeric values (anything exposing ``quantile``).
        factor: Multiple of the inter-quartile range added beyond the
            quartiles. Defaults to 1.5.

    Returns:
        Tuple ``(lower_range, upper_range)`` equal to
        ``(Q1 - factor * IQR, Q3 + factor * IQR)``.
    """
    # quantile() does not need sorted input, so no sorting is done here.
    Q1, Q3 = col.quantile([0.25, 0.75])
    IQR = Q3 - Q1
    lower_range = Q1 - (factor * IQR)
    upper_range = Q3 + (factor * IQR)
    return lower_range, upper_range
# Compute the IQR fences of each numeric column, then cap values that fall
# outside them to the nearest fence.
low_AI, high_AI = remove_outlier(df_train['ApplicantIncome'])
low_CI, high_CI = remove_outlier(df_train['CoapplicantIncome'])
low_LAT, high_LAT = remove_outlier(df_train['Loan_Amount_Term'])
outlier_bounds = {
    'ApplicantIncome': (low_AI, high_AI),
    'CoapplicantIncome': (low_CI, high_CI),
    'Loan_Amount_Term': (low_LAT, high_LAT),
}
for column, (low, high) in outlier_bounds.items():
    capped = np.where(df_train[column] > high, high, df_train[column])
    # Comparisons with NaN are False, so missing values pass through as NaN.
    df_train[column] = np.where(capped < low, low, capped)

# Re-draw the box plots to check the effect of the capping.
for column in outlier_bounds:
    df_train.boxplot(column=[column])
    plt.show()

df_train.isnull().sum()

# Fill missing loan terms with 360. Assign the result back: the chained
# `df[col].fillna(..., inplace=True)` form does not update df_train under
# pandas copy-on-write.
df_train['Loan_Amount_Term'] = df_train['Loan_Amount_Term'].fillna(360)

# Median LoanAmount for every Self_Employed x Education combination; used as
# the lookup table for imputing LoanAmount. The string 'median' selects the
# same aggregation as np.median without pandas' callable FutureWarning.
table = df_train.pivot_table(values='LoanAmount', index='Self_Employed',
                             columns='Education', aggfunc='median')
table
def val(x):
    """Look up the ``table`` cell that matches one row.

    Args:
        x: A row (anything indexable by ``'Self_Employed'`` and
            ``'Education'``), e.g. what ``DataFrame.apply(..., axis=1)``
            passes in.

    Returns:
        The entry of the module-level ``table`` at row label
        ``x['Self_Employed']`` and column label ``x['Education']``.
    """
    employment = x['Self_Employed']
    education = x['Education']
    return table.loc[employment, education]
# Fill missing LoanAmount values with the `table` entry for the row's
# Self_Employed / Education combination (see val()). Written as a .loc
# assignment because the chained `df[col].fillna(..., inplace=True)` form
# does not update df_train under pandas copy-on-write; the guard avoids
# calling apply() on an empty frame when nothing is missing.
missing_amount = df_train['LoanAmount'].isnull()
if missing_amount.any():
    df_train.loc[missing_amount, 'LoanAmount'] = (
        df_train[missing_amount].apply(val, axis=1)
    )

# Combined income of applicant and co-applicant.
df_train['Total_income'] = df_train['ApplicantIncome'] + df_train['CoapplicantIncome']
df_train.head()

# `df` is an alias of df_train (not a copy), so the encoding below also
# changes df_train, which is what the feature selection reads from.
df = df_train

# Replace each categorical column with integer codes. The encoder is re-fit
# for every column, so the codes of one column are independent of the others.
label_encoder = preprocessing.LabelEncoder()
for column in ('Gender', 'Married', 'Education', 'Self_Employed',
               'Property_Area', 'Dependents'):
    df[column] = label_encoder.fit_transform(df[column])
df.head()

# Feature matrix and target. The target is selected as a 1-D Series so that
# scikit-learn estimators do not warn about a column-vector y.
x = df_train[['Gender', 'Married', 'Dependents', 'Education', 'Self_Employed',
              'LoanAmount', 'Loan_Amount_Term', 'Credit_History',
              'Property_Area', 'Total_income']]
y = df_train['Loan_Status']

from sklearn.model_selection import train_test_split
# 80/20 split with a fixed seed for reproducibility.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=4)
"""LOGISTIC REGRESSION""" | |
from sklearn.metrics import classification_report, confusion_matrix | |
import itertools | |
def plot_confusion_matrix(cm, classes,
                          normalize=False,
                          title='Confusion matrix',
                          cmap=plt.cm.Blues):
    """Print a confusion matrix and draw it as an annotated heat map.

    Args:
        cm: 2-D array of counts, indexed ``cm[true, predicted]``.
        classes: Tick labels, used for both axes.
        normalize: When true, divide every row by its row sum before
            printing and plotting, and show cells with two decimals.
        title: Title of the plot.
        cmap: Colormap passed to ``plt.imshow``.
    """
    if not normalize:
        banner = 'Confusion matrix, without normalization'
        cell_format = 'd'
    else:
        row_totals = cm.sum(axis=1)[:, np.newaxis]
        cm = cm.astype('float') / row_totals
        banner = "Normalized confusion matrix"
        cell_format = '.2f'
    print(banner)
    print(cm)

    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()

    positions = np.arange(len(classes))
    plt.xticks(positions, classes, rotation=45)
    plt.yticks(positions, classes)

    # Cells above half of the maximum get white text, the others black text.
    half_max = cm.max() / 2.
    n_rows, n_cols = cm.shape[0], cm.shape[1]
    for i, j in itertools.product(range(n_rows), range(n_cols)):
        if cm[i, j] > half_max:
            text_color = "white"
        else:
            text_color = "black"
        plt.text(j, i, format(cm[i, j], cell_format),
                 horizontalalignment="center",
                 color=text_color)

    plt.tight_layout()
    plt.ylabel('True label')
    plt.xlabel('Predicted label')
from sklearn.model_selection import GridSearchCV,RandomizedSearchCV | |
from sklearn.linear_model import LogisticRegression | |
#from sklearn.metrics import confusion_matrix | |
# Hyper-parameter grid for logistic regression.
# - random_state uses None (not the string 'none', which is not a valid seed).
# - Only penalty/solver pairs that LogisticRegression supports are listed, so
#   the search does not spend folds on combinations that can only fail.
# - 'elasticnet' is left out: it needs an l1_ratio, which this grid never
#   set, so it could not produce a fitted candidate.
# - penalty=None (no regularisation) needs scikit-learn >= 1.2; older
#   releases spell it 'none'.
shared_grid = {
    'C': [1, 0.5, 0.1, 0.01],
    'fit_intercept': [True, False],
    'random_state': [10, 50, 100, None],
}
parametersLR = [
    {**shared_grid, 'penalty': ['l2', None],
     'solver': ['newton-cg', 'lbfgs', 'sag', 'saga']},
    {**shared_grid, 'penalty': ['l1'], 'solver': ['liblinear', 'saga']},
    {**shared_grid, 'penalty': ['l2'], 'solver': ['liblinear']},
]
LR = LogisticRegression()
#r = RandomizedSearchCV(LR,parametersLR)
g = GridSearchCV(LR, parametersLR)
# np.ravel gives a 1-D target whether y_train is a Series or a one-column
# DataFrame.
g.fit(x_train, np.ravel(y_train))

# Evaluate the best estimator on the held-out split.
ypred = g.predict(x_test)
ypred
print(classification_report(y_test, ypred))

# Smoke test: predict for one hand-written, already encoded applicant.
# Column order matches the training features.
l = {'Gender': [1],
     'Married': [0],
     'Dependents': [0],
     'Education': [0],
     'Self_Employed': [0],
     'LoanAmount': [130],
     'Loan_Amount_Term': [360],
     'Credit_History': [1],
     'Property_Area': [2],
     'Total_income': [5849]
     }
# Note: this rebinds `df`, which until here aliased df_train.
df = pd.DataFrame(l)
ans = g.predict(df)
ans2 = ans.tolist()
ans2[0]
df
def _encode_choice(field, value, codes):
    """Return ``codes[value]`` or raise ValueError naming the offending field."""
    try:
        return codes[value]
    except (KeyError, TypeError):
        raise ValueError(
            f"{field}: expected one of {list(codes)}, got {value!r}"
        ) from None


def pred(Gender, Marital_Status, Dependents, Education, Self_Employed, Loan_Amount, Credit_History, Property_Area, Total_Income):
    """Predict the loan status for one applicant described by form inputs.

    The choice inputs are translated to the integer codes the model was
    trained on, the two free-text inputs are parsed as numbers, and the
    fitted search object ``g`` is asked for a prediction.

    Args:
        Gender: "Male" or "Female".
        Marital_Status: "Married" or "Unmarried".
        Dependents: "0", "1", "2" or "3+".
        Education: "Educated" or "Uneducated".
        Self_Employed: "Yes" or "No".
        Loan_Amount: Loan amount as a number or numeric string.
        Credit_History: "1" or "0".
        Property_Area: "0", "1" or "2".
        Total_Income: Total income as a number or numeric string.

    Returns:
        "Loan Status: Approved!" when the model predicts "Y",
        "Loan Status: Disapproved" when it predicts "N", otherwise None.

    Raises:
        ValueError: If a choice input is missing or not one of the listed
            options, or if a numeric input cannot be parsed.
    """
    gen = _encode_choice("Gender", Gender, {"Male": 1, "Female": 0})
    m = _encode_choice("Marital_Status", Marital_Status,
                       {"Married": 1, "Unmarried": 0})
    d = _encode_choice("Dependents", Dependents,
                       {"0": 0, "1": 1, "2": 2, "3+": 3})
    # NOTE(review): LabelEncoder numbers categories in sorted order; if the
    # training column holds 'Graduate' / 'Not Graduate', then 'Graduate' is 0
    # and this mapping is inverted - confirm against the training data.
    e = _encode_choice("Education", Education,
                       {"Educated": 1, "Uneducated": 0})
    se = _encode_choice("Self_Employed", Self_Employed, {"Yes": 1, "No": 0})
    ch = _encode_choice("Credit_History", Credit_History, {"1": 1, "0": 0})
    # A misspelled variable name used to raise NameError for the "2" option.
    pa = _encode_choice("Property_Area", Property_Area,
                        {"0": 0, "1": 1, "2": 2})

    # The UI delivers these two as text; convert them so the model receives
    # numeric columns.
    try:
        loan_amount = float(Loan_Amount)
        total_income = float(Total_Income)
    except (TypeError, ValueError):
        raise ValueError(
            "Loan_Amount and Total_Income must be numbers, got "
            f"{Loan_Amount!r} and {Total_Income!r}"
        ) from None

    # Column order matches the training features; the loan term is not asked
    # for and is fixed at 360.
    features = pd.DataFrame({
        'Gender': [gen],
        'Married': [m],
        'Dependents': [d],
        'Education': [e],
        'Self_Employed': [se],
        'LoanAmount': [loan_amount],
        'Loan_Amount_Term': [360],
        'Credit_History': [ch],
        'Property_Area': [pa],
        'Total_income': [total_income],
    })
    label = g.predict(features).tolist()[0]
    if label == "Y":
        return "Loan Status: Approved!"
    if label == "N":
        return "Loan Status: Disapproved"
    return None
# Current Gradio exposes components at the top level (gr.Radio) and no longer
# provides the gr.inputs namespace; fall back to gr.inputs.Radio only when
# the top-level class is missing.
_Radio = getattr(gr, "Radio", None) or gr.inputs.Radio

# Web form around pred(): the inputs are listed in the order of pred()'s
# parameters, and the choice strings are the exact values pred() matches on.
iface = gr.Interface(
    fn=pred,
    inputs=[
        _Radio(["Male", "Female"]),
        _Radio(["Married", "Unmarried"]),
        _Radio(["0", "1", "2", "3+"]),
        _Radio(["Educated", "Uneducated"]),
        _Radio(["Yes", "No"]),
        "text",
        _Radio(["1", "0"]),
        _Radio(["0", "1", "2"]),
        "text",
    ],
    outputs="text")
iface.launch(inline=False)