Spaces:
Runtime error
Runtime error
import pandas as pd | |
import numpy as np | |
import seaborn as sns | |
import matplotlib as plt | |
import matplotlib.pyplot as plt | |
from sklearn import preprocessing | |
from sklearn.preprocessing import LabelEncoder | |
import gradio as gr | |
from array import * | |
#from google.colab import drive | |
#drive.mount('/content/drive') | |
df_train = pd.read_csv("train_ctrUa4K.csv") #Reading the dataset in a dataframe using Pandas | |
df_train['Gender'].fillna("Male", inplace = True) | |
df_train['Married'].fillna("Yes", inplace = True) | |
df_train['Dependents'].fillna("0", inplace = True) | |
df_train['Self_Employed'].fillna("No", inplace = True) | |
df_train['Credit_History'].fillna(1.0, inplace = True) | |
df_train.isnull().sum() | |
def remove_outlier(col): | |
sorted(col) | |
Q1, Q3=col.quantile([0.25, 0.75]) | |
IQR=Q3-Q1 | |
lower_range=Q1-(1.5*IQR) | |
upper_range=Q3+(1.5*IQR) | |
return lower_range, upper_range | |
low_AI, high_AI=remove_outlier(df_train['ApplicantIncome']) | |
df_train['ApplicantIncome']=np.where(df_train['ApplicantIncome']>high_AI, high_AI, df_train['ApplicantIncome']) | |
df_train['ApplicantIncome']=np.where(df_train['ApplicantIncome']<low_AI, low_AI, df_train['ApplicantIncome']) | |
low_CI, high_CI=remove_outlier(df_train['CoapplicantIncome']) | |
df_train['CoapplicantIncome']=np.where(df_train['CoapplicantIncome']>high_CI, high_CI, df_train['CoapplicantIncome']) | |
df_train['CoapplicantIncome']=np.where(df_train['CoapplicantIncome']<low_CI, low_CI, df_train['CoapplicantIncome']) | |
low_LAT, high_LAT=remove_outlier(df_train['Loan_Amount_Term']) | |
df_train['Loan_Amount_Term']=np.where(df_train['Loan_Amount_Term']>high_LAT, high_LAT, df_train['Loan_Amount_Term']) | |
df_train['Loan_Amount_Term']=np.where(df_train['Loan_Amount_Term']<low_LAT, low_LAT, df_train['Loan_Amount_Term']) | |
df_train['Loan_Amount_Term'].fillna(360, inplace = True) | |
table = df_train.pivot_table(values='LoanAmount', index='Self_Employed' ,columns='Education', aggfunc=np.median) | |
def val(x): | |
return table.loc[x['Self_Employed'],x['Education']] | |
df_train['LoanAmount'].fillna(df_train[df_train['LoanAmount'].isnull()].apply(val, axis=1), inplace=True) | |
df_train['Total_income']=df_train['ApplicantIncome']+df_train['CoapplicantIncome'] | |
df_train.head() | |
df=df_train | |
label_encoder = preprocessing.LabelEncoder() | |
df['Gender']= label_encoder.fit_transform(df['Gender']) | |
df['Married']= label_encoder.fit_transform(df['Married']) | |
df['Education']= label_encoder.fit_transform(df['Education']) | |
df['Self_Employed']= label_encoder.fit_transform(df['Self_Employed']) | |
df['Property_Area']= label_encoder.fit_transform(df['Property_Area']) | |
df['Dependents']= label_encoder.fit_transform(df['Dependents']) | |
df.head() | |
x=df_train[['Gender','Married','Dependents','Education','Self_Employed', 'LoanAmount','Loan_Amount_Term','Credit_History','Property_Area', 'Total_income']] | |
y=df_train[['Loan_Status']] | |
from sklearn.model_selection import train_test_split | |
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=4) | |
"""LOGISTIC REGRESSION""" | |
from sklearn.metrics import classification_report, confusion_matrix | |
import itertools | |
def plot_confusion_matrix(cm, classes, | |
normalize=False, | |
title='Confusion matrix', | |
cmap=plt.cm.Blues): | |
""" | |
This function prints and plots the confusion matrix. | |
Normalization can be applied by setting `normalize=True`. | |
""" | |
if normalize: | |
cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis] | |
print("Normalized confusion matrix") | |
else: | |
print('Confusion matrix, without normalization') | |
print(cm) | |
plt.imshow(cm, interpolation='nearest', cmap=cmap) | |
plt.title(title) | |
plt.colorbar() | |
tick_marks = np.arange(len(classes)) | |
plt.xticks(tick_marks, classes, rotation=45) | |
plt.yticks(tick_marks, classes) | |
fmt = '.2f' if normalize else 'd' | |
thresh = cm.max() / 2. | |
for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])): | |
plt.text(j, i, format(cm[i, j], fmt), | |
horizontalalignment="center", | |
color="white" if cm[i, j] > thresh else "black") | |
plt.tight_layout() | |
plt.ylabel('True label') | |
plt.xlabel('Predicted label') | |
from sklearn.model_selection import GridSearchCV,RandomizedSearchCV | |
from sklearn.linear_model import LogisticRegression | |
#from sklearn.metrics import confusion_matrix | |
parametersLR={ 'penalty' : ['l1', 'l2', 'elasticnet', 'none'], | |
'C': [1, 0.5, 0.1, 0.01], | |
'fit_intercept': [True, False], | |
'solver' : ['newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga'], | |
'random_state':[10, 50, 100, 'none'] | |
} | |
LR = LogisticRegression() | |
#r = RandomizedSearchCV(LR,parametersLR) | |
g=GridSearchCV(LR, parametersLR) | |
g.fit(x_train, y_train) | |
ypred = g.predict(x_test) | |
def pred(Gender, Marital_Status, Dependents, Education, Self_Employed, Loan_Amount, Credit_History, Property_Area, Total_Income): | |
if Gender == "Male": | |
gen=1 | |
elif Gender =="Female": | |
gen=0 | |
if Marital_Status=="Married": | |
m=1 | |
elif Marital_Status=="Unmarried": | |
m=0 | |
if Dependents=="0": | |
d=0 | |
elif Dependents=="1": | |
d=1 | |
elif Dependents=="2": | |
d=2 | |
elif Dependents=="3+": | |
d=3 | |
if Education=="Educated": | |
e=1 | |
elif Education == "Uneducated": | |
e=0 | |
if Self_Employed=="Yes": | |
se=1 | |
elif Self_Employed=="No": | |
se=0 | |
if Property_Area=="Urban": | |
pa=0 | |
elif Property_Area=="Semi_Urban": | |
pa=1 | |
elif Propert_Area=="Rural": | |
pa=2 | |
l = {'Gender': [gen], | |
'Married': [m], | |
'Dependents':[d], | |
'Education':[e], | |
'Self_Employed':[se], | |
'LoanAmount':[Loan_Amount], | |
'Loan_Amount_Term':[360], | |
'Credit_History':[1], | |
'Property_Area':[pa], | |
'Total_income':[Total_Income] | |
} | |
df=pd.DataFrame(l) | |
ans = g.predict(df) | |
ans2 = ans.tolist() | |
if ans2[0]=="Y": | |
return "Loan Status: Approved!" | |
elif ans2[0]=="N": | |
return "Loan Status: Disapproved" | |
iface = gr.Interface( | |
fn=pred, | |
inputs=[gr.inputs.Radio(["Male", "Female"]), gr.inputs.Radio(["Married", "Unmarried"]),gr.inputs.Radio(["0", "1","2", "3+"]), gr.inputs.Radio(["Educated", "Uneducated"]), gr.inputs.Radio(["Yes", "No"]), "text", gr.inputs.Radio(["1", "0"]), gr.inputs.Radio(["Urban", "Semi_Urban", "Rural"]), "text"], | |
outputs="text") | |
iface.launch(inline=False) |