Spaces:

sanjana
/

Loan-Prediction-Analysis

Runtime error

App Files Files Community

Loan-Prediction-Analysis / app.py

sanjana

Update app.py

032e42d almost 3 years ago

raw

history blame contribute delete

6.35 kB

	import pandas as pd
	import numpy as np
	import seaborn as sns
	import matplotlib as plt
	import matplotlib.pyplot as plt
	from sklearn import preprocessing
	from sklearn.preprocessing import LabelEncoder
	import gradio as gr
	from array import *


	#from google.colab import drive
	#drive.mount('/content/drive')

	# Read the training data set into a DataFrame.
	df_train = pd.read_csv("train_ctrUa4K.csv")

	# Replace missing values in these columns with fixed defaults.
	# A single DataFrame-level fillna whose result is assigned back is used on
	# purpose: calling fillna(inplace=True) on a column selection may operate on
	# a temporary copy and leave df_train unchanged (pandas copy-on-write).
	df_train = df_train.fillna(
		{
			"Gender": "Male",
			"Married": "Yes",
			"Dependents": "0",
			"Self_Employed": "No",
			"Credit_History": 1.0,
		}
	)


	def remove_outlier(col, factor=1.5):
		"""Return the interquartile-range fences for a numeric pandas Series.

		Despite its name, this function removes nothing: it only computes the
		bounds, and the caller decides what to do with values outside them.

		Args:
			col: Series providing ``quantile``; its 25th and 75th percentiles
				are used.
			factor: Multiple of the interquartile range added beyond each
				quartile. Defaults to 1.5.

		Returns:
			A ``(lower_range, upper_range)`` tuple equal to
			``(Q1 - factor * IQR, Q3 + factor * IQR)``.
		"""
		# quantile() works on unsorted data, so no explicit sort is needed.
		first_quartile, third_quartile = col.quantile([0.25, 0.75])
		iqr = third_quartile - first_quartile
		lower_range = first_quartile - (factor * iqr)
		upper_range = third_quartile + (factor * iqr)
		return lower_range, upper_range

	# Cap ApplicantIncome at the fences returned by remove_outlier(): values above
	# the upper fence are set to it, values below the lower fence are set to it.
	low_AI, high_AI = remove_outlier(df_train['ApplicantIncome'])
	df_train['ApplicantIncome'] = np.where(df_train['ApplicantIncome'] > high_AI, high_AI, df_train['ApplicantIncome'])
	df_train['ApplicantIncome'] = np.where(df_train['ApplicantIncome'] < low_AI, low_AI, df_train['ApplicantIncome'])

	# Same capping for CoapplicantIncome.
	low_CI, high_CI = remove_outlier(df_train['CoapplicantIncome'])
	df_train['CoapplicantIncome'] = np.where(df_train['CoapplicantIncome'] > high_CI, high_CI, df_train['CoapplicantIncome'])
	df_train['CoapplicantIncome'] = np.where(df_train['CoapplicantIncome'] < low_CI, low_CI, df_train['CoapplicantIncome'])

	# Same capping for Loan_Amount_Term.
	# NOTE(review): if the first and third quartiles of this column are equal,
	# both fences coincide and every non-missing value collapses to that single
	# value - confirm this is intended.
	low_LAT, high_LAT = remove_outlier(df_train['Loan_Amount_Term'])
	df_train['Loan_Amount_Term'] = np.where(df_train['Loan_Amount_Term'] > high_LAT, high_LAT, df_train['Loan_Amount_Term'])
	df_train['Loan_Amount_Term'] = np.where(df_train['Loan_Amount_Term'] < low_LAT, low_LAT, df_train['Loan_Amount_Term'])

	# Missing terms default to 360. The result is assigned back instead of using
	# inplace=True on a column selection, which may modify only a temporary copy.
	df_train['Loan_Amount_Term'] = df_train['Loan_Amount_Term'].fillna(360)

	# Median LoanAmount for every (Self_Employed, Education) combination.
	table = df_train.pivot_table(values='LoanAmount', index='Self_Employed', columns='Education', aggfunc='median')


	def val(x):
		"""Return the median LoanAmount of the group that row ``x`` belongs to."""
		return table.loc[x['Self_Employed'], x['Education']]


	# Fill each missing LoanAmount with the median of the row's group. The guard
	# skips the row-wise apply when there is nothing to fill.
	missing_loan_amount = df_train['LoanAmount'].isnull()
	if missing_loan_amount.any():
		df_train.loc[missing_loan_amount, 'LoanAmount'] = df_train[missing_loan_amount].apply(val, axis=1)

	df_train['Total_income'] = df_train['ApplicantIncome'] + df_train['CoapplicantIncome']

	# df is another name for the same DataFrame object, not a copy.
	df = df_train

	# Replace every categorical column with integer codes. The encoder is re-fitted
	# per column, so after the loop it only remembers the classes of the last one.
	label_encoder = preprocessing.LabelEncoder()
	for categorical_column in ['Gender', 'Married', 'Education', 'Self_Employed', 'Property_Area', 'Dependents']:
		df[categorical_column] = label_encoder.fit_transform(df[categorical_column])

	# Feature matrix and target used for training.
	x = df_train[['Gender', 'Married', 'Dependents', 'Education', 'Self_Employed', 'LoanAmount', 'Loan_Amount_Term', 'Credit_History', 'Property_Area', 'Total_income']]

	y = df_train[['Loan_Status']]

	# Hold out 20% of the rows for testing; the seed keeps the split reproducible.
	from sklearn.model_selection import train_test_split
	x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=4)

	"""LOGISTIC REGRESSION"""

	from sklearn.metrics import classification_report, confusion_matrix
	import itertools
	def plot_confusion_matrix(
		cm,
		classes,
		normalize=False,
		title='Confusion matrix',
		cmap=plt.cm.Blues,
	):
		"""Print a confusion matrix and draw it as an annotated image.

		Args:
			cm: 2-D array of counts, indexed as ``cm[true, predicted]`` to
				match the axis labels drawn below.
			classes: Labels placed on both axes; ``len(classes)`` ticks are drawn.
			normalize: When true, each row is divided by its row sum before
				printing and plotting.
			title: Title of the plot.
			cmap: Colour map passed to ``plt.imshow``.
		"""
		if not normalize:
			print('Confusion matrix, without normalization')
		else:
			row_totals = cm.sum(axis=1)[:, np.newaxis]
			cm = cm.astype('float') / row_totals
			print("Normalized confusion matrix")

		print(cm)

		plt.imshow(cm, interpolation='nearest', cmap=cmap)
		plt.title(title)
		plt.colorbar()

		tick_positions = np.arange(len(classes))
		plt.xticks(tick_positions, classes, rotation=45)
		plt.yticks(tick_positions, classes)

		# Normalised cells are fractions, raw cells are integer counts.
		if normalize:
			cell_format = '.2f'
		else:
			cell_format = 'd'

		# Cells above half of the maximum get white text, the others black text.
		half_of_max = cm.max() / 2.
		for row in range(cm.shape[0]):
			for column in range(cm.shape[1]):
				if cm[row, column] > half_of_max:
					text_colour = "white"
				else:
					text_colour = "black"
				plt.text(
					column,
					row,
					format(cm[row, column], cell_format),
					horizontalalignment="center",
					color=text_colour,
				)

		plt.tight_layout()
		plt.ylabel('True label')
		plt.xlabel('Predicted label')

	from sklearn.model_selection import GridSearchCV,RandomizedSearchCV
	from sklearn.linear_model import LogisticRegression
	#from sklearn.metrics import confusion_matrix

	# Candidate hyper-parameters; GridSearchCV tries every combination.
	# NOTE(review): recent scikit-learn releases spell the unpenalised option as
	# None instead of the string 'none'. It is left unchanged because the installed
	# version is not visible from this file - confirm and adjust.
	parametersLR = {
		'penalty': ['l1', 'l2', 'elasticnet', 'none'],
		'C': [1, 0.5, 0.1, 0.01],
		'fit_intercept': [True, False],
		'solver': ['newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga'],
		# "No fixed seed" must be the None object: the string 'none' is not a
		# valid random_state and makes every candidate that uses it fail to fit.
		'random_state': [10, 50, 100, None],
	}
	LR = LogisticRegression()
	#r = RandomizedSearchCV(LR,parametersLR)
	g = GridSearchCV(LR, parametersLR)
	# y_train comes from a one-column selection; flatten it to the 1-D target
	# that fit() expects.
	g.fit(x_train, np.ravel(y_train))

	ypred = g.predict(x_test)

	def pred(Gender, Marital_Status, Dependents, Education, Self_Employed, Loan_Amount, Credit_History, Property_Area, Total_Income):
		"""Predict the loan decision for one applicant from the form answers.

		The categorical answers are translated to integer codes, combined with
		the numeric answers into a one-row DataFrame whose columns are the
		training features, and passed to the fitted search object ``g``.

		Args:
			Gender: "Male" or "Female".
			Marital_Status: "Married" or "Unmarried".
			Dependents: "0", "1", "2" or "3+".
			Education: "Educated" or "Uneducated".
			Self_Employed: "Yes" or "No".
			Loan_Amount: Loan amount as a number or numeric text.
			Credit_History: Credit history flag as a number or numeric text
				(the form offers "1" and "0").
			Property_Area: "Urban", "Semi_Urban" or "Rural".
			Total_Income: Total income as a number or numeric text.

		Returns:
			"Loan Status: Approved!" when the model predicts "Y",
			"Loan Status: Disapproved" when it predicts "N".

		Raises:
			ValueError: If an answer is missing, is not one of the listed
				options, is not numeric where a number is required, or if the
				model returns a label other than "Y" or "N".
		"""

		def _code(field, answer, codes):
			# Translate one categorical answer; a missing or unknown answer gets
			# a readable error instead of an unbound local further down.
			try:
				return codes[answer]
			except (KeyError, TypeError):
				raise ValueError(f"{field}: unsupported value {answer!r}") from None

		def _number(field, answer):
			# Text boxes and radio buttons deliver strings; convert explicitly so
			# the model never receives raw text.
			try:
				return float(answer)
			except (TypeError, ValueError):
				raise ValueError(f"{field}: expected a number, got {answer!r}") from None

		# NOTE(review): the training columns are encoded with LabelEncoder, which
		# numbers classes in sorted order. The Education and Property_Area codes
		# below are kept as originally written but may not match that order -
		# verify them against the labels in the training CSV.
		gen = _code("Gender", Gender, {"Male": 1, "Female": 0})
		m = _code("Marital_Status", Marital_Status, {"Married": 1, "Unmarried": 0})
		d = _code("Dependents", Dependents, {"0": 0, "1": 1, "2": 2, "3+": 3})
		e = _code("Education", Education, {"Educated": 1, "Uneducated": 0})
		se = _code("Self_Employed", Self_Employed, {"Yes": 1, "No": 0})
		pa = _code("Property_Area", Property_Area, {"Urban": 0, "Semi_Urban": 1, "Rural": 2})

		# Column names and order match the feature matrix the model was fitted on.
		features = pd.DataFrame({
			'Gender': [gen],
			'Married': [m],
			'Dependents': [d],
			'Education': [e],
			'Self_Employed': [se],
			'LoanAmount': [_number("Loan_Amount", Loan_Amount)],
			# The form has no input for the term, so a fixed value is used.
			'Loan_Amount_Term': [360],
			# Use the submitted answer instead of a hard-coded 1.
			'Credit_History': [_number("Credit_History", Credit_History)],
			'Property_Area': [pa],
			'Total_income': [_number("Total_Income", Total_Income)],
		})

		label = g.predict(features).tolist()[0]
		if label == "Y":
			return "Loan Status: Approved!"
		if label == "N":
			return "Loan Status: Disapproved"
		raise ValueError(f"Unexpected model output: {label!r}")

	# Newer Gradio releases expose components at the top level (gr.Radio) and no
	# longer provide the gr.inputs namespace; older ones only have gr.inputs.
	# Resolve whichever exists so the app starts on either.
	_radio = gr.Radio if hasattr(gr, "Radio") else gr.inputs.Radio

	# The inputs are listed in the same order as pred()'s positional parameters.
	iface = gr.Interface(
		fn=pred,
		inputs=[
			_radio(["Male", "Female"]),
			_radio(["Married", "Unmarried"]),
			_radio(["0", "1", "2", "3+"]),
			_radio(["Educated", "Uneducated"]),
			_radio(["Yes", "No"]),
			"text",
			_radio(["1", "0"]),
			_radio(["Urban", "Semi_Urban", "Rural"]),
			"text",
		],
		outputs="text",
	)
	iface.launch(inline=False)