import streamlit as st
import pandas as pd
import openpyxl
import lifetimes
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
sns.set()
import warnings
warnings.filterwarnings('ignore')
st.set_page_config(page_title='Detect Inactive Records')
st.title('Detect Inactive Records')
st.subheader('Upload your Excel file')
uploaded_file = st.file_uploader('Choose an XLSX file', type='xlsx')
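# The uploaded workbook is expected to contain transaction-level columns:
# CustomerID, InvoiceNo, InvoiceDate, Quantity and UnitPrice (see the feature
# selection step below).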
if uploaded_file is not None:
    st.markdown('---')

    # Loading the data
    @st.cache_data
    def load_excel(file1):
        df = pd.read_excel(file1, engine='openpyxl', parse_dates=['InvoiceDate'])
        return df

    data = load_excel(uploaded_file)
    st.subheader('Data Preview')
    st.dataframe(data.head(20))
    # Feature selection
    features = ['CustomerID', 'InvoiceNo', 'InvoiceDate', 'Quantity', 'UnitPrice']
    data_clv = data[features].copy()
    data_clv['TotalSales'] = data_clv['Quantity'].multiply(data_clv['UnitPrice'])

    # Check for missing values, then drop rows without a CustomerID
    missing = pd.DataFrame(zip(data_clv.isnull().sum(), data_clv.isnull().sum() / len(data_clv)),
                           columns=['Count', 'Proportion'], index=data_clv.columns)
    data_clv = data_clv[pd.notnull(data_clv['CustomerID'])]

    # Remove returns and other non-positive sales values
    data_clv = data_clv[data_clv['TotalSales'] > 0]
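    # lifetimes' summary_data_from_transaction_data aggregates the transactions into one row
    # per customer: 'frequency' (number of repeat-purchase periods), 'recency' (age of the
    # customer at their last purchase), 'T' (age of the customer at the end of the observed
    # period) and 'monetary_value' (average value of the repeat purchases).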
    # Create the per-customer summary from the cleaned transactions
    summary = lifetimes.utils.summary_data_from_transaction_data(data_clv, 'CustomerID', 'InvoiceDate', 'TotalSales')
    summary = summary.reset_index()
    summary['frequency'].plot(kind='hist', bins=50)
    one_time_buyers = round(sum(summary['frequency'] == 0) / float(len(summary)) * 100, 2)
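    # The BG/NBD (Beta-Geometric/Negative Binomial Distribution) model estimates, from
    # frequency, recency and T, how often a customer buys while "alive" and how likely
    # they are to have dropped out after their last purchase.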
    # Fitting the BG/NBD model
    bgf = lifetimes.BetaGeoFitter(penalizer_coef=0.0)
    bgf.fit(summary['frequency'], summary['recency'], summary['T'])
    bgf_coefficient = bgf.summary
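    # bgf.summary holds the fitted model coefficients (r, alpha, a, b).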
    # Compute the probability that each customer is still "alive" (has not churned)
    summary['probability_alive'] = bgf.conditional_probability_alive(summary['frequency'], summary['recency'], summary['T'])
    # Predict the number of transactions over the next 300 days from the historical data
    t = 300
    summary['Predicted No. of Transaction'] = round(bgf.conditional_expected_number_of_purchases_up_to_time(t, summary['frequency'], summary['recency'], summary['T']), 2)
    # Top 10 customers by predicted transactions (kept for inspection; not displayed)
    top_customers = summary.sort_values(by='Predicted No. of Transaction', ascending=False).head(10).reset_index()
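    # A customer expected to make at least one purchase over this 300-day horizon is
    # flagged ACTIVE below; otherwise they are flagged INACTIVE.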
    # Hidden trends: exploratory plots and correlations
    ax = sns.countplot(x="Predicted No. of Transaction", data=summary)
    plt.scatter(summary['probability_alive'], summary['Predicted No. of Transaction'])
    summary_correlation = summary.corr()
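    # Note: the plots above are drawn on matplotlib/seaborn figures but are never rendered
    # in the Streamlit page. If you want to display them, one option (not part of the
    # original app) is something like:
    #     fig, hist_ax = plt.subplots()
    #     hist_ax.hist(summary['frequency'], bins=50)
    #     st.pyplot(fig)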
    summary1 = summary.copy()
    summary1['Active/Inactive'] = summary1['Predicted No. of Transaction'].apply(lambda x: "ACTIVE" if x >= 1 else "INACTIVE")

    selector = st.selectbox('Select User ID', summary1['CustomerID'], index=None, placeholder='Select Customer ID')
    summary2 = summary1[['CustomerID', 'Active/Inactive']]
    if selector is not None:
        selected = summary2.loc[summary1['CustomerID'] == selector].iloc[0, 1]
        st.write('STATUS:', selected)
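    # Enrich the per-customer summary with average quantity and average sales value
    # before offering it as a downloadable CSV.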
    trends = data_clv.groupby('CustomerID')['Quantity'].mean().reset_index()
    trends1 = data_clv.groupby('CustomerID')['TotalSales'].mean().reset_index()
    summary1 = summary1.merge(trends, how='left', on='CustomerID')
    summary1 = summary1.merge(trends1, how='left', on='CustomerID')
    summary1.drop('probability_alive', axis=1, inplace=True)
    summary1.rename(columns={'Quantity': 'Average Quantity', 'TotalSales': 'Average Sales Value'}, inplace=True)

    out = summary1.to_csv().encode('utf-8')
    st.download_button(label='DOWNLOAD RESULT', data=out, file_name='CLV_OUTPUT.csv', mime='text/csv')
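# To run the app locally (assuming Streamlit and the dependencies above are installed):
#     streamlit run app.py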