import streamlit as st
import pandas as pd
import openpyxl
import lifetimes
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
sns.set()
import warnings
warnings.filterwarnings('ignore')
st.set_page_config(page_title='Detect Inactive Records')
st.title('Detect Inactive Records')
st.subheader('Upload your Excel file')
uploaded_file = st.file_uploader('Choose an XLSX file', type='xlsx')
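# The uploaded workbook is expected to contain transaction-level columns:
# CustomerID, InvoiceNo, InvoiceDate, Quantity and UnitPrice (see the feature
# selection step below).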
if uploaded_file is not None:
    st.markdown('---')

    # Loading the data
    @st.cache_data
    def load_excel(file1):
        df = pd.read_excel(file1, engine='openpyxl', parse_dates=['InvoiceDate'])
        return df

    data = load_excel(uploaded_file)
    st.subheader('Data Preview')
    st.dataframe(data.head(20))
    # Feature selection
    features = ['CustomerID', 'InvoiceNo', 'InvoiceDate', 'Quantity', 'UnitPrice']
    data_clv = data[features].copy()
    data_clv['TotalSales'] = data_clv['Quantity'].multiply(data_clv['UnitPrice'])

    # Check for missing values, then drop rows without a CustomerID
    missing = pd.DataFrame(zip(data_clv.isnull().sum(), data_clv.isnull().sum() / len(data_clv)),
                           columns=['Count', 'Proportion'], index=data_clv.columns)
    data_clv = data_clv[pd.notnull(data_clv['CustomerID'])]

    # Remove returns and other non-positive sales values
    data_clv = data_clv[data_clv['TotalSales'] > 0]
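    # lifetimes' summary_data_from_transaction_data aggregates the transactions into one row
    # per customer: 'frequency' (number of repeat-purchase periods), 'recency' (age of the
    # customer at their last purchase), 'T' (age of the customer at the end of the observed
    # period) and 'monetary_value' (average value of the repeat purchases).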
    # Create the per-customer summary from the cleaned transactions
    summary = lifetimes.utils.summary_data_from_transaction_data(data_clv, 'CustomerID', 'InvoiceDate', 'TotalSales')
    summary = summary.reset_index()
    summary['frequency'].plot(kind='hist', bins=50)
    one_time_buyers = round(sum(summary['frequency'] == 0) / float(len(summary)) * 100, 2)
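    # The BG/NBD (Beta-Geometric/Negative Binomial Distribution) model estimates, from
    # frequency, recency and T, how often a customer buys while "alive" and how likely
    # they are to have dropped out after their last purchase.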
    # Fitting the BG/NBD model
    bgf = lifetimes.BetaGeoFitter(penalizer_coef=0.0)
    bgf.fit(summary['frequency'], summary['recency'], summary['T'])
    bgf_coefficient = bgf.summary
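    # bgf.summary holds the fitted model coefficients (r, alpha, a, b).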
    # Compute the probability that each customer is still "alive" (has not churned)
    summary['probability_alive'] = bgf.conditional_probability_alive(summary['frequency'], summary['recency'], summary['T'])
    # Predict the number of transactions over the next 300 days from the historical data
    t = 300
    summary['Predicted No. of Transaction'] = round(bgf.conditional_expected_number_of_purchases_up_to_time(t, summary['frequency'], summary['recency'], summary['T']), 2)
    # Top 10 customers by predicted transactions (kept for inspection; not displayed)
    top_customers = summary.sort_values(by='Predicted No. of Transaction', ascending=False).head(10).reset_index()
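    # A customer expected to make at least one purchase over this 300-day horizon is
    # flagged ACTIVE below; otherwise they are flagged INACTIVE.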
    # Hidden trends: exploratory plots and correlations
    ax = sns.countplot(x="Predicted No. of Transaction", data=summary)
    plt.scatter(summary['probability_alive'], summary['Predicted No. of Transaction'])
    summary_correlation = summary.corr()
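    # Note: the plots above are drawn on matplotlib/seaborn figures but are never rendered
    # in the Streamlit page. If you want to display them, one option (not part of the
    # original app) is something like:
    #     fig, hist_ax = plt.subplots()
    #     hist_ax.hist(summary['frequency'], bins=50)
    #     st.pyplot(fig)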
    summary1 = summary.copy()
    summary1['Active/Inactive'] = summary1['Predicted No. of Transaction'].apply(lambda x: "ACTIVE" if x >= 1 else "INACTIVE")

    selector = st.selectbox('Select User ID', summary1['CustomerID'], index=None, placeholder='Select Customer ID')
    summary2 = summary1[['CustomerID', 'Active/Inactive']]
    if selector is not None:
        selected = summary2.loc[summary1['CustomerID'] == selector].iloc[0, 1]
        st.write('STATUS:', selected)
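    # Enrich the per-customer summary with average quantity and average sales value
    # before offering it as a downloadable CSV.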
    trends = data_clv.groupby('CustomerID')['Quantity'].mean().reset_index()
    trends1 = data_clv.groupby('CustomerID')['TotalSales'].mean().reset_index()
    summary1 = summary1.merge(trends, how='left', on='CustomerID')
    summary1 = summary1.merge(trends1, how='left', on='CustomerID')
    summary1.drop('probability_alive', axis=1, inplace=True)
    summary1.rename(columns={'Quantity': 'Average Quantity', 'TotalSales': 'Average Sales Value'}, inplace=True)

    out = summary1.to_csv().encode('utf-8')
    st.download_button(label='DOWNLOAD RESULT', data=out, file_name='CLV_OUTPUT.csv', mime='text/csv')
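# To run the app locally (assuming Streamlit and the dependencies above are installed):
#     streamlit run app.py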