"""Streamlit app: flag customers as ACTIVE/INACTIVE from transaction history.

Uploads an Excel transaction log, builds an RFM summary with `lifetimes`,
fits a BG/NBD model, predicts each customer's expected purchases over the
next 300 days, and offers the labelled result as a CSV download.
"""

import warnings

import lifetimes
import matplotlib.pyplot as plt
import numpy as np
import openpyxl  # noqa: F401 -- backend engine for pd.read_excel
import pandas as pd
import seaborn as sns
import streamlit as st

sns.set()
warnings.filterwarnings('ignore')

st.set_page_config(page_title='Detect Inactive Records')
st.title('Detect Inactive Records')
st.subheader('Upload your Excel file')
uploaded_file = st.file_uploader('Choose a XLSX file', type='xlsx')

if uploaded_file is not None:
    st.markdown('---')

    # Loading the data (cached so re-runs don't re-parse the workbook).
    @st.cache_data
    def load_excel(file1):
        """Read the uploaded workbook, parsing InvoiceDate as datetime."""
        return pd.read_excel(file1, engine='openpyxl',
                             parse_dates=['InvoiceDate'])

    data = load_excel(uploaded_file)
    st.subheader('Data Preview')
    st.dataframe(data.head(20))

    # Feature selection. .copy() makes data_clv its own frame so the
    # TotalSales assignment below cannot hit SettingWithCopyWarning.
    features = ['CustomerID', 'InvoiceNo', 'InvoiceDate', 'Quantity',
                'UnitPrice']
    data_clv = data[features].copy()
    data_clv['TotalSales'] = data_clv['Quantity'].multiply(
        data_clv['UnitPrice'])

    # Missing-value overview per column (count + proportion).
    missing = pd.DataFrame(
        zip(data_clv.isnull().sum(),
            data_clv.isnull().sum() / len(data_clv)),
        columns=['Count', 'Proportion'],
        index=data_clv.columns,
    )

    # Drop rows with no customer and non-positive sales (returns/credits).
    data_clv = data_clv[pd.notnull(data_clv['CustomerID'])]
    data_clv = data_clv[data_clv['TotalSales'] > 0]

    # Per-customer RFM summary: frequency, recency, T, monetary_value.
    summary = lifetimes.utils.summary_data_from_transaction_data(
        data_clv, 'CustomerID', 'InvoiceDate', 'TotalSales'
    ).reset_index()

    # Repeat-purchase distribution (rendered: the original figure was
    # created but never passed to st.pyplot, so it never appeared).
    fig_hist, ax_hist = plt.subplots()
    summary['frequency'].plot(kind='hist', bins=50, ax=ax_hist)
    st.pyplot(fig_hist)

    # Share of one-time buyers (frequency == 0 in lifetimes' convention:
    # repeat purchases only).
    one_time_buyers = round(
        sum(summary['frequency'] == 0) / float(len(summary)) * 100, 2)

    # Fitting the BG/NBD model.
    bgf = lifetimes.BetaGeoFitter(penalizer_coef=0.0)
    bgf.fit(summary['frequency'], summary['recency'], summary['T'])
    bgf_coefficient = bgf.summary  # fitted r, alpha, a, b coefficients

    # Probability each customer is still "alive" given their history.
    summary['probability_alive'] = bgf.conditional_probability_alive(
        summary['frequency'], summary['recency'], summary['T'])

    # Predict purchases over the next 300 days from historical data.
    t = 300
    summary['Predicted No. of Transaction'] = round(
        bgf.conditional_expected_number_of_purchases_up_to_time(
            t, summary['frequency'], summary['recency'], summary['T']),
        2,
    )

    # Hidden trends: prediction counts and alive-probability scatter
    # (likewise now rendered instead of silently discarded).
    fig_count, ax_count = plt.subplots()
    sns.countplot(x='Predicted No. of Transaction', data=summary,
                  ax=ax_count)
    st.pyplot(fig_count)

    fig_scatter, ax_scatter = plt.subplots()
    ax_scatter.scatter(summary['probability_alive'],
                       summary['Predicted No. of Transaction'])
    st.pyplot(fig_scatter)

    summary_correlation = summary.corr()

    # Label customers: expected >= 1 purchase in the horizon => ACTIVE.
    # .copy() so the merges/drops below don't mutate `summary`.
    summary1 = summary.copy()
    summary1['Active/Inactive'] = summary1[
        'Predicted No. of Transaction'
    ].apply(lambda x: 'ACTIVE' if x >= 1 else 'INACTIVE')

    selector = st.selectbox('Select User ID', summary1['CustomerID'],
                            index=None,
                            placeholder='Select Customer ID')
    if selector is not None:
        # Single mask + column lookup on the same frame (the original
        # indexed one frame with a mask built from another).
        selected = summary1.loc[summary1['CustomerID'] == selector,
                                'Active/Inactive'].iloc[0]
        st.write('STATUS:', selected)

    # Merge per-customer averages into the downloadable result.
    trends = data_clv.groupby('CustomerID')['Quantity'].mean().reset_index()
    trends1 = data_clv.groupby('CustomerID')['TotalSales'].mean().reset_index()
    summary1 = summary1.merge(trends, how='left', on='CustomerID')
    summary1 = summary1.merge(trends1, how='left', on='CustomerID')
    summary1.drop('probability_alive', axis=1, inplace=True)
    summary1.rename(columns={'Quantity': 'Average Quantity',
                             'TotalSales': 'Average Sales Value'},
                    inplace=True)

    # index=False: don't ship the meaningless integer index in the CSV.
    out = summary1.to_csv(index=False).encode('utf-8')
    st.download_button(label='DOWNLOAD RESULT', data=out,
                       file_name='CLV_OUTPUT.csv', mime='text/csv')