File size: 3,644 Bytes
b929aa7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fb52313
 
b929aa7
fb52313
 
b929aa7
 
fb52313
b929aa7
 
 
 
 
 
 
 
 
fb52313
c319fc9
b929aa7
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
import streamlit as st
import pandas as pd
import openpyxl
import lifetimes
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns
sns.set()

import warnings
warnings.filterwarnings('ignore')


st.set_page_config(page_title='Detect Inactive Records')
st.title('Detect Inactive Records')
st.subheader('Upload your Excel file')
uploaded_file = st.file_uploader('Choose a XLSX file', type='xlsx')
if uploaded_file is not None:
    st.markdown('---')

    # Cache the parse: Streamlit re-runs the whole script on every widget
    # interaction, and re-reading the workbook each time would be slow.
    @st.cache_data
    def load_excel(file1):
        """Read the uploaded workbook; 'InvoiceDate' is parsed as datetime."""
        return pd.read_excel(file1, engine='openpyxl', parse_dates=['InvoiceDate'])

    data = load_excel(uploaded_file)
    st.subheader('Data Preview')
    st.dataframe(data.head(20))

    # Keep only the columns needed for the lifetimes summary.
    # .copy() avoids pandas' SettingWithCopyWarning (chained assignment on a
    # view) when the TotalSales column is added below.
    features = ['CustomerID', 'InvoiceNo', 'InvoiceDate', 'Quantity', 'UnitPrice']
    data_clv = data[features].copy()
    data_clv['TotalSales'] = data_clv['Quantity'].multiply(data_clv['UnitPrice'])

    # Drop rows with no customer id, then rows with non-positive sales value
    # (returns / credit notes): the BG/NBD model needs each transaction tied
    # to a customer and with positive spend.
    data_clv = data_clv[pd.notnull(data_clv['CustomerID'])]
    data_clv = data_clv[data_clv['TotalSales'] > 0]

    # Per-customer RFM summary (frequency, recency, T, monetary_value)
    # required by the lifetimes fitters.
    summary = lifetimes.utils.summary_data_from_transaction_data(
        data_clv, 'CustomerID', 'InvoiceDate', 'TotalSales')
    summary = summary.reset_index()

    # Fit the BG/NBD ("buy 'til you die") purchase-frequency model.
    bgf = lifetimes.BetaGeoFitter(penalizer_coef=0.0)
    bgf.fit(summary['frequency'], summary['recency'], summary['T'])

    # P(customer is still "alive") given their purchase history.
    summary['probability_alive'] = bgf.conditional_probability_alive(
        summary['frequency'], summary['recency'], summary['T'])

    # Expected number of purchases over the next 300 days.
    t = 300
    summary['Predicted No. of Transaction'] = round(
        bgf.conditional_expected_number_of_purchases_up_to_time(
            t, summary['frequency'], summary['recency'], summary['T']), 2)

    # Label: expected to buy at least once in the horizon -> ACTIVE.
    # Copy so the labelled frame does not alias the raw summary.
    summary1 = summary.copy()
    summary1['Active/Inactive'] = summary1['Predicted No. of Transaction'].apply(
        lambda x: "ACTIVE" if x >= 1 else "INACTIVE")

    selector = st.selectbox('Select User ID', summary1['CustomerID'],
                            index=None, placeholder='Select Customer ID')
    summary2 = summary1[['CustomerID', 'Active/Inactive']]
    if selector is not None:
        selected = summary2.loc[summary1['CustomerID'] == selector].iloc[0, 1]
        st.write('STATUS:', selected)

    # Per-customer averages enrich the downloadable report.
    trends = data_clv.groupby('CustomerID')['Quantity'].mean().reset_index()
    trends1 = data_clv.groupby('CustomerID')['TotalSales'].mean().reset_index()
    summary1 = summary1.merge(trends, how='left', on='CustomerID')
    summary1 = summary1.merge(trends1, how='left', on='CustomerID')
    summary1.drop('probability_alive', axis=1, inplace=True)
    summary1.rename(columns={'Quantity': 'Average Quantity',
                             'TotalSales': 'Average Sales Value'}, inplace=True)

    # index=False: the exported CSV should not carry pandas' positional index.
    out = summary1.to_csv(index=False).encode('utf-8')
    st.download_button(label='DOWNLOAD RESULT', data=out,
                       file_name='CLV_OUTPUT.csv', mime='text/csv')