File size: 3,644 Bytes
b929aa7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fb52313
 
b929aa7
fb52313
 
b929aa7
 
fb52313
b929aa7
 
 
 
 
 
 
 
 
fb52313
c319fc9
b929aa7
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
import streamlit as st
import pandas as pd
import openpyxl
import lifetimes
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns
sns.set()

import warnings
warnings.filterwarnings('ignore')


st.set_page_config(page_title='Detect Inactive Records')
st.title('Detect Inactive Records')
st.subheader('Upload your Excel file')
uploaded_file = st.file_uploader('Choose a XLSX file', type='xlsx')
if uploaded_file is not None:
    st.markdown('---')

    # Cache the parse: Streamlit re-runs the whole script on every widget
    # interaction, and re-reading the workbook each time would be slow.
    @st.cache_data
    def load_excel(file1):
        """Read the uploaded workbook; 'InvoiceDate' is parsed as datetime."""
        return pd.read_excel(file1, engine='openpyxl', parse_dates=['InvoiceDate'])

    data = load_excel(uploaded_file)
    st.subheader('Data Preview')
    st.dataframe(data.head(20))

    # Keep only the columns needed for the lifetimes summary.
    # .copy() avoids pandas' SettingWithCopyWarning (chained assignment on a
    # view) when the TotalSales column is added below.
    features = ['CustomerID', 'InvoiceNo', 'InvoiceDate', 'Quantity', 'UnitPrice']
    data_clv = data[features].copy()
    data_clv['TotalSales'] = data_clv['Quantity'].multiply(data_clv['UnitPrice'])

    # Drop rows with no customer id, then rows with non-positive sales value
    # (returns / credit notes): the BG/NBD model needs each transaction tied
    # to a customer and with positive spend.
    data_clv = data_clv[pd.notnull(data_clv['CustomerID'])]
    data_clv = data_clv[data_clv['TotalSales'] > 0]

    # Per-customer RFM summary (frequency, recency, T, monetary_value)
    # required by the lifetimes fitters.
    summary = lifetimes.utils.summary_data_from_transaction_data(
        data_clv, 'CustomerID', 'InvoiceDate', 'TotalSales')
    summary = summary.reset_index()

    # Fit the BG/NBD ("buy 'til you die") purchase-frequency model.
    bgf = lifetimes.BetaGeoFitter(penalizer_coef=0.0)
    bgf.fit(summary['frequency'], summary['recency'], summary['T'])

    # P(customer is still "alive") given their purchase history.
    summary['probability_alive'] = bgf.conditional_probability_alive(
        summary['frequency'], summary['recency'], summary['T'])

    # Expected number of purchases over the next 300 days.
    t = 300
    summary['Predicted No. of Transaction'] = round(
        bgf.conditional_expected_number_of_purchases_up_to_time(
            t, summary['frequency'], summary['recency'], summary['T']), 2)

    # Label: expected to buy at least once in the horizon -> ACTIVE.
    # Copy so the labelled frame does not alias the raw summary.
    summary1 = summary.copy()
    summary1['Active/Inactive'] = summary1['Predicted No. of Transaction'].apply(
        lambda x: "ACTIVE" if x >= 1 else "INACTIVE")

    selector = st.selectbox('Select User ID', summary1['CustomerID'],
                            index=None, placeholder='Select Customer ID')
    summary2 = summary1[['CustomerID', 'Active/Inactive']]
    if selector is not None:
        selected = summary2.loc[summary1['CustomerID'] == selector].iloc[0, 1]
        st.write('STATUS:', selected)

    # Per-customer averages enrich the downloadable report.
    trends = data_clv.groupby('CustomerID')['Quantity'].mean().reset_index()
    trends1 = data_clv.groupby('CustomerID')['TotalSales'].mean().reset_index()
    summary1 = summary1.merge(trends, how='left', on='CustomerID')
    summary1 = summary1.merge(trends1, how='left', on='CustomerID')
    summary1.drop('probability_alive', axis=1, inplace=True)
    summary1.rename(columns={'Quantity': 'Average Quantity',
                             'TotalSales': 'Average Sales Value'}, inplace=True)

    # index=False: the exported CSV should not carry pandas' positional index.
    out = summary1.to_csv(index=False).encode('utf-8')
    st.download_button(label='DOWNLOAD RESULT', data=out,
                       file_name='CLV_OUTPUT.csv', mime='text/csv')