# rfmAutoV3 / app.py
# Jhoeel's picture
# Update app.py
# 102a3e9
import gradio as gr
import pandas as pd
import numpy as np
import datetime
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
def calculate_rfm(df):
    """Build a per-customer RFM table and assign each customer to a segment.

    The input holds one row per purchase with the columns ``'Email'``
    (customer key), ``'Fecha compra'`` (purchase date as ``MM/DD/YYYY`` text)
    and ``'Valor compra'`` (purchase amount).

    Processing steps:

    1. Recency: whole days between today and each purchase date.
    2. Frequency / monetary: purchase count and summed amount per e-mail.
    3. Keep only each customer's most recent purchase row.
    4. Standardise the three metrics and drop customers whose absolute
       z-score exceeds 3 on any metric.
    5. Cluster the remaining customers into five groups with k-means.

    Args:
        df: Purchase-level ``pandas.DataFrame``. It is not modified.

    Returns:
        A DataFrame with the columns ``Email``, ``recencia``, ``frecuencia``,
        ``monetario`` and ``cluster`` (segment name), one row per customer
        that survived the outlier filter.

    Raises:
        ValueError: If fewer customers than clusters remain after the
            outlier filter.
    """
    n_clusters = 5
    rfm_columns = ['recencia', 'frecuencia', 'monetario']
    # NOTE(review): k-means label ids are presumably arbitrary, so this fixed
    # id -> name mapping may not match the segments' meaning; confirm against
    # the cluster centroids before relying on the names.
    segment_names = {4: 'Dormidos', 0: 'Nuevos', 2: 'Potenciales', 1: 'En riesgo', 3: 'Fieles'}

    # Work on a copy so the caller's DataFrame is left untouched.
    df = df.copy()

    # Recency in whole days. `.dt.days` replaces the former
    # `.astype('timedelta64[D]')`, which pandas 2.x no longer accepts.
    df['Fecha compra'] = pd.to_datetime(df['Fecha compra'], format='%m/%d/%Y')
    today = pd.Timestamp(datetime.datetime.now().date())
    df['recencia'] = (today - df['Fecha compra'].dt.normalize()).dt.days.astype(int)

    # Frequency and monetary value per customer, attached to every purchase row.
    purchases_by_customer = df.groupby('Email')
    frequency = purchases_by_customer.size().rename('frecuencia')
    monetary = purchases_by_customer['Valor compra'].sum().round(2).rename('monetario')
    df = df.join(frequency, on='Email').join(monetary, on='Email')

    # Keep only the latest purchase of each customer; its recency is the
    # customer's recency.
    df = df.sort_values(by=['Email', 'Fecha compra'], ascending=False)
    df = df.drop_duplicates(subset='Email', keep='first')
    df = df.set_index('Email')[rfm_columns]

    # Standardise, then discard customers with |z| > 3 on any metric.
    # The surviving rows keep the z-scores computed on the full set.
    scaled = StandardScaler().fit_transform(df[rfm_columns])
    is_outlier = (np.abs(scaled) > 3).any(axis=1)
    df = df[~is_outlier].copy()
    scaled = scaled[~is_outlier]

    if len(df) < n_clusters:
        raise ValueError(
            f"Need at least {n_clusters} customers after outlier removal "
            f"to build {n_clusters} clusters, got {len(df)}."
        )

    # A fixed random_state gives reproducible clusters (same seed as the
    # previous np.random.seed(0)) without reseeding NumPy's global RNG.
    kmeans = KMeans(n_clusters=n_clusters, n_init=10, random_state=0)
    labels = kmeans.fit_predict(scaled)
    df['cluster'] = [segment_names[label] for label in labels.tolist()]

    return df.reset_index()[['Email', 'recencia', 'frecuencia', 'monetario', 'cluster']]
def read_csv(file):
    """Load an uploaded CSV and return the first 10 rows of its RFM table.

    Args:
        file: The upload handed over by the Gradio ``File`` input. Either an
            object exposing the temporary file's path as ``.name`` or the
            path itself (string / path-like).

    Returns:
        The first 10 rows of the DataFrame produced by ``calculate_rfm``.

    Raises:
        ValueError: If no file was uploaded (``file`` is ``None``).
    """
    if file is None:
        raise ValueError("No CSV file was provided.")
    # Accept both a tempfile-like object (path in `.name`) and a plain path.
    path = getattr(file, "name", file)
    df = pd.read_csv(path)
    return calculate_rfm(df).head(10)
# Gradio front end: a single CSV upload goes to read_csv and the result is
# shown as a dataframe.
demo = gr.Interface(
    fn=read_csv,
    inputs=[gr.components.File(label="Select a CSV file")],
    outputs="dataframe",
    title="RFM Automatizado con Inteligencia Artificial",
)

# Start the web app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()