rfmAutoV3 / app.py
Jhoeel's picture
Update app.py
898099b
raw
history blame
2.72 kB
import gradio as gr
import pandas as pd
import numpy as np
import datetime
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
def calculate_rfm(df, *, reference_date=None, n_clusters=5):
    """Build a per-customer RFM table and assign each customer a K-Means cluster.

    Args:
        df: Purchase records, one row per purchase, with the columns
            'Email' (customer key), 'Fecha compra' (purchase date as a
            ``%m/%d/%Y`` string) and 'Valor compra' (purchase amount).
            The frame is not modified.
        reference_date: Day against which recency is measured. Anything
            ``pd.Timestamp`` accepts (timezone-naive); defaults to today.
        n_clusters: Requested number of K-Means clusters. It is lowered to
            the number of remaining customers when fewer are available.

    Returns:
        A DataFrame with the columns 'Email', 'recencia' (days since the
        customer's latest purchase), 'frecuencia' (number of purchases),
        'monetario' (total spent, rounded to 2 decimals) and 'cluster',
        ordered by 'Email' descending. Customers whose standardized value
        exceeds 3 in absolute terms on any feature are left out.
    """
    output_columns = ['Email', 'recencia', 'frecuencia', 'monetario', 'cluster']
    if df.empty:
        # Nothing to scale or cluster; StandardScaler would raise on 0 rows.
        return pd.DataFrame(columns=output_columns)

    # Work on a copy so the caller's frame keeps its original columns/dtypes.
    df = df.copy()

    # Recency: whole days between the reference day and each purchase.
    df['Fecha compra'] = pd.to_datetime(df['Fecha compra'], format='%m/%d/%Y')
    if reference_date is None:
        reference_date = datetime.date.today()
    reference_day = pd.Timestamp(reference_date).normalize()
    # `.dt.days` is used instead of astype('timedelta64[D]'), which pandas 2.x
    # rejects; astype(int) still fails loudly if a purchase date is missing.
    elapsed = reference_day - df['Fecha compra'].dt.normalize()
    df['recencia'] = elapsed.dt.days.astype(int)

    # Frequency and monetary value per customer.
    grouped = df.groupby('Email')
    frequency = grouped['Email'].count().rename('frecuencia')
    monetary = grouped['Valor compra'].sum().round(2).rename('monetario')
    df = df.join(frequency, on='Email').join(monetary, on='Email')

    # Keep only the latest purchase of each customer (it carries the
    # customer's smallest recency).
    df = df.sort_values(by=['Email', 'Fecha compra'], ascending=False)
    df = df.drop_duplicates(subset='Email', keep='first').set_index('Email')

    # Standardize the three features and flag rows with any |z| > 3.
    feature_columns = ['recencia', 'frecuencia', 'monetario']
    scaled_values = StandardScaler().fit_transform(df[feature_columns])
    keep_mask = ~(np.abs(scaled_values) > 3).any(axis=1)

    # Outliers are dropped after scaling, so the remaining rows keep the
    # z-scores computed over the full customer set.
    df = df.loc[keep_mask, feature_columns].copy()
    scaled_values = scaled_values[keep_mask]
    if df.empty:
        df['cluster'] = pd.Series(dtype=int)
        return df.reset_index()[output_columns]

    # random_state pins the seed without reseeding NumPy's global RNG;
    # K-Means cannot build more clusters than there are samples.
    kmeans = KMeans(
        n_clusters=min(n_clusters, len(df)),
        n_init=10,
        random_state=0,
    )
    df['cluster'] = kmeans.fit_predict(scaled_values)

    return df.reset_index()[output_columns]
def read_csv(file):
    """Load an uploaded CSV and return the first 10 rows of its RFM table.

    Args:
        file: The upload handed over by the Gradio File component: either a
            path string, an object exposing the path as ``.name``, or
            ``None`` when nothing was uploaded.

    Returns:
        The first 10 rows produced by ``calculate_rfm``; an empty DataFrame
        when no file was provided.
    """
    if file is None:
        # Submitting without an upload should not crash on attribute access.
        return pd.DataFrame()

    # Plain path strings have no `.name`; file wrappers expose the path there.
    csv_source = file if isinstance(file, str) else getattr(file, "name", file)
    purchases = pd.read_csv(csv_source)
    return calculate_rfm(purchases).head(10)
# Gradio UI: a single CSV upload goes through read_csv and the resulting
# table is rendered as a dataframe.
demo = gr.Interface(
    title="RFM Automatizado con Inteligencia Artificial",
    fn=read_csv,
    inputs=[gr.components.File(label="Select a CSV file")],
    outputs="dataframe",
)

# Start the app only when this file is run as a script, not when imported.
if __name__ == "__main__":
    demo.launch()