|
import gradio as gr |
|
import pandas as pd |
|
import numpy as np |
|
import datetime |
|
from sklearn.preprocessing import StandardScaler |
|
from sklearn.cluster import KMeans |
|
|
|
def calculate_rfm(df):
    """Build a per-customer RFM table and label each customer with a K-Means segment.

    Args:
        df: Purchase-level DataFrame with the columns 'Email',
            'Fecha compra' (dates written as %m/%d/%Y) and 'Valor compra'
            (presumably numeric amounts -- confirm against the CSV schema).
            The frame is not modified.

    Returns:
        DataFrame with one row per Email that survives outlier filtering,
        ordered by Email descending, with the columns:
        'Email', 'recencia' (whole days between today and the most recent
        purchase), 'frecuencia' (number of purchase rows), 'monetario'
        (sum of 'Valor compra' rounded to 2 decimals) and 'cluster'
        (segment name).

    Raises:
        KeyError: if a required column is missing.
        ValueError: if there are fewer customers than clusters, before or
            after outlier removal.
    """
    n_clusters = 5
    rfm_columns = ['recencia', 'frecuencia', 'monetario']
    # NOTE(review): K-Means label ids carry no inherent meaning, so this fixed
    # id -> name mapping is not tied to the clusters' actual RFM profile.
    # Kept as-is to preserve the existing output; consider naming segments
    # from the cluster centroids instead.
    segment_names = {
        0: 'Nuevos',
        1: 'En riesgo',
        2: 'Potenciales',
        3: 'Fieles',
        4: 'Dormidos',
    }

    # Work on a private frame so the caller's DataFrame is left untouched.
    purchases = df[['Email', 'Valor compra']].copy()
    purchase_dates = pd.to_datetime(df['Fecha compra'], format='%m/%d/%Y')
    today = pd.Timestamp(datetime.datetime.now().date())
    # `.dt.days` yields whole days on every pandas version; casting to
    # 'timedelta64[D]' is rejected by pandas >= 2.0.
    purchases['recencia'] = (today - purchase_dates.dt.normalize()).dt.days

    # One row per customer: the smallest recency is the most recent purchase.
    rfm = purchases.groupby('Email').agg(
        recencia=('recencia', 'min'),
        frecuencia=('Valor compra', 'size'),
        monetario=('Valor compra', 'sum'),
    )
    rfm['monetario'] = rfm['monetario'].round(2)
    # Keep the Email-descending row order of the previous implementation.
    rfm = rfm.sort_index(ascending=False)

    if len(rfm) < n_clusters:
        raise ValueError(
            f"Need at least {n_clusters} customers to segment, got {len(rfm)}."
        )

    # Standardize on all customers, then drop any customer whose z-score
    # exceeds 3 in absolute value on at least one metric.
    scaled = StandardScaler().fit_transform(rfm[rfm_columns])
    is_outlier = (np.abs(scaled) > 3).any(axis=1)
    rfm = rfm.loc[~is_outlier].copy()
    features = scaled[~is_outlier]

    if len(rfm) < n_clusters:
        raise ValueError(
            f"Need at least {n_clusters} customers after outlier removal, "
            f"got {len(rfm)}."
        )

    # Seed the estimator itself instead of reseeding the global NumPy RNG.
    kmeans = KMeans(n_clusters=n_clusters, n_init=10, random_state=0)
    labels = kmeans.fit_predict(features)
    rfm['cluster'] = [segment_names[int(label)] for label in labels]

    return rfm.reset_index()[['Email', *rfm_columns, 'cluster']]
|
|
|
def read_csv(file):
    """Load an uploaded CSV and return the first 10 rows of its RFM segmentation.

    Args:
        file: Either a path string or an object exposing the file path
            through a ``name`` attribute (the shape the original code
            expected from the upload component).

    Returns:
        The first 10 rows of the DataFrame produced by ``calculate_rfm``.

    Raises:
        ValueError: if no file was provided.
    """
    if file is None:
        # Fail with a clear message instead of an AttributeError on `.name`.
        raise ValueError("No CSV file was provided.")

    # A plain string is already the path; otherwise read it from `.name`.
    path = file if isinstance(file, str) else file.name
    purchases = pd.read_csv(path)
    return calculate_rfm(purchases).head(10)
|
|
|
# Gradio UI: one CSV upload wired to read_csv, with the result shown as a dataframe.
demo = gr.Interface(
    title="RFM Automatizado con Inteligencia Artificial",
    fn=read_csv,
    inputs=[gr.components.File(label="Select a CSV file")],
    outputs="dataframe",
)


# Launch the app only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()