Schedule_App / helper.py
3morrrrr's picture
Update helper.py
58c2739 verified
raw
history blame
12.7 kB
import pandas as pd
import os
from sklearn.preprocessing import MinMaxScaler
import random
import re
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
def assign_main_accounts(creators_file, chatter_files):
creators = pd.read_excel(creators_file)
creators.columns = creators.columns.str.strip()
# Debugging: Check initial columns
print("DEBUG: Initial Columns in Creator File:", creators.columns)
column_mapping = {
"Creator": "Creator",
"Total earnings": "Total earnings",
"Subscription": "Subscription",
"Active Fans": "ActiveFans",
"Total active fans": "ActiveFans",
}
creators.rename(columns={k: v for k, v in column_mapping.items() if k in creators.columns}, inplace=True)
# Debugging: Check renamed columns
print("DEBUG: Renamed Columns in Creator File:", creators.columns)
required_columns = ["Creator", "Total earnings", "Subscription", "ActiveFans"]
missing_columns = [col for col in required_columns if col not in creators.columns]
if missing_columns:
raise KeyError(f"Missing required columns in creators file: {missing_columns}")
# Process creators file
creators["Total earnings"] = creators["Total earnings"].replace("[\$,]", "", regex=True).astype(float)
creators["Subscription"] = creators["Subscription"].replace("[\$,]", "", regex=True).astype(float)
creators["ActiveFans"] = pd.to_numeric(creators["ActiveFans"], errors="coerce").fillna(0)
scaler = MinMaxScaler()
creators[["Earnings_Normalized", "Subscriptions_Normalized"]] = scaler.fit_transform(
creators[["Total earnings", "Subscription"]]
)
creators["Penalty Factor"] = 1 - abs(creators["Earnings_Normalized"] - creators["Subscriptions_Normalized"])
creators["Score"] = (
0.7 * creators["Earnings_Normalized"] + 0.3 * creators["Subscriptions_Normalized"]
) * creators["Penalty Factor"]
creators["Rank"] = creators["Score"].rank(ascending=False)
creators = creators.sort_values(by="Rank").reset_index(drop=True)
processed_creator_file = creators[["Creator", "ActiveFans"]]
updated_chatter_files = []
assignments = []
for idx, chatter_file in enumerate(chatter_files):
chatters = pd.read_excel(chatter_file)
chatters["Main Account"] = creators.iloc[:len(chatters)]["Creator"].values
updated_chatter_files.append(chatters)
assignments.append(chatters)
return updated_chatter_files, processed_creator_file, pd.concat(assignments)
def save_processed_files(assignments, output_dir):
"""
Save processed files for main assignments, ensuring chatter names and main accounts are preserved correctly.
"""
for shift, data in assignments.items():
if shift == "creator_names":
continue
# Create a DataFrame from the assignment data
df = pd.DataFrame(data)
# Handle multiple 'Main Account' columns and ensure there's only one
if "Main Account_x" in df.columns and "Main Account_y" in df.columns:
df["Main Account"] = df["Main Account_x"].fillna(df["Main Account_y"])
df.drop(columns=["Main Account_x", "Main Account_y"], inplace=True)
elif "Main Account_x" in df.columns:
df.rename(columns={"Main Account_x": "Main Account"}, inplace=True)
elif "Main Account_y" in df.columns:
df.rename(columns={"Main Account_y": "Main Account"}, inplace=True)
# Ensure all other columns (like 'Final Rating', 'Desired Off Day', etc.) are retained
required_columns = ["Name", "Main Account", "Final Rating", "Available Work Days", "Desired Off Day"]
for col in required_columns:
if col not in df.columns:
df[col] = None # Add missing columns as empty
# Ensure proper ordering of columns for consistency
column_order = ["Name", "Main Account", "Final Rating", "Available Work Days", "Desired Off Day"]
df = df[[col for col in column_order if col in df.columns] + [col for col in df.columns if col not in column_order]]
# Save the cleaned DataFrame
output_path = os.path.join(output_dir, f"Updated_{shift}_file.xlsx")
df.to_excel(output_path, index=False)
# Debugging: Verify the saved file contains the right columns
print(f"DEBUG: Saved File for {shift}: {output_path}")
print(df.head())
def generate_schedule(chatter_files, account_data):
"""
Generate schedules for different shifts (Overnight, Day, Prime) using chatter and account data.
"""
schedules = {}
# Validate required columns in the account data
if not {"Creator", "ActiveFans"}.issubset(account_data.columns):
raise KeyError("The account data must contain 'Creator' and 'ActiveFans' columns.")
shift_names = ["Overnight", "Day", "Prime"]
for idx, chatter_df in enumerate(chatter_files):
shift_name = shift_names[idx]
# Debugging: Print initial chatter data
print(f"DEBUG: Initial {shift_name} Chatter Data:")
print(chatter_df.head())
# Clean chatter data
chatter_df = clean_chatter_data(chatter_df)
# Debugging: Print cleaned chatter data
print(f"DEBUG: Cleaned {shift_name} Chatter Data:")
print(chatter_df.head())
# Create a blank schedule template
schedule = create_schedule_template(account_data)
# Debugging: Print initial schedule template
print(f"DEBUG: Initial Schedule Template for {shift_name}:")
print(schedule.head())
# Assign main accounts to the schedule
schedule = assign_main_accounts_to_schedule(schedule, chatter_df)
# Debugging: Print schedule after assigning main accounts
print(f"DEBUG: Schedule After Assigning Main Accounts for {shift_name}:")
print(schedule.head())
# Assign days off based on chatter preferences
schedule = assign_off_days(schedule, chatter_df)
# Debugging: Print schedule after assigning off days
print(f"DEBUG: Schedule After Assigning Off Days for {shift_name}:")
print(schedule.head())
# Randomly fill the remaining slots while respecting constraints
schedule = randomly_fill_slots(schedule, chatter_df)
# Debugging: Print final schedule for the shift
print(f"DEBUG: Final Schedule for {shift_name}:")
print(schedule.head())
# Save the schedule
schedules[shift_name] = schedule.to_dict(orient="records")
return schedules
def create_schedule_template(account_data):
"""
Create a blank schedule template with required columns.
"""
if "Account" not in account_data.columns or "ActiveFans" not in account_data.columns:
raise KeyError("Account data must contain 'Account' and 'ActiveFans' columns.")
schedule_template = account_data[["Account", "ActiveFans"]].copy()
for day in ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"]:
schedule_template[day] = None # Initialize all days as None
return schedule_template
def assign_main_accounts_to_schedule(schedule, chatter_data):
"""
Assign main accounts to the schedule based on chatter data.
"""
days_of_week = ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"]
# Dynamically detect the correct column for the main account
main_account_col = next(
(col for col in ["Main Account", "Main_Account_x", "Main_Account_y"] if col in chatter_data.columns), None
)
if not main_account_col:
raise KeyError("Main Account column not found in chatter data.")
# Iterate over each chatter and assign their main account to the schedule
for _, chatter in chatter_data.iterrows():
chatter_name = chatter["Name"]
main_account = chatter[main_account_col]
if pd.notnull(main_account):
# Locate the row in the schedule that matches the main account
matching_row = schedule[schedule["Account"].str.lower() == main_account.lower()]
if not matching_row.empty:
row_index = matching_row.index[0]
# Assign the chatter's name to all days where the slot is empty
for day in days_of_week:
if pd.isnull(schedule.at[row_index, day]):
schedule.at[row_index, day] = chatter_name
# Debugging: Output updated schedule for verification
print("DEBUG: Updated Schedule after assigning main accounts:")
print(schedule)
return schedule
def clean_chatter_data(chatter_data):
"""
Clean and prepare chatter data for scheduling.
"""
# Merge any duplicate 'Main Account' columns
if "Main Account_x" in chatter_data.columns and "Main Account_y" in chatter_data.columns:
chatter_data["Main Account"] = chatter_data["Main Account_x"].fillna(chatter_data["Main Account_y"])
chatter_data.drop(columns=["Main Account_x", "Main Account_y"], inplace=True)
elif "Main Account_x" in chatter_data.columns:
chatter_data.rename(columns={"Main Account_x": "Main Account"}, inplace=True)
elif "Main Account_y" in chatter_data.columns:
chatter_data.rename(columns={"Main Account_y": "Main Account"}, inplace=True)
# Validate required columns
required_columns = ["Name", "Main Account", "Final Rating", "Available Work Days"]
for col in required_columns:
if col not in chatter_data.columns:
raise KeyError(f"Missing required column in chatter data: {col}")
# Clean and format other data fields if needed
chatter_data["WorkDays"] = pd.to_numeric(chatter_data.get("Available Work Days", 6), errors="coerce").fillna(6).astype(int)
chatter_data["Desired Off Day"] = chatter_data["Desired Off Day"].fillna("").apply(
lambda x: [day.strip().capitalize() for day in re.split(r"[ ,]+", x) if day.strip()]
)
return chatter_data
def assign_off_days(schedule, chatter_data):
"""
Assign days off for each chatter based on their 'Desired Off Day' field.
"""
if "Desired Off Day" not in chatter_data.columns:
chatter_data["Desired Off Day"] = ""
days_of_week = ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"]
for _, chatter in chatter_data.iterrows():
chatter_name = chatter["Name"]
desired_off_days = chatter["Desired Off Day"]
# Ensure desired_off_days is parsed into a list
if isinstance(desired_off_days, str):
desired_off_days = [
day.strip().capitalize()
for day in desired_off_days.split(",")
if day.strip().capitalize() in days_of_week
]
# Assign None to the schedule for each desired off day
for day in desired_off_days:
if day in days_of_week:
schedule.loc[schedule[day] == chatter_name, day] = None
# Debugging: Verify schedule after assigning off days
print("DEBUG: Schedule After Assigning Off Days:")
print(schedule.head())
return schedule
def randomly_fill_slots(schedule, chatter_data, max_accounts_per_day=3, max_fans_per_day=1000):
"""
Randomly fill remaining slots in the schedule while respecting constraints.
"""
days_of_week = ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"]
daily_accounts = {chatter: {day: 0 for day in days_of_week} for chatter in chatter_data["Name"]}
daily_fans = {chatter: {day: 0 for day in days_of_week} for chatter in chatter_data["Name"]}
chatters_list = chatter_data["Name"].tolist()
for day in days_of_week:
for i, row in schedule.iterrows():
if pd.isnull(schedule.at[i, day]): # If the slot is empty
random.shuffle(chatters_list) # Shuffle chatters to randomize assignments
for chatter in chatters_list:
active_fans = row["ActiveFans"]
if (
daily_accounts[chatter][day] < max_accounts_per_day and
daily_fans[chatter][day] + active_fans <= max_fans_per_day
):
schedule.at[i, day] = chatter
daily_accounts[chatter][day] += 1
daily_fans[chatter][day] += active_fans
break
# Debugging: Verify schedule after filling slots
print("DEBUG: Schedule After Randomly Filling Slots:")
print(schedule.head())
return schedule