import pandas as pd import os from sklearn.preprocessing import MinMaxScaler import random import re def assign_main_accounts(creators_file, chatter_files): creators = pd.read_excel(creators_file) creators.columns = creators.columns.str.strip() column_mapping = { "Creator": "Creator", "Total earnings": "Total earnings", "Subscription": "Subscription", "Active Fans": "ActiveFans", "Total active fans": "ActiveFans", } creators.rename(columns={k: v for k, v in column_mapping.items() if k in creators.columns}, inplace=True) required_columns = ["Creator", "Total earnings", "Subscription", "ActiveFans"] missing_columns = [col for col in required_columns if col not in creators.columns] if missing_columns: raise KeyError(f"Missing required columns in creators file: {missing_columns}") creators["Total earnings"] = creators["Total earnings"].replace("[\$,]", "", regex=True).astype(float) creators["Subscription"] = creators["Subscription"].replace("[\$,]", "", regex=True).astype(float) creators["ActiveFans"] = pd.to_numeric(creators["ActiveFans"], errors="coerce").fillna(0) scaler = MinMaxScaler() creators[["Earnings_Normalized", "Subscriptions_Normalized"]] = scaler.fit_transform( creators[["Total earnings", "Subscription"]] ) creators["Penalty Factor"] = 1 - abs(creators["Earnings_Normalized"] - creators["Subscriptions_Normalized"]) creators["Score"] = ( 0.7 * creators["Earnings_Normalized"] + 0.3 * creators["Subscriptions_Normalized"] ) * creators["Penalty Factor"] creators["Rank"] = creators["Score"].rank(ascending=False) creators = creators.sort_values(by="Rank").reset_index(drop=True) assignments = {} for idx, chatter_file in enumerate(chatter_files): shift_name = ["overnight", "day", "prime"][idx] chatters = pd.read_excel(chatter_file) chatters.columns = chatters.columns.str.strip() if "Final Rating" not in chatters.columns: raise KeyError(f"'Final Rating' column is missing in {chatter_file}") chatters = chatters.sort_values(by="Final Rating", ascending=False).reset_index(drop=True) num_chatters = len(chatters) creators_to_assign = creators.iloc[:num_chatters] chatters["Main Account"] = creators_to_assign["Creator"].values assignments[shift_name] = chatters.to_dict(orient="records") assignments["creator_names"] = creators["Creator"].tolist() print("DEBUG: Chatter Data with Main Account Assignments:") print(chatters.head()) return assignments def save_processed_files(assignments, output_dir): """ Save processed files for main assignments, ensuring chatter names and main accounts are preserved correctly. """ for shift, data in assignments.items(): if shift == "creator_names": continue # Create a DataFrame from the assignment data df = pd.DataFrame(data) # Handle multiple 'Main Account' columns and ensure there's only one if "Main Account_x" in df.columns and "Main Account_y" in df.columns: df["Main Account"] = df["Main Account_x"].fillna(df["Main Account_y"]) df.drop(columns=["Main Account_x", "Main Account_y"], inplace=True) elif "Main Account_x" in df.columns: df.rename(columns={"Main Account_x": "Main Account"}, inplace=True) elif "Main Account_y" in df.columns: df.rename(columns={"Main Account_y": "Main Account"}, inplace=True) # Ensure all other columns (like 'Final Rating', 'Desired Off Day', etc.) are retained required_columns = ["Name", "Main Account", "Final Rating", "Available Work Days", "Desired Off Day"] for col in required_columns: if col not in df.columns: df[col] = None # Add missing columns as empty # Ensure proper ordering of columns for consistency column_order = ["Name", "Main Account", "Final Rating", "Available Work Days", "Desired Off Day"] df = df[[col for col in column_order if col in df.columns] + [col for col in df.columns if col not in column_order]] # Save the cleaned DataFrame output_path = os.path.join(output_dir, f"Updated_{shift}_file.xlsx") df.to_excel(output_path, index=False) # Debugging: Verify the saved file contains the right columns print(f"DEBUG: Saved File for {shift}: {output_path}") print(df.head()) def generate_schedule(chatter_files, account_file): """ Generate schedules for different shifts (Overnight, Day, Prime) using chatter and account data. """ schedules = {} accounts = pd.read_excel(account_file) # Validate required columns in the account file if not {"Account", "ActiveFans"}.issubset(accounts.columns): raise KeyError("The account file must contain 'Account' and 'ActiveFans' columns.") shift_names = ["Overnight", "Day", "Prime"] for idx, chatter_file in enumerate(chatter_files): shift_name = shift_names[idx] chatters = pd.read_excel(chatter_file) # Debugging: Print initial chatter data print(f"DEBUG: Initial {shift_name} Chatter Data:") print(chatters.head()) # Clean chatter data chatters = clean_chatter_data(chatters) # Debugging: Print cleaned chatter data print(f"DEBUG: Cleaned {shift_name} Chatter Data:") print(chatters.head()) # Create a blank schedule template schedule = create_schedule_template(accounts) # Debugging: Print initial schedule template print(f"DEBUG: Initial Schedule Template for {shift_name}:") print(schedule.head()) # Assign main accounts to the schedule schedule = assign_main_accounts_to_schedule(schedule, chatters) # Debugging: Print schedule after assigning main accounts print(f"DEBUG: Schedule After Assigning Main Accounts for {shift_name}:") print(schedule.head()) # Assign days off based on chatter preferences schedule = assign_off_days(schedule, chatters) # Debugging: Print schedule after assigning off days print(f"DEBUG: Schedule After Assigning Off Days for {shift_name}:") print(schedule.head()) # Randomly fill the remaining slots while respecting constraints schedule = randomly_fill_slots(schedule, chatters) # Debugging: Print final schedule for the shift print(f"DEBUG: Final Schedule for {shift_name}:") print(schedule.head()) # Save the schedule schedules[shift_name] = schedule.to_dict(orient="records") return schedules def create_schedule_template(account_data): """ Create a blank schedule template with required columns. """ if "Account" not in account_data.columns or "ActiveFans" not in account_data.columns: raise KeyError("Account data must contain 'Account' and 'ActiveFans' columns.") schedule_template = account_data[["Account", "ActiveFans"]].copy() for day in ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"]: schedule_template[day] = None # Initialize all days as None return schedule_template def assign_main_accounts_to_schedule(schedule, chatter_data): """ Assign main accounts to the schedule based on chatter data. """ days_of_week = ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"] # Dynamically detect the correct column for the main account main_account_col = next( (col for col in ["Main Account", "Main_Account_x", "Main_Account_y"] if col in chatter_data.columns), None ) if not main_account_col: raise KeyError("Main Account column not found in chatter data.") # Iterate over each chatter and assign their main account to the schedule for _, chatter in chatter_data.iterrows(): chatter_name = chatter["Name"] main_account = chatter[main_account_col] if pd.notnull(main_account): # Locate the row in the schedule that matches the main account matching_row = schedule[schedule["Account"].str.lower() == main_account.lower()] if not matching_row.empty: row_index = matching_row.index[0] # Assign the chatter's name to all days where the slot is empty for day in days_of_week: if pd.isnull(schedule.at[row_index, day]): schedule.at[row_index, day] = chatter_name # Debugging: Output updated schedule for verification print("DEBUG: Updated Schedule after assigning main accounts:") print(schedule) return schedule def clean_chatter_data(chatter_data): """ Clean and prepare chatter data for scheduling. """ # Merge any duplicate 'Main Account' columns if "Main Account_x" in chatter_data.columns and "Main Account_y" in chatter_data.columns: chatter_data["Main Account"] = chatter_data["Main Account_x"].fillna(chatter_data["Main Account_y"]) chatter_data.drop(columns=["Main Account_x", "Main Account_y"], inplace=True) elif "Main Account_x" in chatter_data.columns: chatter_data.rename(columns={"Main Account_x": "Main Account"}, inplace=True) elif "Main Account_y" in chatter_data.columns: chatter_data.rename(columns={"Main Account_y": "Main Account"}, inplace=True) # Validate required columns required_columns = ["Name", "Main Account", "Final Rating", "Available Work Days"] for col in required_columns: if col not in chatter_data.columns: raise KeyError(f"Missing required column in chatter data: {col}") # Clean and format other data fields if needed chatter_data["WorkDays"] = pd.to_numeric(chatter_data.get("Available Work Days", 6), errors="coerce").fillna(6).astype(int) chatter_data["Desired Off Day"] = chatter_data["Desired Off Day"].fillna("").apply( lambda x: [day.strip().capitalize() for day in re.split(r"[ ,]+", x) if day.strip()] ) return chatter_data def assign_off_days(schedule, chatter_data): """ Assign days off for each chatter based on their 'Desired Off Day' field. """ if "Desired Off Day" not in chatter_data.columns: chatter_data["Desired Off Day"] = "" days_of_week = ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"] for _, chatter in chatter_data.iterrows(): chatter_name = chatter["Name"] desired_off_days = chatter["Desired Off Day"] # Ensure desired_off_days is parsed into a list if isinstance(desired_off_days, str): desired_off_days = [ day.strip().capitalize() for day in desired_off_days.split(",") if day.strip().capitalize() in days_of_week ] # Assign None to the schedule for each desired off day for day in desired_off_days: if day in days_of_week: schedule.loc[schedule[day] == chatter_name, day] = None # Debugging: Verify schedule after assigning off days print("DEBUG: Schedule After Assigning Off Days:") print(schedule.head()) return schedule def randomly_fill_slots(schedule, chatter_data, max_accounts_per_day=3, max_fans_per_day=1000): """ Randomly fill remaining slots in the schedule while respecting constraints. """ days_of_week = ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"] daily_accounts = {chatter: {day: 0 for day in days_of_week} for chatter in chatter_data["Name"]} daily_fans = {chatter: {day: 0 for day in days_of_week} for chatter in chatter_data["Name"]} chatters_list = chatter_data["Name"].tolist() for day in days_of_week: for i, row in schedule.iterrows(): if pd.isnull(schedule.at[i, day]): # If the slot is empty random.shuffle(chatters_list) # Shuffle chatters to randomize assignments for chatter in chatters_list: active_fans = row["ActiveFans"] if ( daily_accounts[chatter][day] < max_accounts_per_day and daily_fans[chatter][day] + active_fans <= max_fans_per_day ): schedule.at[i, day] = chatter daily_accounts[chatter][day] += 1 daily_fans[chatter][day] += active_fans break # Debugging: Verify schedule after filling slots print("DEBUG: Schedule After Randomly Filling Slots:") print(schedule.head()) return schedule