Spaces:
Sleeping
Sleeping
Upload helper.py
Browse files
helper.py
ADDED
@@ -0,0 +1,315 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pandas as pd
|
2 |
+
import os
|
3 |
+
from sklearn.preprocessing import MinMaxScaler
|
4 |
+
import random
|
5 |
+
import re
|
6 |
+
|
7 |
+
def assign_main_accounts(creators_file, chatter_files):
    """
    Rank creators and pair the top-ranked ones with each shift's best chatters.

    Creators are scored from min-max-normalised "Total earnings" and
    "Subscription" values: ``(0.7 * earnings + 0.3 * subscriptions)`` scaled
    by a penalty factor ``1 - |earnings - subscriptions|``. Within every
    shift, chatters are sorted by "Final Rating" (best first) and the i-th
    chatter receives the i-th ranked creator as "Main Account". Every shift
    draws from the top of the same ranked creator list.

    Args:
        creators_file: Excel source accepted by ``pd.read_excel``. Must provide
            "Creator", "Total earnings", "Subscription" and either
            "Active Fans" or "Total active fans" (header whitespace is stripped).
        chatter_files: Iterable of at most three Excel sources, interpreted in
            order as the overnight, day and prime shifts. Each needs a
            "Final Rating" column.

    Returns:
        dict: ``"overnight"`` / ``"day"`` / ``"prime"`` mapped to a list of
        chatter records (dicts, including "Main Account"), plus
        ``"creator_names"`` mapped to the creator names in rank order.

    Raises:
        KeyError: A required column is missing from one of the files.
        IndexError: More than three chatter files were supplied.
    """
    shift_names = ["overnight", "day", "prime"]

    creators = pd.read_excel(creators_file)
    creators.columns = creators.columns.str.strip()

    column_mapping = {
        "Creator": "Creator",
        "Total earnings": "Total earnings",
        "Subscription": "Subscription",
        "Active Fans": "ActiveFans",
        "Total active fans": "ActiveFans",
    }
    creators.rename(
        columns={k: v for k, v in column_mapping.items() if k in creators.columns},
        inplace=True,
    )

    required_columns = ["Creator", "Total earnings", "Subscription", "ActiveFans"]
    missing_columns = [col for col in required_columns if col not in creators.columns]
    if missing_columns:
        raise KeyError(f"Missing required columns in creators file: {missing_columns}")

    # Strip currency symbols / thousands separators before converting to float.
    # Raw string: "\$" is not a valid Python escape sequence.
    creators["Total earnings"] = creators["Total earnings"].replace(r"[\$,]", "", regex=True).astype(float)
    creators["Subscription"] = creators["Subscription"].replace(r"[\$,]", "", regex=True).astype(float)
    creators["ActiveFans"] = pd.to_numeric(creators["ActiveFans"], errors="coerce").fillna(0)

    scaler = MinMaxScaler()
    creators[["Earnings_Normalized", "Subscriptions_Normalized"]] = scaler.fit_transform(
        creators[["Total earnings", "Subscription"]]
    )
    # The penalty shrinks the score when earnings and subscriptions disagree.
    creators["Penalty Factor"] = 1 - abs(creators["Earnings_Normalized"] - creators["Subscriptions_Normalized"])
    creators["Score"] = (
        0.7 * creators["Earnings_Normalized"] + 0.3 * creators["Subscriptions_Normalized"]
    ) * creators["Penalty Factor"]
    creators["Rank"] = creators["Score"].rank(ascending=False)
    creators = creators.sort_values(by="Rank").reset_index(drop=True)

    assignments = {}
    chatters = None
    for idx, chatter_file in enumerate(chatter_files):
        if idx >= len(shift_names):
            raise IndexError(
                f"At most {len(shift_names)} chatter files are supported "
                f"({', '.join(shift_names)})."
            )
        shift_name = shift_names[idx]
        chatters = pd.read_excel(chatter_file)
        chatters.columns = chatters.columns.str.strip()

        if "Final Rating" not in chatters.columns:
            raise KeyError(f"'Final Rating' column is missing in {chatter_file}")

        chatters = chatters.sort_values(by="Final Rating", ascending=False).reset_index(drop=True)
        num_chatters = len(chatters)
        main_accounts = creators["Creator"].iloc[:num_chatters].tolist()
        # With fewer creators than chatters the lowest-rated chatters get no
        # main account instead of triggering a length-mismatch ValueError.
        main_accounts.extend([None] * (num_chatters - len(main_accounts)))
        chatters["Main Account"] = main_accounts

        assignments[shift_name] = chatters.to_dict(orient="records")

    assignments["creator_names"] = creators["Creator"].tolist()
    print("DEBUG: Chatter Data with Main Account Assignments:")
    if chatters is not None:  # nothing to show when no chatter file was given
        print(chatters.head())

    return assignments
|
61 |
+
|
62 |
+
|
63 |
+
|
64 |
+
def save_processed_files(assignments, output_dir):
    """
    Write one ``Updated_<shift>_file.xlsx`` workbook per shift.

    Args:
        assignments: Mapping of shift name to a list of chatter records
            (anything ``pd.DataFrame`` accepts). The ``"creator_names"`` key is
            skipped.
        output_dir: Directory that receives the workbooks; it is created when
            it does not exist yet.

    Each workbook starts with the columns "Name", "Main Account",
    "Final Rating", "Available Work Days" and "Desired Off Day" (added empty
    when absent), followed by any remaining columns in their existing order.
    Split "Main Account_x" / "Main Account_y" columns are merged into a single
    "Main Account" column, preferring the ``_x`` value.
    """
    leading_columns = ["Name", "Main Account", "Final Rating", "Available Work Days", "Desired Off Day"]

    if output_dir:
        # Without this a missing directory makes to_excel fail.
        os.makedirs(output_dir, exist_ok=True)

    for shift, data in assignments.items():
        if shift == "creator_names":
            continue

        df = pd.DataFrame(data)

        # Collapse merge-suffixed 'Main Account' columns into one.
        if "Main Account_x" in df.columns and "Main Account_y" in df.columns:
            df["Main Account"] = df["Main Account_x"].fillna(df["Main Account_y"])
            df.drop(columns=["Main Account_x", "Main Account_y"], inplace=True)
        elif "Main Account_x" in df.columns:
            df.rename(columns={"Main Account_x": "Main Account"}, inplace=True)
        elif "Main Account_y" in df.columns:
            df.rename(columns={"Main Account_y": "Main Account"}, inplace=True)

        # Guarantee the leading columns exist, then move them to the front.
        for col in leading_columns:
            if col not in df.columns:
                df[col] = None
        trailing_columns = [col for col in df.columns if col not in leading_columns]
        df = df[leading_columns + trailing_columns]

        output_path = os.path.join(output_dir, f"Updated_{shift}_file.xlsx")
        df.to_excel(output_path, index=False)

        print(f"DEBUG: Saved File for {shift}: {output_path}")
        print(df.head())
|
101 |
+
|
102 |
+
|
103 |
+
|
104 |
+
|
105 |
+
def generate_schedule(chatter_files, account_file):
    """
    Generate schedules for the Overnight, Day and Prime shifts.

    For each chatter file the pipeline is: clean the chatter data, build a
    blank weekly template from the account file, place each chatter on their
    main account, clear their desired off days, then randomly fill whatever
    slots are still empty.

    Args:
        chatter_files: Iterable of at most three Excel sources, interpreted in
            order as Overnight, Day and Prime.
        account_file: Excel source with "Account" and "ActiveFans" columns
            (header whitespace is stripped before validation).

    Returns:
        dict: Shift name mapped to the schedule as a list of row records.

    Raises:
        KeyError: The account file lacks "Account" or "ActiveFans".
        IndexError: More than three chatter files were supplied.
    """
    shift_names = ["Overnight", "Day", "Prime"]
    schedules = {}

    accounts = pd.read_excel(account_file)
    # Same header normalisation as assign_main_accounts, so "Account " passes.
    accounts.columns = accounts.columns.str.strip()

    if not {"Account", "ActiveFans"}.issubset(accounts.columns):
        raise KeyError("The account file must contain 'Account' and 'ActiveFans' columns.")

    for idx, chatter_file in enumerate(chatter_files):
        if idx >= len(shift_names):
            raise IndexError(
                f"At most {len(shift_names)} chatter files are supported "
                f"({', '.join(shift_names)})."
            )
        shift_name = shift_names[idx]
        chatters = pd.read_excel(chatter_file)
        chatters.columns = chatters.columns.str.strip()

        print(f"DEBUG: Initial {shift_name} Chatter Data:")
        print(chatters.head())

        chatters = clean_chatter_data(chatters)

        print(f"DEBUG: Cleaned {shift_name} Chatter Data:")
        print(chatters.head())

        # Every shift starts from its own blank copy of the account list.
        schedule = create_schedule_template(accounts)

        print(f"DEBUG: Initial Schedule Template for {shift_name}:")
        print(schedule.head())

        schedule = assign_main_accounts_to_schedule(schedule, chatters)

        print(f"DEBUG: Schedule After Assigning Main Accounts for {shift_name}:")
        print(schedule.head())

        schedule = assign_off_days(schedule, chatters)

        print(f"DEBUG: Schedule After Assigning Off Days for {shift_name}:")
        print(schedule.head())

        schedule = randomly_fill_slots(schedule, chatters)

        print(f"DEBUG: Final Schedule for {shift_name}:")
        print(schedule.head())

        schedules[shift_name] = schedule.to_dict(orient="records")

    return schedules
|
165 |
+
|
166 |
+
|
167 |
+
|
168 |
+
|
169 |
+
|
170 |
+
|
171 |
+
def create_schedule_template(account_data):
    """
    Create a blank weekly schedule with one row per account.

    Args:
        account_data: DataFrame containing at least "Account" and "ActiveFans".

    Returns:
        DataFrame: A copy of the "Account" and "ActiveFans" columns followed by
        one column per weekday (Monday..Sunday), every slot set to None.
        ``account_data`` itself is left untouched.

    Raises:
        KeyError: "Account" or "ActiveFans" is missing.
    """
    if "Account" not in account_data.columns or "ActiveFans" not in account_data.columns:
        raise KeyError("Account data must contain 'Account' and 'ActiveFans' columns.")

    # Copy so the weekday columns are not added to the caller's frame.
    schedule_template = account_data[["Account", "ActiveFans"]].copy()
    for day in ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"]:
        schedule_template[day] = None  # empty slot

    return schedule_template
|
183 |
+
|
184 |
+
|
185 |
+
|
186 |
+
def assign_main_accounts_to_schedule(schedule, chatter_data):
    """
    Put every chatter on their main account for all still-empty weekdays.

    Args:
        schedule: DataFrame with an "Account" column and one column per
            weekday. It is modified in place and also returned.
        chatter_data: DataFrame with "Name" and a main-account column
            ("Main Account", or a merge-suffixed "Main Account_x" /
            "Main Account_y"; the legacy underscore spellings are accepted too).

    Returns:
        DataFrame: The same ``schedule`` object with main accounts filled in.

    Raises:
        KeyError: No main-account column exists in ``chatter_data``.

    Account names are compared case-insensitively after trimming whitespace.
    When several schedule rows share a name, the first one is used. Slots that
    already hold a value are never overwritten, so an earlier chatter keeps
    the account if two chatters share a main account.
    """
    days_of_week = ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"]

    # "Main Account_x"/"_y" are the names the rest of this file produces and
    # handles; the underscore variants are kept for backward compatibility.
    candidate_columns = [
        "Main Account",
        "Main Account_x",
        "Main Account_y",
        "Main_Account_x",
        "Main_Account_y",
    ]
    main_account_col = next((col for col in candidate_columns if col in chatter_data.columns), None)

    if not main_account_col:
        raise KeyError("Main Account column not found in chatter data.")

    # Build the normalised-name -> first row lookup once; str() keeps
    # non-string account identifiers (e.g. numeric ids) from crashing.
    first_row_for_account = {}
    for row_label, account in zip(schedule.index, schedule["Account"]):
        if pd.notnull(account):
            first_row_for_account.setdefault(str(account).strip().lower(), row_label)

    for _, chatter in chatter_data.iterrows():
        chatter_name = chatter["Name"]
        main_account = chatter[main_account_col]

        if pd.isnull(main_account):
            continue

        row_index = first_row_for_account.get(str(main_account).strip().lower())
        if row_index is None:
            continue  # main account is not part of this schedule

        for day in days_of_week:
            if pd.isnull(schedule.at[row_index, day]):
                schedule.at[row_index, day] = chatter_name

    print("DEBUG: Updated Schedule after assigning main accounts:")
    print(schedule)

    return schedule
|
222 |
+
|
223 |
+
|
224 |
+
|
225 |
+
|
226 |
+
|
227 |
+
def clean_chatter_data(chatter_data):
    """
    Clean and prepare chatter data for scheduling.

    Args:
        chatter_data: DataFrame with "Name", "Final Rating",
            "Available Work Days" and a main-account column ("Main Account" or
            merge-suffixed "Main Account_x" / "Main Account_y"). It is modified
            in place and also returned.

    Returns:
        DataFrame: The same frame with
        * a single "Main Account" column (``_x`` preferred over ``_y``),
        * "WorkDays": "Available Work Days" as int, 6 where not numeric,
        * "Desired Off Day": a list of capitalised day names per row
          (empty list when blank or when the column is absent).

    Raises:
        KeyError: A required column is missing.
    """
    # Merge any duplicate 'Main Account' columns
    if "Main Account_x" in chatter_data.columns and "Main Account_y" in chatter_data.columns:
        chatter_data["Main Account"] = chatter_data["Main Account_x"].fillna(chatter_data["Main Account_y"])
        chatter_data.drop(columns=["Main Account_x", "Main Account_y"], inplace=True)
    elif "Main Account_x" in chatter_data.columns:
        chatter_data.rename(columns={"Main Account_x": "Main Account"}, inplace=True)
    elif "Main Account_y" in chatter_data.columns:
        chatter_data.rename(columns={"Main Account_y": "Main Account"}, inplace=True)

    required_columns = ["Name", "Main Account", "Final Rating", "Available Work Days"]
    for col in required_columns:
        if col not in chatter_data.columns:
            raise KeyError(f"Missing required column in chatter data: {col}")

    def _parse_off_days(value):
        """Turn one 'Desired Off Day' cell into a list of capitalised names."""
        if isinstance(value, (list, tuple)):
            parts = value  # already parsed (e.g. the frame was cleaned before)
        elif pd.isnull(value):
            return []
        else:
            # str() guards against non-text cells coming out of Excel.
            parts = re.split(r"[ ,]+", str(value))
        return [str(part).strip().capitalize() for part in parts if str(part).strip()]

    chatter_data["WorkDays"] = (
        pd.to_numeric(chatter_data["Available Work Days"], errors="coerce").fillna(6).astype(int)
    )

    # "Desired Off Day" is optional: treat a missing column as "no off days".
    if "Desired Off Day" not in chatter_data.columns:
        chatter_data["Desired Off Day"] = ""
    chatter_data["Desired Off Day"] = chatter_data["Desired Off Day"].apply(_parse_off_days)

    return chatter_data
|
253 |
+
|
254 |
+
|
255 |
+
def assign_off_days(schedule, chatter_data):
    """
    Clear each chatter's slots on the days listed in 'Desired Off Day'.

    Args:
        schedule: DataFrame with one column per weekday holding chatter names.
            Modified in place and also returned.
        chatter_data: DataFrame with "Name" and, optionally,
            "Desired Off Day". A cell may be a list of day names or a string
            of names separated by commas and/or spaces; blank or missing cells
            mean no off days. When the column is absent it is added to
            ``chatter_data`` as empty strings.

    Returns:
        DataFrame: The same ``schedule`` with the affected slots set to None.

    Entries that are not weekday names (after capitalisation) are ignored.
    """
    if "Desired Off Day" not in chatter_data.columns:
        chatter_data["Desired Off Day"] = ""

    days_of_week = ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"]

    for _, chatter in chatter_data.iterrows():
        chatter_name = chatter["Name"]
        desired_off_days = chatter["Desired Off Day"]

        if isinstance(desired_off_days, str):
            # Same separators as clean_chatter_data: commas and/or spaces.
            parts = re.split(r"[ ,]+", desired_off_days)
        elif isinstance(desired_off_days, (list, tuple, set)):
            parts = desired_off_days
        else:
            parts = []  # NaN / None cell: nothing requested

        requested = {str(part).strip().capitalize() for part in parts}

        for day in days_of_week:
            if day in requested:
                schedule.loc[schedule[day] == chatter_name, day] = None

    print("DEBUG: Schedule After Assigning Off Days:")
    print(schedule.head())

    return schedule
|
286 |
+
|
287 |
+
def randomly_fill_slots(schedule, chatter_data, max_accounts_per_day=3, max_fans_per_day=1000):
    """
    Randomly fill the remaining empty slots while respecting per-day limits.

    Args:
        schedule: DataFrame with "ActiveFans" and one column per weekday.
            Modified in place and also returned.
        chatter_data: DataFrame with "Name" and, optionally,
            "Desired Off Day" (list of day names, or a comma/space separated
            string).
        max_accounts_per_day: Maximum number of accounts one chatter may hold
            on a single day.
        max_fans_per_day: Maximum summed "ActiveFans" one chatter may cover on
            a single day.

    Returns:
        DataFrame: The same ``schedule``; slots nobody is eligible for stay
        empty.

    Constraints applied to every candidate:
        * a chatter is never placed on one of their desired off days
          (otherwise the fill would undo the off-day assignment);
        * slots that are already filled when this function is called
          (e.g. main accounts) count towards both per-day limits.
    A slot whose "ActiveFans" is NaN can never satisfy the fan limit and is
    therefore left empty.
    """
    days_of_week = ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"]
    chatters_list = chatter_data["Name"].tolist()
    daily_accounts = {chatter: {day: 0 for day in days_of_week} for chatter in chatters_list}
    daily_fans = {chatter: {day: 0 for day in days_of_week} for chatter in chatters_list}

    # Collect each chatter's off days so the random fill can honour them.
    off_days = {chatter: set() for chatter in chatters_list}
    if "Desired Off Day" in chatter_data.columns:
        for name, raw in zip(chatter_data["Name"], chatter_data["Desired Off Day"]):
            if isinstance(raw, str):
                parts = re.split(r"[ ,]+", raw)
            elif isinstance(raw, (list, tuple, set)):
                parts = raw
            else:
                parts = []  # NaN / None
            off_days.setdefault(name, set()).update(
                str(part).strip().capitalize() for part in parts
            )

    # Seed the counters with the assignments that already exist.
    for day in days_of_week:
        for i in schedule.index:
            occupant = schedule.at[i, day]
            if pd.isnull(occupant) or occupant not in daily_accounts:
                continue
            fans = schedule.at[i, "ActiveFans"]
            daily_accounts[occupant][day] += 1
            # Unknown fan counts must not turn the running total into NaN.
            daily_fans[occupant][day] += 0 if pd.isnull(fans) else fans

    for day in days_of_week:
        for i, row in schedule.iterrows():
            if not pd.isnull(schedule.at[i, day]):
                continue  # slot already taken

            active_fans = row["ActiveFans"]
            random.shuffle(chatters_list)  # randomise who is tried first
            for chatter in chatters_list:
                if day in off_days[chatter]:
                    continue
                if (
                    daily_accounts[chatter][day] < max_accounts_per_day
                    and daily_fans[chatter][day] + active_fans <= max_fans_per_day
                ):
                    schedule.at[i, day] = chatter
                    daily_accounts[chatter][day] += 1
                    daily_fans[chatter][day] += active_fans
                    break

    print("DEBUG: Schedule After Randomly Filling Slots:")
    print(schedule.head())

    return schedule
|