3morrrrr commited on
Commit
ff94921
·
verified ·
1 Parent(s): de01ad6

Upload helper.py

Browse files
Files changed (1) hide show
  1. helper.py +315 -0
helper.py ADDED
@@ -0,0 +1,315 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import os
3
+ from sklearn.preprocessing import MinMaxScaler
4
+ import random
5
+ import re
6
+
7
def assign_main_accounts(creators_file, chatter_files):
    """
    Rank creators by a blended earnings/subscription score and pair each
    shift's chatters with a main account.

    Earnings and subscription revenue are min-max normalized, blended 70/30,
    and multiplied by a penalty factor that shrinks as the two normalized
    values diverge. Within each shift the chatters are sorted by
    "Final Rating" (highest first) and paired position-by-position with the
    ranked creators.

    Parameters:
        creators_file: Excel source accepted by ``pd.read_excel``. After header
            whitespace is stripped it must provide "Creator", "Total earnings",
            "Subscription" and either "Active Fans" or "Total active fans".
        chatter_files: Up to three Excel sources, taken in the order
            overnight, day, prime. Each must contain a "Final Rating" column.

    Returns:
        dict mapping "overnight" / "day" / "prime" to a list of chatter
        records (each with an added "Main Account" entry), plus
        "creator_names" holding the creator names in ranked order.

    Raises:
        IndexError: more than three chatter files were supplied.
        KeyError: a required column is missing from an input file.
    """
    shift_names = ["overnight", "day", "prime"]
    chatter_files = list(chatter_files)
    # Fail up front with a readable message instead of an anonymous
    # "list index out of range" halfway through processing.
    if len(chatter_files) > len(shift_names):
        raise IndexError(
            f"Expected at most {len(shift_names)} chatter files "
            f"({', '.join(shift_names)}), got {len(chatter_files)}"
        )

    creators = pd.read_excel(creators_file)
    creators.columns = creators.columns.str.strip()

    column_mapping = {
        "Creator": "Creator",
        "Total earnings": "Total earnings",
        "Subscription": "Subscription",
        "Active Fans": "ActiveFans",
        "Total active fans": "ActiveFans",
    }
    # Both fan-count headers map to "ActiveFans"; rename only the first one
    # present so the frame never ends up with two "ActiveFans" columns.
    rename_map = {}
    for source, target in column_mapping.items():
        if source in creators.columns and target not in rename_map.values():
            rename_map[source] = target
    creators.rename(columns=rename_map, inplace=True)

    required_columns = ["Creator", "Total earnings", "Subscription", "ActiveFans"]
    missing_columns = [col for col in required_columns if col not in creators.columns]
    if missing_columns:
        raise KeyError(f"Missing required columns in creators file: {missing_columns}")

    # Strip currency symbols and thousands separators before converting.
    for money_col in ("Total earnings", "Subscription"):
        creators[money_col] = creators[money_col].replace(r"[\$,]", "", regex=True).astype(float)
    creators["ActiveFans"] = pd.to_numeric(creators["ActiveFans"], errors="coerce").fillna(0)

    scaler = MinMaxScaler()
    creators[["Earnings_Normalized", "Subscriptions_Normalized"]] = scaler.fit_transform(
        creators[["Total earnings", "Subscription"]]
    )
    imbalance = (creators["Earnings_Normalized"] - creators["Subscriptions_Normalized"]).abs()
    creators["Penalty Factor"] = 1 - imbalance
    creators["Score"] = (
        0.7 * creators["Earnings_Normalized"] + 0.3 * creators["Subscriptions_Normalized"]
    ) * creators["Penalty Factor"]
    creators["Rank"] = creators["Score"].rank(ascending=False)
    creators = creators.sort_values(by="Rank").reset_index(drop=True)

    assignments = {}
    chatters = None
    for shift_name, chatter_file in zip(shift_names, chatter_files):
        chatters = pd.read_excel(chatter_file)
        chatters.columns = chatters.columns.str.strip()

        if "Final Rating" not in chatters.columns:
            raise KeyError(f"'Final Rating' column is missing in {chatter_file}")

        chatters = chatters.sort_values(by="Final Rating", ascending=False).reset_index(drop=True)

        # Pair by position. reindex pads with NaN when a shift has more
        # chatters than there are creators, so surplus chatters simply get
        # no main account instead of triggering a length-mismatch error.
        paired_creators = creators["Creator"].reindex(range(len(chatters)))
        chatters["Main Account"] = paired_creators.to_numpy()

        assignments[shift_name] = chatters.to_dict(orient="records")

    assignments["creator_names"] = creators["Creator"].tolist()

    # Only the last processed shift is echoed; skip when no files were given.
    if chatters is not None:
        print("DEBUG: Chatter Data with Main Account Assignments:")
        print(chatters.head())

    return assignments
61
+
62
+
63
+
64
def save_processed_files(assignments, output_dir):
    """
    Write one "Updated_<shift>_file.xlsx" workbook per shift into output_dir.

    Parameters:
        assignments: dict mapping shift name to a list of chatter records.
            The "creator_names" entry, if present, is skipped.
        output_dir: Destination directory; created if it does not exist.

    For every shift the records are loaded into a DataFrame, any
    "Main Account_x" / "Main Account_y" pair is collapsed into a single
    "Main Account" column, missing standard columns are added as empty, and
    the standard columns are placed first followed by any extra columns in
    their existing order.
    """
    leading_columns = ["Name", "Main Account", "Final Rating", "Available Work Days", "Desired Off Day"]

    # to_excel does not create parent directories; do it once up front.
    # An empty output_dir means "current directory" for os.path.join, so
    # there is nothing to create in that case.
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    for shift, data in assignments.items():
        if shift == "creator_names":
            continue

        df = pd.DataFrame(data)

        # Collapse suffixed duplicates into a single 'Main Account' column,
        # preferring the "_x" value and falling back to "_y".
        if "Main Account_x" in df.columns and "Main Account_y" in df.columns:
            df["Main Account"] = df["Main Account_x"].fillna(df["Main Account_y"])
            df.drop(columns=["Main Account_x", "Main Account_y"], inplace=True)
        elif "Main Account_x" in df.columns:
            df.rename(columns={"Main Account_x": "Main Account"}, inplace=True)
        elif "Main Account_y" in df.columns:
            df.rename(columns={"Main Account_y": "Main Account"}, inplace=True)

        # Guarantee the standard columns exist so every workbook has the same layout.
        for col in leading_columns:
            if col not in df.columns:
                df[col] = None

        # Standard columns first, then whatever else the records carried.
        trailing_columns = [col for col in df.columns if col not in leading_columns]
        df = df[leading_columns + trailing_columns]

        output_path = os.path.join(output_dir, f"Updated_{shift}_file.xlsx")
        df.to_excel(output_path, index=False)

        # Debugging: show where the file went and a preview of its contents
        print(f"DEBUG: Saved File for {shift}: {output_path}")
        print(df.head())
101
+
102
+
103
+
104
+
105
def generate_schedule(chatter_files, account_file):
    """
    Generate schedules for different shifts (Overnight, Day, Prime) using chatter and account data.

    Parameters:
        chatter_files: Up to three Excel sources accepted by ``pd.read_excel``,
            taken in the order Overnight, Day, Prime.
        account_file: Excel source that must contain 'Account' and
            'ActiveFans' columns (header whitespace is ignored).

    Returns:
        dict mapping each processed shift name to the finished schedule as a
        list of row records.

    Raises:
        IndexError: more than three chatter files were supplied.
        KeyError: the account file lacks a required column (the cleaning and
            assignment steps may raise KeyError for chatter columns as well).
    """
    shift_names = ["Overnight", "Day", "Prime"]
    chatter_files = list(chatter_files)
    # Fail before doing any work, with a message that says what went wrong.
    if len(chatter_files) > len(shift_names):
        raise IndexError(
            f"Expected at most {len(shift_names)} chatter files "
            f"({', '.join(shift_names)}), got {len(chatter_files)}"
        )

    def strip_headers(frame):
        # Spreadsheet headers often carry stray spaces; non-string headers are left alone.
        return frame.rename(columns=lambda name: name.strip() if isinstance(name, str) else name)

    schedules = {}
    accounts = strip_headers(pd.read_excel(account_file))

    # Validate required columns in the account file
    if not {"Account", "ActiveFans"}.issubset(accounts.columns):
        raise KeyError("The account file must contain 'Account' and 'ActiveFans' columns.")

    for shift_name, chatter_file in zip(shift_names, chatter_files):
        chatters = strip_headers(pd.read_excel(chatter_file))

        # Debugging: Print initial chatter data
        print(f"DEBUG: Initial {shift_name} Chatter Data:")
        print(chatters.head())

        # Clean chatter data
        chatters = clean_chatter_data(chatters)

        # Debugging: Print cleaned chatter data
        print(f"DEBUG: Cleaned {shift_name} Chatter Data:")
        print(chatters.head())

        # Every shift starts from its own blank template
        schedule = create_schedule_template(accounts)

        # Debugging: Print initial schedule template
        print(f"DEBUG: Initial Schedule Template for {shift_name}:")
        print(schedule.head())

        # Assign main accounts to the schedule
        schedule = assign_main_accounts_to_schedule(schedule, chatters)

        # Debugging: Print schedule after assigning main accounts
        print(f"DEBUG: Schedule After Assigning Main Accounts for {shift_name}:")
        print(schedule.head())

        # Assign days off based on chatter preferences
        schedule = assign_off_days(schedule, chatters)

        # Debugging: Print schedule after assigning off days
        print(f"DEBUG: Schedule After Assigning Off Days for {shift_name}:")
        print(schedule.head())

        # Randomly fill the remaining slots while respecting constraints
        schedule = randomly_fill_slots(schedule, chatters)

        # Debugging: Print final schedule for the shift
        print(f"DEBUG: Final Schedule for {shift_name}:")
        print(schedule.head())

        # Save the schedule
        schedules[shift_name] = schedule.to_dict(orient="records")

    return schedules
165
+
166
+
167
+
168
+
169
+
170
+
171
def create_schedule_template(account_data):
    """
    Build an empty weekly schedule: one row per account, one column per weekday.

    The result is a copy holding the "Account" and "ActiveFans" columns of
    account_data plus Monday..Sunday columns, every weekday cell set to None.
    The input frame is not modified.

    Raises:
        KeyError: account_data lacks the "Account" or "ActiveFans" column.
    """
    base_columns = ["Account", "ActiveFans"]
    if any(name not in account_data.columns for name in base_columns):
        raise KeyError("Account data must contain 'Account' and 'ActiveFans' columns.")

    template = account_data[base_columns].copy()

    weekdays = ("Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday")
    for weekday in weekdays:
        # None marks an unassigned slot
        template[weekday] = None

    return template
183
+
184
+
185
+
186
def assign_main_accounts_to_schedule(schedule, chatter_data):
    """
    Assign main accounts to the schedule based on chatter data.

    For each chatter with a non-null main account, the first schedule row
    whose "Account" matches (ignoring case and surrounding whitespace) gets
    the chatter's name written into every weekday cell that is still empty.
    Cells that already hold a name are left untouched, so when several
    chatters share a main account the earliest one in chatter_data wins.

    Parameters:
        schedule: DataFrame with an "Account" column and Monday..Sunday
            columns. It is modified in place.
        chatter_data: DataFrame with a "Name" column and a main-account
            column ("Main Account", or a suffixed "_x" / "_y" variant).

    Returns:
        The same schedule object, after modification.

    Raises:
        KeyError: no main-account column exists in chatter_data.
    """
    days_of_week = ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"]

    # Dynamically detect the correct column for the main account. The
    # space-separated suffixed names match the "Main Account_x"/"_y" spelling
    # handled elsewhere in this file; the underscore spellings are kept for
    # backward compatibility.
    candidate_columns = [
        "Main Account",
        "Main Account_x",
        "Main Account_y",
        "Main_Account_x",
        "Main_Account_y",
    ]
    main_account_col = next((col for col in candidate_columns if col in chatter_data.columns), None)

    if main_account_col is None:
        raise KeyError("Main Account column not found in chatter data.")

    def normalize(value):
        # str() keeps non-string identifiers (e.g. numeric account ids) from crashing .lower()
        return str(value).strip().lower()

    # Map each normalized account name to the first schedule row carrying it,
    # so the schedule is scanned once rather than once per chatter.
    first_row_for_account = {}
    for row_label, account in schedule["Account"].items():
        if pd.notnull(account):
            first_row_for_account.setdefault(normalize(account), row_label)

    # Iterate over each chatter and assign their main account to the schedule
    for _, chatter in chatter_data.iterrows():
        chatter_name = chatter["Name"]
        main_account = chatter[main_account_col]

        if pd.isnull(main_account):
            continue

        account_key = normalize(main_account)
        if account_key not in first_row_for_account:
            continue
        row_index = first_row_for_account[account_key]

        # Assign the chatter's name to all days where the slot is empty
        for day in days_of_week:
            if pd.isnull(schedule.at[row_index, day]):
                schedule.at[row_index, day] = chatter_name

    # Debugging: Output updated schedule for verification
    print("DEBUG: Updated Schedule after assigning main accounts:")
    print(schedule)

    return schedule
222
+
223
+
224
+
225
+
226
+
227
def clean_chatter_data(chatter_data):
    """
    Clean and prepare chatter data for scheduling.

    The frame is modified in place and also returned:
      * "Main Account_x" / "Main Account_y" are collapsed into a single
        "Main Account" column (the "_x" value wins, "_y" fills the gaps).
      * "WorkDays" is added: "Available Work Days" coerced to int, with
        missing or non-numeric entries replaced by 6.
      * "Desired Off Day" becomes a list of capitalized tokens per row,
        split on spaces and commas. A missing column or empty cell yields
        an empty list.

    Raises:
        KeyError: "Name", "Main Account", "Final Rating" or
            "Available Work Days" is missing.
    """
    # Merge any duplicate 'Main Account' columns
    if "Main Account_x" in chatter_data.columns and "Main Account_y" in chatter_data.columns:
        chatter_data["Main Account"] = chatter_data["Main Account_x"].fillna(chatter_data["Main Account_y"])
        chatter_data.drop(columns=["Main Account_x", "Main Account_y"], inplace=True)
    elif "Main Account_x" in chatter_data.columns:
        chatter_data.rename(columns={"Main Account_x": "Main Account"}, inplace=True)
    elif "Main Account_y" in chatter_data.columns:
        chatter_data.rename(columns={"Main Account_y": "Main Account"}, inplace=True)

    # Validate required columns
    required_columns = ["Name", "Main Account", "Final Rating", "Available Work Days"]
    for col in required_columns:
        if col not in chatter_data.columns:
            raise KeyError(f"Missing required column in chatter data: {col}")

    chatter_data["WorkDays"] = (
        pd.to_numeric(chatter_data["Available Work Days"], errors="coerce").fillna(6).astype(int)
    )

    def parse_off_days(value):
        # Already-parsed rows (e.g. the function being run twice) are only re-normalized.
        if isinstance(value, (list, tuple, set)):
            return [str(day).strip().capitalize() for day in value if str(day).strip()]
        if pd.isnull(value):
            return []
        # str() guards against non-text cells coming out of the spreadsheet.
        return [day.strip().capitalize() for day in re.split(r"[ ,]+", str(value)) if day.strip()]

    # "Desired Off Day" is optional: treat a missing column as "no preference".
    if "Desired Off Day" not in chatter_data.columns:
        chatter_data["Desired Off Day"] = ""
    chatter_data["Desired Off Day"] = chatter_data["Desired Off Day"].apply(parse_off_days)

    return chatter_data
253
+
254
+
255
def assign_off_days(schedule, chatter_data):
    """
    Assign days off for each chatter based on their 'Desired Off Day' field.

    For every requested off day, any schedule cell in that day's column that
    currently holds the chatter's name is cleared (set to None).

    Parameters:
        schedule: DataFrame with Monday..Sunday columns. Modified in place.
        chatter_data: DataFrame with a "Name" column and, optionally, a
            "Desired Off Day" column. Each preference may be a list of day
            names or a string of day names separated by spaces and/or
            commas. Empty / NaN cells mean "no preference". chatter_data is
            not modified.

    Returns:
        The same schedule object, after modification.
    """
    days_of_week = ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"]
    has_preferences = "Desired Off Day" in chatter_data.columns

    def parse_off_days(raw):
        # Strings use the same space/comma separators as clean_chatter_data.
        if isinstance(raw, str):
            tokens = re.split(r"[ ,]+", raw)
        elif isinstance(raw, (list, tuple, set)):
            tokens = raw
        else:
            # NaN / None / anything else: no stated preference.
            return []
        normalized = (str(token).strip().capitalize() for token in tokens)
        return [day for day in normalized if day in days_of_week]

    for _, chatter in chatter_data.iterrows():
        chatter_name = chatter["Name"]
        desired_off_days = parse_off_days(chatter["Desired Off Day"]) if has_preferences else []

        # Clear this chatter out of every slot on each requested off day
        for day in desired_off_days:
            schedule.loc[schedule[day] == chatter_name, day] = None

    # Debugging: Verify schedule after assigning off days
    print("DEBUG: Schedule After Assigning Off Days:")
    print(schedule.head())

    return schedule
286
+
287
def randomly_fill_slots(schedule, chatter_data, max_accounts_per_day=3, max_fans_per_day=1000, *, rng=None):
    """
    Randomly fill remaining slots in the schedule while respecting constraints.

    For each day, every empty (null) cell is offered to the chatters in a
    freshly shuffled order and given to the first one who
      * has not requested that day off ("Desired Off Day", if present),
      * is covering fewer than max_accounts_per_day accounts that day, and
      * would stay at or below max_fans_per_day total "ActiveFans" that day.
    Cells already filled before this call count toward both per-day limits.
    A cell stays empty when no chatter qualifies.

    Parameters:
        schedule: DataFrame with "ActiveFans" and Monday..Sunday columns.
            Modified in place.
        chatter_data: DataFrame with a "Name" column and, optionally, a
            "Desired Off Day" column (list of day names, or a space/comma
            separated string).
        max_accounts_per_day: Per-chatter cap on accounts covered in one day.
        max_fans_per_day: Per-chatter cap on summed ActiveFans in one day.
        rng: Optional object with a ``shuffle`` method (e.g. a seeded
            ``random.Random``) for reproducible output. Defaults to the
            ``random`` module.

    Returns:
        The same schedule object, after modification.
    """
    # NOTE(review): the "WorkDays" column produced by clean_chatter_data is not
    # consulted here — confirm whether a weekly work-day cap is intended.
    days_of_week = ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"]
    rng = random if rng is None else rng

    def parse_off_days(raw):
        if isinstance(raw, str):
            tokens = re.split(r"[ ,]+", raw)
        elif isinstance(raw, (list, tuple, set)):
            tokens = raw
        else:
            # NaN / None / anything else: no stated preference.
            return set()
        return {str(token).strip().capitalize() for token in tokens}

    chatters_list = chatter_data["Name"].tolist()

    # Off-day preferences per chatter name, so a slot cleared for someone's
    # day off is never handed straight back to the same person.
    has_preferences = "Desired Off Day" in chatter_data.columns
    off_days = {name: set() for name in chatters_list}
    if has_preferences:
        for _, chatter in chatter_data.iterrows():
            off_days[chatter["Name"]].update(parse_off_days(chatter["Desired Off Day"]))

    for day in days_of_week:
        accounts_today = dict.fromkeys(chatters_list, 0)
        fans_today = dict.fromkeys(chatters_list, 0)

        # Seed the day's tallies with slots that are already taken (main
        # accounts), otherwise the caps would only apply to random picks.
        for i, row in schedule.iterrows():
            occupant = schedule.at[i, day]
            if pd.notnull(occupant) and occupant in accounts_today:
                accounts_today[occupant] += 1
                if pd.notnull(row["ActiveFans"]):
                    fans_today[occupant] += row["ActiveFans"]

        for i, row in schedule.iterrows():
            if pd.notnull(schedule.at[i, day]):
                continue  # slot already filled

            active_fans = row["ActiveFans"]
            rng.shuffle(chatters_list)  # Shuffle chatters to randomize assignments
            for chatter in chatters_list:
                if day in off_days[chatter]:
                    continue
                if (
                    accounts_today[chatter] < max_accounts_per_day
                    and fans_today[chatter] + active_fans <= max_fans_per_day
                ):
                    schedule.at[i, day] = chatter
                    accounts_today[chatter] += 1
                    fans_today[chatter] += active_fans
                    break

    # Debugging: Verify schedule after filling slots
    print("DEBUG: Schedule After Randomly Filling Slots:")
    print(schedule.head())

    return schedule