Spaces:
Sleeping
Sleeping
Update helper.py
Browse files
helper.py
CHANGED
@@ -8,62 +8,60 @@ import pandas as pd
|
|
8 |
from sklearn.preprocessing import MinMaxScaler
|
9 |
|
10 |
def assign_main_accounts(creators_file, chatter_files):
|
11 |
-
|
12 |
creators = pd.read_excel(creators_file)
|
13 |
-
creators.columns = creators.columns.str.strip()
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
"Total earnings": "Total earnings",
|
18 |
-
"Subscription": "Subscription",
|
19 |
-
"Active Fans": "ActiveFans",
|
20 |
-
"Total active fans": "ActiveFans",
|
21 |
-
}
|
22 |
-
creators.rename(columns={k: v for k, v in column_mapping.items() if k in creators.columns}, inplace=True)
|
23 |
-
|
24 |
-
required_columns = ["Creator", "Total earnings", "Subscription", "ActiveFans"]
|
25 |
missing_columns = [col for col in required_columns if col not in creators.columns]
|
26 |
if missing_columns:
|
27 |
raise KeyError(f"Missing required columns in creators file: {missing_columns}")
|
28 |
|
29 |
-
|
30 |
creators["Total earnings"] = creators["Total earnings"].replace("[\$,]", "", regex=True).astype(float)
|
31 |
creators["Subscription"] = creators["Subscription"].replace("[\$,]", "", regex=True).astype(float)
|
32 |
-
creators["
|
33 |
-
|
34 |
|
|
|
35 |
scaler = MinMaxScaler()
|
36 |
creators[["Earnings_Normalized", "Subscriptions_Normalized"]] = scaler.fit_transform(
|
37 |
creators[["Total earnings", "Subscription"]]
|
|
|
|
|
|
|
38 |
0.7 * creators["Earnings_Normalized"] + 0.3 * creators["Subscriptions_Normalized"]
|
39 |
) * creators["Penalty Factor"]
|
40 |
creators["Rank"] = creators["Score"].rank(ascending=False)
|
41 |
|
42 |
-
|
43 |
creators = creators.sort_values(by="Rank").reset_index(drop=True)
|
44 |
|
45 |
-
|
46 |
assignments = {}
|
47 |
for idx, chatter_file in enumerate(chatter_files):
|
48 |
shift_name = ["overnight", "day", "prime"][idx]
|
|
|
|
|
|
|
49 |
if "Final Rating" not in chatters.columns:
|
50 |
raise KeyError(f"'Final Rating' column is missing in {chatter_file}")
|
51 |
|
52 |
-
|
53 |
chatters = chatters.sort_values(by="Final Rating", ascending=False).reset_index(drop=True)
|
54 |
|
55 |
-
|
56 |
num_chatters = len(chatters)
|
57 |
-
|
58 |
-
chatters["Main Account"] =
|
59 |
-
|
60 |
|
|
|
61 |
assignments[shift_name] = chatters.to_dict(orient="records")
|
62 |
|
63 |
-
|
64 |
-
|
65 |
-
|
66 |
-
|
67 |
|
68 |
return assignments
|
69 |
|
@@ -72,6 +70,7 @@ def assign_main_accounts(creators_file, chatter_files):
|
|
72 |
|
73 |
|
74 |
|
|
|
75 |
def save_processed_files(assignments, output_dir):
|
76 |
"""
|
77 |
Save processed files for main assignments, ensuring chatter names and main accounts are preserved correctly.
|
|
|
8 |
from sklearn.preprocessing import MinMaxScaler
|
9 |
|
10 |
def assign_main_accounts(creators_file, chatter_files):
    """Rank creators and pair the best-ranked ones with each shift's best-rated chatters.

    Creators are scored from their min-max normalised "Total earnings" and
    "Subscription" columns (weighted 0.7 / 0.3) and multiplied by a penalty
    factor that shrinks as the two normalised values diverge. Within every
    shift, chatters are sorted by "Final Rating" (highest first) and the
    i-th chatter receives the i-th ranked creator as "Main Account".

    Args:
        creators_file: Path or file-like object accepted by ``pd.read_excel``.
            Must contain the columns "Creator", "Total earnings",
            "Subscription" and "Total active fans".
        chatter_files: Iterable of at most three Excel sources, matched
            positionally to the shifts "overnight", "day" and "prime".
            Each must contain a "Final Rating" column.

    Returns:
        dict: One entry per processed shift name holding the chatter rows
        (as a list of record dicts, including "Main Account"), plus a
        "creators" entry listing every creator with its "ActiveFans" count.

    Raises:
        IndexError: If more chatter files are supplied than there are shifts.
        KeyError: If a required column is missing from any input file.
        ValueError: If a shift has more chatters than there are creators.
    """
    shift_names = ("overnight", "day", "prime")

    # Materialise once so any iterable is accepted, and validate the count
    # before doing any file I/O instead of failing midway through the loop.
    chatter_files = list(chatter_files)
    if len(chatter_files) > len(shift_names):
        raise IndexError(
            f"Expected at most {len(shift_names)} chatter files "
            f"(one per shift: {', '.join(shift_names)}), got {len(chatter_files)}"
        )

    # Load creators data and clean up column names
    creators = pd.read_excel(creators_file)
    creators.columns = creators.columns.str.strip()

    # Ensure required columns are present
    required_columns = ["Creator", "Total earnings", "Subscription", "Total active fans"]
    missing_columns = [col for col in required_columns if col not in creators.columns]
    if missing_columns:
        raise KeyError(f"Missing required columns in creators file: {missing_columns}")

    # Strip currency formatting ("$" and thousands separators) before casting.
    # Raw string: "\$" is an invalid escape sequence in a normal string literal.
    for money_column in ("Total earnings", "Subscription"):
        creators[money_column] = (
            creators[money_column].replace(r"[\$,]", "", regex=True).astype(float)
        )
    creators["Total active fans"] = creators["Total active fans"].fillna(0).astype(int)

    # Scale data for ranking
    scaler = MinMaxScaler()
    creators[["Earnings_Normalized", "Subscriptions_Normalized"]] = scaler.fit_transform(
        creators[["Total earnings", "Subscription"]]
    )

    # The penalty is 1 when both normalised metrics agree and falls towards 0
    # as they diverge, so lopsided creators are ranked lower.
    creators["Penalty Factor"] = 1 - abs(
        creators["Earnings_Normalized"] - creators["Subscriptions_Normalized"]
    )
    creators["Score"] = (
        0.7 * creators["Earnings_Normalized"] + 0.3 * creators["Subscriptions_Normalized"]
    ) * creators["Penalty Factor"]
    creators["Rank"] = creators["Score"].rank(ascending=False)

    # Sort creators by rank; a stable sort keeps tied creators in input order
    # so repeated runs produce the same assignments.
    creators = creators.sort_values(by="Rank", kind="mergesort").reset_index(drop=True)

    # Assign main accounts to chatters (zip is safe: the count was checked above)
    assignments = {}
    for shift_name, chatter_file in zip(shift_names, chatter_files):
        chatters = pd.read_excel(chatter_file)
        chatters.columns = chatters.columns.str.strip()

        if "Final Rating" not in chatters.columns:
            raise KeyError(f"'Final Rating' column is missing in {chatter_file}")

        # Sort chatters by performance, best first (stable for tied ratings)
        chatters = chatters.sort_values(
            by="Final Rating", ascending=False, kind="mergesort"
        ).reset_index(drop=True)

        # Match top creators with top chatters
        num_chatters = len(chatters)
        if num_chatters > len(creators):
            # Without this check pandas raises an opaque length-mismatch error.
            raise ValueError(
                f"Not enough creators to assign main accounts for {chatter_file}: "
                f"{num_chatters} chatters but only {len(creators)} creators"
            )
        top_creators = creators.iloc[:num_chatters]
        chatters["Main Account"] = top_creators["Creator"].values

        # Save assignment
        assignments[shift_name] = chatters.to_dict(orient="records")

    # Add processed creators for schedule generation
    assignments["creators"] = creators[["Creator", "Total active fans"]].rename(
        columns={"Total active fans": "ActiveFans"}
    ).to_dict(orient="records")

    return assignments
|
67 |
|
|
|
70 |
|
71 |
|
72 |
|
73 |
+
|
74 |
def save_processed_files(assignments, output_dir):
|
75 |
"""
|
76 |
Save processed files for main assignments, ensuring chatter names and main accounts are preserved correctly.
|