3morrrrr committed on
Commit
0caadca
·
verified ·
1 Parent(s): 8425b4c

Update helper.py

Browse files
Files changed (1) hide show
  1. helper.py +26 -27
helper.py CHANGED
@@ -8,62 +8,60 @@ import pandas as pd
8
  from sklearn.preprocessing import MinMaxScaler
9
 
10
  def assign_main_accounts(creators_file, chatter_files):
11
-
12
  creators = pd.read_excel(creators_file)
13
- creators.columns = creators.columns.str.strip()
14
-
15
- column_mapping = {
16
- "Creator": "Creator",
17
- "Total earnings": "Total earnings",
18
- "Subscription": "Subscription",
19
- "Active Fans": "ActiveFans",
20
- "Total active fans": "ActiveFans",
21
- }
22
- creators.rename(columns={k: v for k, v in column_mapping.items() if k in creators.columns}, inplace=True)
23
-
24
- required_columns = ["Creator", "Total earnings", "Subscription", "ActiveFans"]
25
  missing_columns = [col for col in required_columns if col not in creators.columns]
26
  if missing_columns:
27
  raise KeyError(f"Missing required columns in creators file: {missing_columns}")
28
 
29
-
30
  creators["Total earnings"] = creators["Total earnings"].replace("[\$,]", "", regex=True).astype(float)
31
  creators["Subscription"] = creators["Subscription"].replace("[\$,]", "", regex=True).astype(float)
32
- creators["ActiveFans"] = pd.to_numeric(creators["ActiveFans"], errors="coerce").fillna(0)
33
-
34
 
 
35
  scaler = MinMaxScaler()
36
  creators[["Earnings_Normalized", "Subscriptions_Normalized"]] = scaler.fit_transform(
37
  creators[["Total earnings", "Subscription"]]
 
 
 
38
  0.7 * creators["Earnings_Normalized"] + 0.3 * creators["Subscriptions_Normalized"]
39
  ) * creators["Penalty Factor"]
40
  creators["Rank"] = creators["Score"].rank(ascending=False)
41
 
42
-
43
  creators = creators.sort_values(by="Rank").reset_index(drop=True)
44
 
45
-
46
  assignments = {}
47
  for idx, chatter_file in enumerate(chatter_files):
48
  shift_name = ["overnight", "day", "prime"][idx]
 
 
 
49
  if "Final Rating" not in chatters.columns:
50
  raise KeyError(f"'Final Rating' column is missing in {chatter_file}")
51
 
52
-
53
  chatters = chatters.sort_values(by="Final Rating", ascending=False).reset_index(drop=True)
54
 
55
-
56
  num_chatters = len(chatters)
57
- creators_to_assign = creators.iloc[:num_chatters]
58
- chatters["Main Account"] = creators_to_assign["Creator"].values
59
-
60
 
 
61
  assignments[shift_name] = chatters.to_dict(orient="records")
62
 
63
- assignments["creator_names"] = creators["Creator"].tolist()
64
- print("DEBUG: Chatter Data with Main Account Assignments:")
65
- print(chatters.head())
66
-
67
 
68
  return assignments
69
 
@@ -72,6 +70,7 @@ def assign_main_accounts(creators_file, chatter_files):
72
 
73
 
74
 
 
75
  def save_processed_files(assignments, output_dir):
76
  """
77
  Save processed files for main assignments, ensuring chatter names and main accounts are preserved correctly.
 
8
  from sklearn.preprocessing import MinMaxScaler
9
 
10
  def assign_main_accounts(creators_file, chatter_files):
11
+ # Load and process creators data
12
  creators = pd.read_excel(creators_file)
13
+ creators.columns = creators.columns.str.strip() # Clean up column names
14
+
15
+ # Ensure required columns are present
16
+ required_columns = ["Creator", "Total earnings", "Subscription", "Total active fans"]
 
 
 
 
 
 
 
 
17
  missing_columns = [col for col in required_columns if col not in creators.columns]
18
  if missing_columns:
19
  raise KeyError(f"Missing required columns in creators file: {missing_columns}")
20
 
21
+ # Normalize and calculate scores for creators
22
  creators["Total earnings"] = creators["Total earnings"].replace("[\$,]", "", regex=True).astype(float)
23
  creators["Subscription"] = creators["Subscription"].replace("[\$,]", "", regex=True).astype(float)
24
+ creators["Total active fans"] = creators["Total active fans"].fillna(0).astype(int)
 
25
 
26
+ # Scale data for ranking
27
  scaler = MinMaxScaler()
28
  creators[["Earnings_Normalized", "Subscriptions_Normalized"]] = scaler.fit_transform(
29
  creators[["Total earnings", "Subscription"]]
30
+ )
31
+ creators["Penalty Factor"] = 1 - abs(creators["Earnings_Normalized"] - creators["Subscriptions_Normalized"])
32
+ creators["Score"] = (
33
  0.7 * creators["Earnings_Normalized"] + 0.3 * creators["Subscriptions_Normalized"]
34
  ) * creators["Penalty Factor"]
35
  creators["Rank"] = creators["Score"].rank(ascending=False)
36
 
37
+ # Sort creators by rank
38
  creators = creators.sort_values(by="Rank").reset_index(drop=True)
39
 
40
+ # Assign main accounts to chatters
41
  assignments = {}
42
  for idx, chatter_file in enumerate(chatter_files):
43
  shift_name = ["overnight", "day", "prime"][idx]
44
+ chatters = pd.read_excel(chatter_file)
45
+ chatters.columns = chatters.columns.str.strip()
46
+
47
  if "Final Rating" not in chatters.columns:
48
  raise KeyError(f"'Final Rating' column is missing in {chatter_file}")
49
 
50
+ # Sort chatters by performance
51
  chatters = chatters.sort_values(by="Final Rating", ascending=False).reset_index(drop=True)
52
 
53
+ # Match top creators with top chatters
54
  num_chatters = len(chatters)
55
+ top_creators = creators.iloc[:num_chatters]
56
+ chatters["Main Account"] = top_creators["Creator"].values
 
57
 
58
+ # Save assignment
59
  assignments[shift_name] = chatters.to_dict(orient="records")
60
 
61
+ # Add processed creators for schedule generation
62
+ assignments["creators"] = creators[["Creator", "Total active fans"]].rename(
63
+ columns={"Total active fans": "ActiveFans"}
64
+ ).to_dict(orient="records")
65
 
66
  return assignments
67
 
 
70
 
71
 
72
 
73
+
74
  def save_processed_files(assignments, output_dir):
75
  """
76
  Save processed files for main assignments, ensuring chatter names and main accounts are preserved correctly.