Zekun Wu committed on
Commit
657095c
1 Parent(s): 5aaaa8b
Files changed (1) hide show
  1. util/generation.py +32 -20
util/generation.py CHANGED
@@ -55,34 +55,46 @@ def invoke_retry(prompt,agent,parameters):
55
 
56
  raise Exception("Failed to complete the API call after maximum retry attempts.")
57
 
58
def process_scores(df, num_run, parameters, labels, agent, group_name, occupation, test_type='multiple'):
    """
    Score every row of ``df`` ``num_run`` times per category and attach the results.

    Rows are processed sequentially (one ``invoke_retry`` call per run, row and
    category). For each category two columns are written back to ``df``:
    ``<category>_Scores`` (the list of per-run results) and
    ``<category>_Avg_Score`` (the mean of the non-None results, or None when
    there is no usable result).

    Args:
        df: DataFrame whose rows are passed to ``create_summary``. It is
            modified in place and also returned.
        num_run: Number of times every row is scored.
        parameters: Forwarded unchanged to ``invoke_retry``.
        labels: One label per category, in category order; each is forwarded
            to ``create_summary``.
        agent: Forwarded unchanged to ``invoke_retry``.
        group_name: Forwarded unchanged to ``create_summary``.
        occupation: Forwarded unchanged to ``create_summary``.
        test_type: ``'multiple'`` selects the Privilege/Protect/Neutral
            categories, ``'single'`` selects Counterfactual/Neutral.

    Returns:
        The same ``df`` object with the score columns added.

    Raises:
        ValueError: If ``test_type`` is neither ``'multiple'`` nor ``'single'``.
    """
    if test_type == 'multiple':
        categories = ['Privilege', 'Protect', 'Neutral']
    elif test_type == 'single':
        categories = ['Counterfactual', 'Neutral']
    else:
        raise ValueError("test_type must be either 'multiple' or 'single'")

    def average_valid(run_scores):
        # Average only the usable results so that a None entry does not
        # silently drag the mean towards zero.
        valid = [score for score in run_scores if score is not None]
        return sum(valid) / len(valid) if valid else None

    # One result list per row, addressed by row *position* (not index label).
    scores = {category: [[] for _ in range(len(df))] for category in categories}

    for _ in tqdm(range(num_run), desc="Processing runs", unit="run"):
        # enumerate() supplies the position: the label yielded by iterrows()
        # is only a valid list offset when df carries a default RangeIndex.
        for position, (_, row) in enumerate(
            tqdm(df.iterrows(), total=len(df), desc="Processing entries", unit="entry")
        ):
            # NOTE(review): zip() stops at the shorter sequence, so a category
            # without a matching entry in `labels` is never scored - confirm
            # that callers pass one label per category.
            for category, label in zip(categories, labels):
                prompt_temp = create_summary(row, group_name, label, occupation)
                result = invoke_retry(prompt_temp, agent, parameters)
                scores[category][position].append(result)

    # Assign score lists and calculate average scores
    for category in categories:
        # Reuse df.index so the assignment lines up row by row instead of
        # being re-aligned against a fresh 0..n-1 index.
        df[f'{category}_Scores'] = pd.Series(scores[category], index=df.index, dtype=object)
        df[f'{category}_Avg_Score'] = df[f'{category}_Scores'].apply(average_valid)

    return df
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
55
 
56
  raise Exception("Failed to complete the API call after maximum retry attempts.")
57
 
58
def process_scores_multiple(df, num_run, parameters, privilege_label, protect_label, agent, group_name, occupation):
    """
    Score every row of ``df`` ``num_run`` times for the Privilege, Protect and Neutral categories.

    Rows are processed sequentially (one ``invoke_retry`` call per run, row and
    category). The Neutral category is built with a label of ``None``. For each
    category two columns are written back to ``df``: ``<category>_Scores``
    (the list of per-run results) and ``<category>_Avg_Score`` (the mean of
    the non-None results, or None when there is no usable result).

    Args:
        df: DataFrame whose rows are passed to ``create_summary``. It is
            modified in place and also returned.
        num_run: Number of times every row is scored.
        parameters: Forwarded unchanged to ``invoke_retry``.
        privilege_label: Label passed to ``create_summary`` for 'Privilege'.
        protect_label: Label passed to ``create_summary`` for 'Protect'.
        agent: Forwarded unchanged to ``invoke_retry``.
        group_name: Forwarded unchanged to ``create_summary``.
        occupation: Forwarded unchanged to ``create_summary``.

    Returns:
        The same ``df`` object with the score columns added.
    """
    categories = ['Privilege', 'Protect', 'Neutral']
    labels = [privilege_label, protect_label, None]

    def average_valid(run_scores):
        # Average only the usable results so that a None entry does not
        # silently drag the mean towards zero.
        valid = [score for score in run_scores if score is not None]
        return sum(valid) / len(valid) if valid else None

    # One result list per row, addressed by row *position* (not index label).
    scores = {key: [[] for _ in range(len(df))] for key in categories}

    for _ in tqdm(range(num_run), desc="Processing runs", unit="run"):
        # enumerate() supplies the position: the label yielded by iterrows()
        # is only a valid list offset when df carries a default RangeIndex.
        for position, (_, row) in enumerate(
            tqdm(df.iterrows(), total=len(df), desc="Processing entries", unit="entry")
        ):
            for key, label in zip(categories, labels):
                prompt_temp = create_summary(row, group_name, label, occupation)
                result = invoke_retry(prompt_temp, agent, parameters)
                scores[key][position].append(result)

    # Assign score lists and calculate average scores
    for category in categories:
        # Reuse df.index so the assignment lines up row by row instead of
        # being re-aligned against a fresh 0..n-1 index.
        df[f'{category}_Scores'] = pd.Series(scores[category], index=df.index, dtype=object)
        df[f'{category}_Avg_Score'] = df[f'{category}_Scores'].apply(average_valid)

    return df
79
+
80
def process_scores_single(df, num_run, parameters, counterfactual_label, agent, group_name, occupation):
    """
    Score every row of ``df`` ``num_run`` times for the Counterfactual and Neutral categories.

    Rows are processed sequentially (one ``invoke_retry`` call per run, row and
    category). The Neutral category is built with a label of ``None``. For each
    category two columns are written back to ``df``: ``<category>_Scores``
    (the list of per-run results) and ``<category>_Avg_Score`` (the mean of
    the non-None results, or None when there is no usable result).

    Args:
        df: DataFrame whose rows are passed to ``create_summary``. It is
            modified in place and also returned.
        num_run: Number of times every row is scored.
        parameters: Forwarded unchanged to ``invoke_retry``.
        counterfactual_label: Label passed to ``create_summary`` for
            'Counterfactual'.
        agent: Forwarded unchanged to ``invoke_retry``.
        group_name: Forwarded unchanged to ``create_summary``.
        occupation: Forwarded unchanged to ``create_summary``.

    Returns:
        The same ``df`` object with the score columns added.
    """
    categories = ['Counterfactual', 'Neutral']
    labels = [counterfactual_label, None]

    def average_valid(run_scores):
        # Average only the usable results so that a None entry does not
        # silently drag the mean towards zero.
        valid = [score for score in run_scores if score is not None]
        return sum(valid) / len(valid) if valid else None

    # One result list per row, addressed by row *position* (not index label).
    scores = {key: [[] for _ in range(len(df))] for key in categories}

    for _ in tqdm(range(num_run), desc="Processing runs", unit="run"):
        # enumerate() supplies the position: the label yielded by iterrows()
        # is only a valid list offset when df carries a default RangeIndex.
        for position, (_, row) in enumerate(
            tqdm(df.iterrows(), total=len(df), desc="Processing entries", unit="entry")
        ):
            for key, label in zip(categories, labels):
                prompt_temp = create_summary(row, group_name, label, occupation)
                result = invoke_retry(prompt_temp, agent, parameters)
                scores[key][position].append(result)

    # Assign score lists and calculate average scores
    for category in categories:
        # Reuse df.index so the assignment lines up row by row instead of
        # being re-aligned against a fresh 0..n-1 index.
        df[f'{category}_Scores'] = pd.Series(scores[category], index=df.index, dtype=object)
        df[f'{category}_Avg_Score'] = df[f'{category}_Scores'].apply(average_valid)

    return df