Spaces:
Running
Running
Zekun Wu
committed on
Commit
•
657095c
1
Parent(s):
5aaaa8b
update
Browse files- util/generation.py +32 -20
util/generation.py
CHANGED
@@ -55,34 +55,46 @@ def invoke_retry(prompt,agent,parameters):
|
|
55 |
|
56 |
raise Exception("Failed to complete the API call after maximum retry attempts.")
|
57 |
|
58 |
-
def
|
59 |
-
"""
|
60 |
-
|
61 |
-
Accepts test_type to switch between 'multiple' and 'single' processing modes.
|
62 |
-
"""
|
63 |
-
if test_type == 'multiple':
|
64 |
-
categories = ['Privilege', 'Protect', 'Neutral']
|
65 |
-
elif test_type == 'single':
|
66 |
-
categories = ['Counterfactual', 'Neutral']
|
67 |
-
else:
|
68 |
-
raise ValueError("test_type must be either 'multiple' or 'single'")
|
69 |
|
70 |
-
# Initialize scores dictionary
|
71 |
-
scores = {category: [[] for _ in range(len(df))] for category in categories}
|
72 |
-
|
73 |
-
# Processing loop
|
74 |
for run in tqdm(range(num_run), desc="Processing runs", unit="run"):
|
75 |
for index, row in tqdm(df.iterrows(), total=len(df), desc="Processing entries", unit="entry"):
|
76 |
-
for
|
77 |
-
prompt_temp = create_summary(row,
|
78 |
-
|
79 |
-
|
|
|
|
|
80 |
|
81 |
# Assign score lists and calculate average scores
|
82 |
-
for category in
|
83 |
df[f'{category}_Scores'] = pd.Series([lst for lst in scores[category]])
|
84 |
df[f'{category}_Avg_Score'] = df[f'{category}_Scores'].apply(
|
85 |
lambda scores: sum(score for score in scores if score is not None) / len(scores) if scores else None
|
86 |
)
|
87 |
|
88 |
return df
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
55 |
|
56 |
raise Exception("Failed to complete the API call after maximum retry attempts.")
|
57 |
|
58 |
+
def process_scores_multiple(df, num_run, parameters, privilege_label, protect_label, agent, group_name, occupation):
    """Score every row of ``df`` under the Privilege, Protect and Neutral variants.

    For each of ``num_run`` runs and each row, a prompt is built with
    ``create_summary(row, group_name, label, occupation)`` and sent through
    ``invoke_retry(prompt, agent, parameters)``. Calls are issued
    sequentially, one at a time; tqdm progress bars track runs and entries.

    Args:
        df: DataFrame whose rows are handed to ``create_summary``. It is
            modified in place (new columns are added).
        num_run: Number of times each row is scored per category.
        parameters: Passed through unchanged to ``invoke_retry``.
        privilege_label: Label used when building the 'Privilege' prompt.
        protect_label: Label used when building the 'Protect' prompt.
        agent: Passed through unchanged to ``invoke_retry``.
        group_name: Passed through unchanged to ``create_summary``.
        occupation: Passed through unchanged to ``create_summary``.

    Returns:
        The same ``df`` with, for each category, a ``<category>_Scores``
        column (list of per-run results) and a ``<category>_Avg_Score``
        column (mean of the non-None results, or None when there are none).
    """
    # The 'Neutral' variant is built with no label at all.
    labels_by_category = {
        'Privilege': privilege_label,
        'Protect': protect_label,
        'Neutral': None,
    }
    scores = {category: [[] for _ in range(len(df))] for category in labels_by_category}

    for _ in tqdm(range(num_run), desc="Processing runs", unit="run"):
        entries = tqdm(df.iterrows(), total=len(df), desc="Processing entries", unit="entry")
        # Use the row's position, not its index label: the label is only a
        # valid list offset when df carries a default 0..n-1 index.
        for position, (_, row) in enumerate(entries):
            for category, label in labels_by_category.items():
                prompt = create_summary(row, group_name, label, occupation)
                scores[category][position].append(invoke_retry(prompt, agent, parameters))

    def mean_of_valid(values):
        # Average only over results that are present, so missing (None)
        # results do not drag the mean towards zero.
        valid = [value for value in values if value is not None]
        return sum(valid) / len(valid) if valid else None

    # Assign score lists and calculate average scores
    for category, per_row in scores.items():
        # Reuse df's own index so the assignment aligns row-for-row even
        # when df was filtered or re-indexed by the caller.
        df[f'{category}_Scores'] = pd.Series(per_row, index=df.index)
        df[f'{category}_Avg_Score'] = df[f'{category}_Scores'].apply(mean_of_valid)

    return df
|
79 |
+
|
80 |
+
def process_scores_single(df, num_run, parameters, counterfactual_label, agent, group_name, occupation):
    """Score every row of ``df`` under the Counterfactual and Neutral variants.

    For each of ``num_run`` runs and each row, a prompt is built with
    ``create_summary(row, group_name, label, occupation)`` and sent through
    ``invoke_retry(prompt, agent, parameters)``. Calls are issued
    sequentially, one at a time; tqdm progress bars track runs and entries.

    Args:
        df: DataFrame whose rows are handed to ``create_summary``. It is
            modified in place (new columns are added).
        num_run: Number of times each row is scored per category.
        parameters: Passed through unchanged to ``invoke_retry``.
        counterfactual_label: Label used when building the 'Counterfactual'
            prompt.
        agent: Passed through unchanged to ``invoke_retry``.
        group_name: Passed through unchanged to ``create_summary``.
        occupation: Passed through unchanged to ``create_summary``.

    Returns:
        The same ``df`` with, for each category, a ``<category>_Scores``
        column (list of per-run results) and a ``<category>_Avg_Score``
        column (mean of the non-None results, or None when there are none).
    """
    # The 'Neutral' variant is built with no label at all.
    labels_by_category = {
        'Counterfactual': counterfactual_label,
        'Neutral': None,
    }
    scores = {category: [[] for _ in range(len(df))] for category in labels_by_category}

    for _ in tqdm(range(num_run), desc="Processing runs", unit="run"):
        entries = tqdm(df.iterrows(), total=len(df), desc="Processing entries", unit="entry")
        # Use the row's position, not its index label: the label is only a
        # valid list offset when df carries a default 0..n-1 index.
        for position, (_, row) in enumerate(entries):
            for category, label in labels_by_category.items():
                prompt = create_summary(row, group_name, label, occupation)
                scores[category][position].append(invoke_retry(prompt, agent, parameters))

    def mean_of_valid(values):
        # Average only over results that are present, so missing (None)
        # results do not drag the mean towards zero.
        valid = [value for value in values if value is not None]
        return sum(valid) / len(valid) if valid else None

    # Assign score lists and calculate average scores
    for category, per_row in scores.items():
        # Reuse df's own index so the assignment aligns row-for-row even
        # when df was filtered or re-indexed by the caller.
        df[f'{category}_Scores'] = pd.Series(per_row, index=df.index)
        df[f'{category}_Avg_Score'] = df[f'{category}_Scores'].apply(mean_of_valid)

    return df
|