Spaces:

holistic-ai
/

job-fair

Running

App Files Files Community

Zekun Wu committed on May 2, 2024

Commit

657095c

1 Parent(s): 5aaaa8b

update

Browse files

Files changed (1) hide show

util/generation.py +32 -20

util/generation.py CHANGED Viewed

@@ -55,34 +55,46 @@ def invoke_retry(prompt,agent,parameters):
     raise Exception("Failed to complete the API call after maximum retry attempts.")
def process_scores(df, num_run, parameters, labels, agent, group_name, occupation, test_type='multiple'):
    """Score every row of ``df`` once per run for each test category.

    For each of ``num_run`` runs and each row, a prompt is built with
    ``create_summary(row, group_name, label, occupation)`` and sent through
    ``invoke_retry(prompt, agent, parameters)``. The calls are made
    sequentially (nothing here runs concurrently); progress is reported
    with ``tqdm``.

    Args:
        df: DataFrame whose rows are scored. It is modified in place.
        num_run: Number of times every row is scored per category.
        parameters: Passed through unchanged to ``invoke_retry``.
        labels: Labels paired positionally with the categories via ``zip``.
            NOTE(review): if ``labels`` is shorter than the category list,
            the trailing categories receive no scores and their average is
            None -- confirm callers pass one label per category.
        agent: Passed through unchanged to ``invoke_retry``.
        group_name: Passed through unchanged to ``create_summary``.
        occupation: Passed through unchanged to ``create_summary``.
        test_type: ``'multiple'`` selects the categories Privilege, Protect
            and Neutral; ``'single'`` selects Counterfactual and Neutral.

    Returns:
        The same ``df`` with two added columns per category:
        ``<category>_Scores`` (list of per-run results) and
        ``<category>_Avg_Score`` (mean of the non-None results, or None
        when there are none).

    Raises:
        ValueError: If ``test_type`` is neither ``'multiple'`` nor ``'single'``.
    """
    if test_type == 'multiple':
        categories = ['Privilege', 'Protect', 'Neutral']
    elif test_type == 'single':
        categories = ['Counterfactual', 'Neutral']
    else:
        raise ValueError("test_type must be either 'multiple' or 'single'")

    # One result list per row (by position) for every category.
    scores = {category: [[] for _ in range(len(df))] for category in categories}

    for _ in tqdm(range(num_run), desc="Processing runs", unit="run"):
        # iterrows() yields index *labels*; enumerate supplies the row
        # position so the result lists stay valid for any DataFrame index.
        for position, (_, row) in enumerate(
                tqdm(df.iterrows(), total=len(df), desc="Processing entries", unit="entry")):
            for category, label in zip(categories, labels):
                prompt_temp = create_summary(row, group_name, label, occupation)
                result = invoke_retry(prompt_temp, agent, parameters)
                scores[category][position].append(result)

    def _mean_of_valid(values):
        # Average only the results that are present, so a missing (None)
        # result does not drag the mean towards zero.
        valid = [value for value in values if value is not None]
        return sum(valid) / len(valid) if valid else None

    # Assign score lists and calculate average scores.
    for category in categories:
        # Reuse df.index so the assignment aligns row-for-row even when the
        # DataFrame does not carry a default 0..n-1 index.
        df[f'{category}_Scores'] = pd.Series(scores[category], index=df.index, dtype=object)
        df[f'{category}_Avg_Score'] = df[f'{category}_Scores'].apply(_mean_of_valid)
    return df

     raise Exception("Failed to complete the API call after maximum retry attempts.")
def process_scores_multiple(df, num_run, parameters, privilege_label, protect_label, agent, group_name, occupation):
    """Score every row of ``df`` under the Privilege, Protect and Neutral conditions.

    For each of ``num_run`` runs and each row, a prompt is built with
    ``create_summary(row, group_name, label, occupation)`` and sent through
    ``invoke_retry(prompt, agent, parameters)``. The calls are made
    sequentially (nothing here runs concurrently); progress is reported
    with ``tqdm``.

    Args:
        df: DataFrame whose rows are scored. It is modified in place.
        num_run: Number of times every row is scored per condition.
        parameters: Passed through unchanged to ``invoke_retry``.
        privilege_label: Label used for the ``Privilege`` condition.
        protect_label: Label used for the ``Protect`` condition.
        agent: Passed through unchanged to ``invoke_retry``.
        group_name: Passed through unchanged to ``create_summary``.
        occupation: Passed through unchanged to ``create_summary``.

    Returns:
        The same ``df`` with ``<condition>_Scores`` (list of per-run
        results) and ``<condition>_Avg_Score`` (mean of the non-None
        results, or None when there are none) added for each of
        Privilege, Protect and Neutral.
    """
    categories = ['Privilege', 'Protect', 'Neutral']
    # The Neutral condition is generated with no label at all.
    labels = [privilege_label, protect_label, None]

    # One result list per row (by position) for every condition.
    scores = {category: [[] for _ in range(len(df))] for category in categories}

    for _ in tqdm(range(num_run), desc="Processing runs", unit="run"):
        # iterrows() yields index *labels*; enumerate supplies the row
        # position so the result lists stay valid for any DataFrame index.
        for position, (_, row) in enumerate(
                tqdm(df.iterrows(), total=len(df), desc="Processing entries", unit="entry")):
            for category, label in zip(categories, labels):
                prompt_temp = create_summary(row, group_name, label, occupation)
                result = invoke_retry(prompt_temp, agent, parameters)
                scores[category][position].append(result)

    def _mean_of_valid(values):
        # Average only the results that are present, so a missing (None)
        # result does not drag the mean towards zero.
        valid = [value for value in values if value is not None]
        return sum(valid) / len(valid) if valid else None

    # Assign score lists and calculate average scores.
    for category in categories:
        # Reuse df.index so the assignment aligns row-for-row even when the
        # DataFrame does not carry a default 0..n-1 index.
        df[f'{category}_Scores'] = pd.Series(scores[category], index=df.index, dtype=object)
        df[f'{category}_Avg_Score'] = df[f'{category}_Scores'].apply(_mean_of_valid)
    return df
def process_scores_single(df, num_run, parameters, counterfactual_label, agent, group_name, occupation):
    """Score every row of ``df`` under the Counterfactual and Neutral conditions.

    For each of ``num_run`` runs and each row, a prompt is built with
    ``create_summary(row, group_name, label, occupation)`` and sent through
    ``invoke_retry(prompt, agent, parameters)``. The calls are made
    sequentially (nothing here runs concurrently); progress is reported
    with ``tqdm``.

    Args:
        df: DataFrame whose rows are scored. It is modified in place.
        num_run: Number of times every row is scored per condition.
        parameters: Passed through unchanged to ``invoke_retry``.
        counterfactual_label: Label used for the ``Counterfactual`` condition.
        agent: Passed through unchanged to ``invoke_retry``.
        group_name: Passed through unchanged to ``create_summary``.
        occupation: Passed through unchanged to ``create_summary``.

    Returns:
        The same ``df`` with ``<condition>_Scores`` (list of per-run
        results) and ``<condition>_Avg_Score`` (mean of the non-None
        results, or None when there are none) added for each of
        Counterfactual and Neutral.
    """
    categories = ['Counterfactual', 'Neutral']
    # The Neutral condition is generated with no label at all.
    labels = [counterfactual_label, None]

    # One result list per row (by position) for every condition.
    scores = {category: [[] for _ in range(len(df))] for category in categories}

    for _ in tqdm(range(num_run), desc="Processing runs", unit="run"):
        # iterrows() yields index *labels*; enumerate supplies the row
        # position so the result lists stay valid for any DataFrame index.
        for position, (_, row) in enumerate(
                tqdm(df.iterrows(), total=len(df), desc="Processing entries", unit="entry")):
            for category, label in zip(categories, labels):
                prompt_temp = create_summary(row, group_name, label, occupation)
                result = invoke_retry(prompt_temp, agent, parameters)
                scores[category][position].append(result)

    def _mean_of_valid(values):
        # Average only the results that are present, so a missing (None)
        # result does not drag the mean towards zero.
        valid = [value for value in values if value is not None]
        return sum(valid) / len(valid) if valid else None

    # Assign score lists and calculate average scores.
    for category in categories:
        # Reuse df.index so the assignment aligns row-for-row even when the
        # DataFrame does not carry a default 0..n-1 index.
        df[f'{category}_Scores'] = pd.Series(scores[category], index=df.index, dtype=object)
        df[f'{category}_Avg_Score'] = df[f'{category}_Scores'].apply(_mean_of_valid)
    return df