Spaces:

holistic-ai
/

job-fair

Sleeping

App Files Files Community

ProgU committed on May 30, 2024

Commit

d31a18a

1 Parent(s): 9da56e2

Change the summary function so that it no longer takes proportion as an argument

Browse files

Files changed (2) hide show

pages/1_Injection.py +1 -1
util/injection.py +13 -13

pages/1_Injection.py CHANGED Viewed

@@ -117,7 +117,7 @@ else:
                 with st.spinner('Processing data...'):
                     parameters = {"temperature": st.session_state.temperature, "max_tokens": st.session_state.max_tokens}
-                    preprocessed_df = process_scores_multiple(df, st.session_state.num_run, parameters, st.session_state.privilege_label,st.session_state.protect_label, agent, st.session_state.group_name,st.session_state.occupation,st.session_state.proportion,st.session_state.prompt_template)
                     st.session_state.data_processed = True  # Mark as processed
                 st.write('Processed Data:', preprocessed_df)

                 with st.spinner('Processing data...'):
                     parameters = {"temperature": st.session_state.temperature, "max_tokens": st.session_state.max_tokens}
+                    preprocessed_df = process_scores_multiple(df, st.session_state.num_run, parameters, st.session_state.privilege_label,st.session_state.protect_label, agent, st.session_state.group_name,st.session_state.occupation,st.session_state.prompt_template)
                     st.session_state.data_processed = True  # Mark as processed
                 st.write('Processed Data:', preprocessed_df)

util/injection.py CHANGED Viewed

@@ -5,13 +5,14 @@ import json_repair
 import pandas as pd
 from tqdm import tqdm
-def create_summary(group_name, label, occupation, row, proportion,template):
     """Generate a dynamic summary for scoring the applicant, excluding the group feature.
        The occupation parameter allows customization of the job position.
     """
     resume_info = row['Cleaned_Resume']
-    resume_info = resume_info[:int(len(resume_info) * proportion)]
     info = f"{group_name}: {label};" if label else ''
@@ -24,7 +25,7 @@ def create_summary(group_name, label, occupation, row, proportion,template):
     return summary
-def invoke_retry(prompt, agent, parameters,string_input=False):
     attempts = 0
     delay = 2  # Initial delay in seconds
     max_attempts = 5  # Maximum number of retry attempts
@@ -41,7 +42,8 @@ def invoke_retry(prompt, agent, parameters,string_input=False):
                 score_json = json.loads(score_text)
             except json.JSONDecodeError:
                 try:
-                    score_json = json.loads(json_repair.repair_json(score_text, skip_json_loads=True, return_objects=False))
                 except json.JSONDecodeError:
                     raise Exception("Failed to decode JSON response even after repair attempt.")
             # score = re.search(r'\d+', score_text)
@@ -56,7 +58,7 @@ def invoke_retry(prompt, agent, parameters,string_input=False):
             attempts += 1
     return -1
-    #raise Exception("Failed to complete the API call after maximum retry attempts.")
 def calculate_avg_score(score_list):
@@ -66,18 +68,19 @@ def calculate_avg_score(score_list):
             avg_score = sum(valid_scores) / len(valid_scores)
             return avg_score
     return None
-def process_scores_multiple(df, num_run, parameters, privilege_label, protect_label, agent, group_name, occupation,proportion,template):
     print(f"Processing {len(df)} entries with {num_run} runs each.")
     """ Process entries and compute scores concurrently, with progress updates. """
-    scores = {key: [[] for _ in range(len(df))] for key in ['Privilege','Protect','Neutral']}
     for run in tqdm(range(num_run), desc="Processing runs", unit="run"):
         for index, (idx, row) in tqdm(enumerate(df.iterrows()), total=len(df), desc="Processing entries", unit="entry"):
             for key, label in zip(['Privilege', 'Protect', 'Neutral'], [privilege_label, protect_label, False]):
-                prompt_normal = create_summary(group_name, label, occupation,row,proportion,template)
                 print(f"Run {run + 1} - Entry {index + 1} - {key}")
                 print("=============================================================")
@@ -86,17 +89,14 @@ def process_scores_multiple(df, num_run, parameters, privilege_label, protect_la
     print(f"Scores: {scores}")
     # Ensure all scores are lists and calculate average scores
-    for category in ['Privilege', 'Protect','Neutral']:
         # Ensure the scores are lists and check before assignment
         series_data = [lst if isinstance(lst, list) else [lst] for lst in scores[category]]
         df[f'{category}_Scores'] = series_data
         # Calculate the average score with additional debug info
         df[f'{category}_Avg_Score'] = df[f'{category}_Scores'].apply(calculate_avg_score)
     # Add ranks for each score within each row

 import pandas as pd
 from tqdm import tqdm
+def create_summary(group_name, label, occupation, row, template):
     """Generate a dynamic summary for scoring the applicant, excluding the group feature.
        The occupation parameter allows customization of the job position.
     """
     resume_info = row['Cleaned_Resume']
+    # resume_info = resume_info[:int(len(resume_info) * proportion)]
     info = f"{group_name}: {label};" if label else ''
     return summary
+def invoke_retry(prompt, agent, parameters, string_input=False):
     attempts = 0
     delay = 2  # Initial delay in seconds
     max_attempts = 5  # Maximum number of retry attempts
                 score_json = json.loads(score_text)
             except json.JSONDecodeError:
                 try:
+                    score_json = json.loads(
+                        json_repair.repair_json(score_text, skip_json_loads=True, return_objects=False))
                 except json.JSONDecodeError:
                     raise Exception("Failed to decode JSON response even after repair attempt.")
             # score = re.search(r'\d+', score_text)
             attempts += 1
     return -1
+    # raise Exception("Failed to complete the API call after maximum retry attempts.")
 def calculate_avg_score(score_list):
             avg_score = sum(valid_scores) / len(valid_scores)
             return avg_score
     return None
+def process_scores_multiple(df, num_run, parameters, privilege_label, protect_label, agent, group_name, occupation
+                            , template):
     print(f"Processing {len(df)} entries with {num_run} runs each.")
     """ Process entries and compute scores concurrently, with progress updates. """
+    scores = {key: [[] for _ in range(len(df))] for key in ['Privilege', 'Protect', 'Neutral']}
     for run in tqdm(range(num_run), desc="Processing runs", unit="run"):
         for index, (idx, row) in tqdm(enumerate(df.iterrows()), total=len(df), desc="Processing entries", unit="entry"):
             for key, label in zip(['Privilege', 'Protect', 'Neutral'], [privilege_label, protect_label, False]):
+                prompt_normal = create_summary(group_name, label, occupation, row, template)
                 print(f"Run {run + 1} - Entry {index + 1} - {key}")
                 print("=============================================================")
     print(f"Scores: {scores}")
     # Ensure all scores are lists and calculate average scores
+    for category in ['Privilege', 'Protect', 'Neutral']:
         # Ensure the scores are lists and check before assignment
         series_data = [lst if isinstance(lst, list) else [lst] for lst in scores[category]]
         df[f'{category}_Scores'] = series_data
         # Calculate the average score with additional debug info
         df[f'{category}_Avg_Score'] = df[f'{category}_Scores'].apply(calculate_avg_score)
     # Add ranks for each score within each row