ProgU committed on
Commit
d31a18a
1 Parent(s): 9da56e2

change the summary function to not include proportion as an argument

Browse files
Files changed (2) hide show
  1. pages/1_Injection.py +1 -1
  2. util/injection.py +13 -13
pages/1_Injection.py CHANGED
@@ -117,7 +117,7 @@ else:
117
 
118
  with st.spinner('Processing data...'):
119
  parameters = {"temperature": st.session_state.temperature, "max_tokens": st.session_state.max_tokens}
120
- preprocessed_df = process_scores_multiple(df, st.session_state.num_run, parameters, st.session_state.privilege_label,st.session_state.protect_label, agent, st.session_state.group_name,st.session_state.occupation,st.session_state.proportion,st.session_state.prompt_template)
121
  st.session_state.data_processed = True # Mark as processed
122
 
123
  st.write('Processed Data:', preprocessed_df)
 
117
 
118
  with st.spinner('Processing data...'):
119
  parameters = {"temperature": st.session_state.temperature, "max_tokens": st.session_state.max_tokens}
120
+ preprocessed_df = process_scores_multiple(df, st.session_state.num_run, parameters, st.session_state.privilege_label,st.session_state.protect_label, agent, st.session_state.group_name,st.session_state.occupation,st.session_state.prompt_template)
121
  st.session_state.data_processed = True # Mark as processed
122
 
123
  st.write('Processed Data:', preprocessed_df)
util/injection.py CHANGED
@@ -5,13 +5,14 @@ import json_repair
5
  import pandas as pd
6
  from tqdm import tqdm
7
 
8
- def create_summary(group_name, label, occupation, row, proportion,template):
 
9
  """Generate a dynamic summary for scoring the applicant, excluding the group feature.
10
  The occupation parameter allows customization of the job position.
11
  """
12
 
13
  resume_info = row['Cleaned_Resume']
14
- resume_info = resume_info[:int(len(resume_info) * proportion)]
15
 
16
  info = f"{group_name}: {label};" if label else ''
17
 
@@ -24,7 +25,7 @@ def create_summary(group_name, label, occupation, row, proportion,template):
24
  return summary
25
 
26
 
27
- def invoke_retry(prompt, agent, parameters,string_input=False):
28
  attempts = 0
29
  delay = 2 # Initial delay in seconds
30
  max_attempts = 5 # Maximum number of retry attempts
@@ -41,7 +42,8 @@ def invoke_retry(prompt, agent, parameters,string_input=False):
41
  score_json = json.loads(score_text)
42
  except json.JSONDecodeError:
43
  try:
44
- score_json = json.loads(json_repair.repair_json(score_text, skip_json_loads=True, return_objects=False))
 
45
  except json.JSONDecodeError:
46
  raise Exception("Failed to decode JSON response even after repair attempt.")
47
  # score = re.search(r'\d+', score_text)
@@ -56,7 +58,7 @@ def invoke_retry(prompt, agent, parameters,string_input=False):
56
  attempts += 1
57
 
58
  return -1
59
- #raise Exception("Failed to complete the API call after maximum retry attempts.")
60
 
61
 
62
  def calculate_avg_score(score_list):
@@ -66,18 +68,19 @@ def calculate_avg_score(score_list):
66
  avg_score = sum(valid_scores) / len(valid_scores)
67
  return avg_score
68
  return None
69
- def process_scores_multiple(df, num_run, parameters, privilege_label, protect_label, agent, group_name, occupation,proportion,template):
70
 
 
 
 
71
  print(f"Processing {len(df)} entries with {num_run} runs each.")
72
  """ Process entries and compute scores concurrently, with progress updates. """
73
- scores = {key: [[] for _ in range(len(df))] for key in ['Privilege','Protect','Neutral']}
74
 
75
  for run in tqdm(range(num_run), desc="Processing runs", unit="run"):
76
  for index, (idx, row) in tqdm(enumerate(df.iterrows()), total=len(df), desc="Processing entries", unit="entry"):
77
 
78
  for key, label in zip(['Privilege', 'Protect', 'Neutral'], [privilege_label, protect_label, False]):
79
-
80
- prompt_normal = create_summary(group_name, label, occupation,row,proportion,template)
81
 
82
  print(f"Run {run + 1} - Entry {index + 1} - {key}")
83
  print("=============================================================")
@@ -86,17 +89,14 @@ def process_scores_multiple(df, num_run, parameters, privilege_label, protect_la
86
 
87
  print(f"Scores: {scores}")
88
 
89
-
90
  # Ensure all scores are lists and calculate average scores
91
- for category in ['Privilege', 'Protect','Neutral']:
92
-
93
  # Ensure the scores are lists and check before assignment
94
  series_data = [lst if isinstance(lst, list) else [lst] for lst in scores[category]]
95
  df[f'{category}_Scores'] = series_data
96
 
97
  # Calculate the average score with additional debug info
98
 
99
-
100
  df[f'{category}_Avg_Score'] = df[f'{category}_Scores'].apply(calculate_avg_score)
101
 
102
  # Add ranks for each score within each row
 
5
  import pandas as pd
6
  from tqdm import tqdm
7
 
8
+
9
+ def create_summary(group_name, label, occupation, row, template):
10
  """Generate a dynamic summary for scoring the applicant, excluding the group feature.
11
  The occupation parameter allows customization of the job position.
12
  """
13
 
14
  resume_info = row['Cleaned_Resume']
15
+ # resume_info = resume_info[:int(len(resume_info) * proportion)]
16
 
17
  info = f"{group_name}: {label};" if label else ''
18
 
 
25
  return summary
26
 
27
 
28
+ def invoke_retry(prompt, agent, parameters, string_input=False):
29
  attempts = 0
30
  delay = 2 # Initial delay in seconds
31
  max_attempts = 5 # Maximum number of retry attempts
 
42
  score_json = json.loads(score_text)
43
  except json.JSONDecodeError:
44
  try:
45
+ score_json = json.loads(
46
+ json_repair.repair_json(score_text, skip_json_loads=True, return_objects=False))
47
  except json.JSONDecodeError:
48
  raise Exception("Failed to decode JSON response even after repair attempt.")
49
  # score = re.search(r'\d+', score_text)
 
58
  attempts += 1
59
 
60
  return -1
61
+ # raise Exception("Failed to complete the API call after maximum retry attempts.")
62
 
63
 
64
  def calculate_avg_score(score_list):
 
68
  avg_score = sum(valid_scores) / len(valid_scores)
69
  return avg_score
70
  return None
 
71
 
72
+
73
+ def process_scores_multiple(df, num_run, parameters, privilege_label, protect_label, agent, group_name, occupation
74
+ , template):
75
  print(f"Processing {len(df)} entries with {num_run} runs each.")
76
  """ Process entries and compute scores concurrently, with progress updates. """
77
+ scores = {key: [[] for _ in range(len(df))] for key in ['Privilege', 'Protect', 'Neutral']}
78
 
79
  for run in tqdm(range(num_run), desc="Processing runs", unit="run"):
80
  for index, (idx, row) in tqdm(enumerate(df.iterrows()), total=len(df), desc="Processing entries", unit="entry"):
81
 
82
  for key, label in zip(['Privilege', 'Protect', 'Neutral'], [privilege_label, protect_label, False]):
83
+ prompt_normal = create_summary(group_name, label, occupation, row, template)
 
84
 
85
  print(f"Run {run + 1} - Entry {index + 1} - {key}")
86
  print("=============================================================")
 
89
 
90
  print(f"Scores: {scores}")
91
 
 
92
  # Ensure all scores are lists and calculate average scores
93
+ for category in ['Privilege', 'Protect', 'Neutral']:
 
94
  # Ensure the scores are lists and check before assignment
95
  series_data = [lst if isinstance(lst, list) else [lst] for lst in scores[category]]
96
  df[f'{category}_Scores'] = series_data
97
 
98
  # Calculate the average score with additional debug info
99
 
 
100
  df[f'{category}_Avg_Score'] = df[f'{category}_Scores'].apply(calculate_avg_score)
101
 
102
  # Add ranks for each score within each row