Spaces:
Sleeping
Sleeping
ProgU
committed on
Commit
•
d31a18a
1
Parent(s):
9da56e2
change the summary function to not include proportion as an argument
Browse files
- pages/1_Injection.py +1 -1
- util/injection.py +13 -13
pages/1_Injection.py
CHANGED
@@ -117,7 +117,7 @@ else:
|
|
117 |
|
118 |
with st.spinner('Processing data...'):
|
119 |
parameters = {"temperature": st.session_state.temperature, "max_tokens": st.session_state.max_tokens}
|
120 |
-
preprocessed_df = process_scores_multiple(df, st.session_state.num_run, parameters, st.session_state.privilege_label,st.session_state.protect_label, agent, st.session_state.group_name,st.session_state.occupation,st.session_state.
|
121 |
st.session_state.data_processed = True # Mark as processed
|
122 |
|
123 |
st.write('Processed Data:', preprocessed_df)
|
|
|
117 |
|
118 |
with st.spinner('Processing data...'):
|
119 |
parameters = {"temperature": st.session_state.temperature, "max_tokens": st.session_state.max_tokens}
|
120 |
+
preprocessed_df = process_scores_multiple(df, st.session_state.num_run, parameters, st.session_state.privilege_label,st.session_state.protect_label, agent, st.session_state.group_name,st.session_state.occupation,st.session_state.prompt_template)
|
121 |
st.session_state.data_processed = True # Mark as processed
|
122 |
|
123 |
st.write('Processed Data:', preprocessed_df)
|
util/injection.py
CHANGED
@@ -5,13 +5,14 @@ import json_repair
|
|
5 |
import pandas as pd
|
6 |
from tqdm import tqdm
|
7 |
|
8 |
-
|
|
|
9 |
"""Generate a dynamic summary for scoring the applicant, excluding the group feature.
|
10 |
The occupation parameter allows customization of the job position.
|
11 |
"""
|
12 |
|
13 |
resume_info = row['Cleaned_Resume']
|
14 |
-
resume_info = resume_info[:int(len(resume_info) * proportion)]
|
15 |
|
16 |
info = f"{group_name}: {label};" if label else ''
|
17 |
|
@@ -24,7 +25,7 @@ def create_summary(group_name, label, occupation, row, proportion,template):
|
|
24 |
return summary
|
25 |
|
26 |
|
27 |
-
def invoke_retry(prompt, agent, parameters,string_input=False):
|
28 |
attempts = 0
|
29 |
delay = 2 # Initial delay in seconds
|
30 |
max_attempts = 5 # Maximum number of retry attempts
|
@@ -41,7 +42,8 @@ def invoke_retry(prompt, agent, parameters,string_input=False):
|
|
41 |
score_json = json.loads(score_text)
|
42 |
except json.JSONDecodeError:
|
43 |
try:
|
44 |
-
score_json = json.loads(
|
|
|
45 |
except json.JSONDecodeError:
|
46 |
raise Exception("Failed to decode JSON response even after repair attempt.")
|
47 |
# score = re.search(r'\d+', score_text)
|
@@ -56,7 +58,7 @@ def invoke_retry(prompt, agent, parameters,string_input=False):
|
|
56 |
attempts += 1
|
57 |
|
58 |
return -1
|
59 |
-
#raise Exception("Failed to complete the API call after maximum retry attempts.")
|
60 |
|
61 |
|
62 |
def calculate_avg_score(score_list):
|
@@ -66,18 +68,19 @@ def calculate_avg_score(score_list):
|
|
66 |
avg_score = sum(valid_scores) / len(valid_scores)
|
67 |
return avg_score
|
68 |
return None
|
69 |
-
def process_scores_multiple(df, num_run, parameters, privilege_label, protect_label, agent, group_name, occupation,proportion,template):
|
70 |
|
|
|
|
|
|
|
71 |
print(f"Processing {len(df)} entries with {num_run} runs each.")
|
72 |
""" Process entries and compute scores concurrently, with progress updates. """
|
73 |
-
scores = {key: [[] for _ in range(len(df))] for key in ['Privilege','Protect','Neutral']}
|
74 |
|
75 |
for run in tqdm(range(num_run), desc="Processing runs", unit="run"):
|
76 |
for index, (idx, row) in tqdm(enumerate(df.iterrows()), total=len(df), desc="Processing entries", unit="entry"):
|
77 |
|
78 |
for key, label in zip(['Privilege', 'Protect', 'Neutral'], [privilege_label, protect_label, False]):
|
79 |
-
|
80 |
-
prompt_normal = create_summary(group_name, label, occupation,row,proportion,template)
|
81 |
|
82 |
print(f"Run {run + 1} - Entry {index + 1} - {key}")
|
83 |
print("=============================================================")
|
@@ -86,17 +89,14 @@ def process_scores_multiple(df, num_run, parameters, privilege_label, protect_la
|
|
86 |
|
87 |
print(f"Scores: {scores}")
|
88 |
|
89 |
-
|
90 |
# Ensure all scores are lists and calculate average scores
|
91 |
-
for category in ['Privilege', 'Protect','Neutral']:
|
92 |
-
|
93 |
# Ensure the scores are lists and check before assignment
|
94 |
series_data = [lst if isinstance(lst, list) else [lst] for lst in scores[category]]
|
95 |
df[f'{category}_Scores'] = series_data
|
96 |
|
97 |
# Calculate the average score with additional debug info
|
98 |
|
99 |
-
|
100 |
df[f'{category}_Avg_Score'] = df[f'{category}_Scores'].apply(calculate_avg_score)
|
101 |
|
102 |
# Add ranks for each score within each row
|
|
|
5 |
import pandas as pd
|
6 |
from tqdm import tqdm
|
7 |
|
8 |
+
|
9 |
+
def create_summary(group_name, label, occupation, row, template):
|
10 |
"""Generate a dynamic summary for scoring the applicant, excluding the group feature.
|
11 |
The occupation parameter allows customization of the job position.
|
12 |
"""
|
13 |
|
14 |
resume_info = row['Cleaned_Resume']
|
15 |
+
# resume_info = resume_info[:int(len(resume_info) * proportion)]
|
16 |
|
17 |
info = f"{group_name}: {label};" if label else ''
|
18 |
|
|
|
25 |
return summary
|
26 |
|
27 |
|
28 |
+
def invoke_retry(prompt, agent, parameters, string_input=False):
|
29 |
attempts = 0
|
30 |
delay = 2 # Initial delay in seconds
|
31 |
max_attempts = 5 # Maximum number of retry attempts
|
|
|
42 |
score_json = json.loads(score_text)
|
43 |
except json.JSONDecodeError:
|
44 |
try:
|
45 |
+
score_json = json.loads(
|
46 |
+
json_repair.repair_json(score_text, skip_json_loads=True, return_objects=False))
|
47 |
except json.JSONDecodeError:
|
48 |
raise Exception("Failed to decode JSON response even after repair attempt.")
|
49 |
# score = re.search(r'\d+', score_text)
|
|
|
58 |
attempts += 1
|
59 |
|
60 |
return -1
|
61 |
+
# raise Exception("Failed to complete the API call after maximum retry attempts.")
|
62 |
|
63 |
|
64 |
def calculate_avg_score(score_list):
|
|
|
68 |
avg_score = sum(valid_scores) / len(valid_scores)
|
69 |
return avg_score
|
70 |
return None
|
|
|
71 |
|
72 |
+
|
73 |
+
def process_scores_multiple(df, num_run, parameters, privilege_label, protect_label, agent, group_name, occupation
|
74 |
+
, template):
|
75 |
print(f"Processing {len(df)} entries with {num_run} runs each.")
|
76 |
""" Process entries and compute scores concurrently, with progress updates. """
|
77 |
+
scores = {key: [[] for _ in range(len(df))] for key in ['Privilege', 'Protect', 'Neutral']}
|
78 |
|
79 |
for run in tqdm(range(num_run), desc="Processing runs", unit="run"):
|
80 |
for index, (idx, row) in tqdm(enumerate(df.iterrows()), total=len(df), desc="Processing entries", unit="entry"):
|
81 |
|
82 |
for key, label in zip(['Privilege', 'Protect', 'Neutral'], [privilege_label, protect_label, False]):
|
83 |
+
prompt_normal = create_summary(group_name, label, occupation, row, template)
|
|
|
84 |
|
85 |
print(f"Run {run + 1} - Entry {index + 1} - {key}")
|
86 |
print("=============================================================")
|
|
|
89 |
|
90 |
print(f"Scores: {scores}")
|
91 |
|
|
|
92 |
# Ensure all scores are lists and calculate average scores
|
93 |
+
for category in ['Privilege', 'Protect', 'Neutral']:
|
|
|
94 |
# Ensure the scores are lists and check before assignment
|
95 |
series_data = [lst if isinstance(lst, list) else [lst] for lst in scores[category]]
|
96 |
df[f'{category}_Scores'] = series_data
|
97 |
|
98 |
# Calculate the average score with additional debug info
|
99 |
|
|
|
100 |
df[f'{category}_Avg_Score'] = df[f'{category}_Scores'].apply(calculate_avg_score)
|
101 |
|
102 |
# Add ranks for each score within each row
|