Zekun Wu commited on
Commit
bafdc7e
1 Parent(s): 64703c4
Files changed (2) hide show
  1. pages/1_Injection.py +92 -74
  2. pages/2_Evaluation.py +72 -52
pages/1_Injection.py CHANGED
@@ -8,6 +8,19 @@ from util.model import AzureAgent, GPTAgent
8
  st.title('Result Generation')
9
  st.sidebar.title('Model Settings')
10
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
 
12
  # Define a function to manage state initialization
13
  def initialize_state():
@@ -21,77 +34,82 @@ def initialize_state():
21
  st.session_state[key] = default
22
 
23
 
24
- initialize_state()
25
-
26
- # Model selection and configuration
27
- model_type = st.sidebar.radio("Select the type of agent", ('GPTAgent', 'AzureAgent'))
28
- st.session_state.api_key = st.sidebar.text_input("API Key", type="password", value=st.session_state.api_key)
29
- st.session_state.endpoint_url = st.sidebar.text_input("Endpoint URL", value=st.session_state.endpoint_url)
30
- st.session_state.deployment_name = st.sidebar.text_input("Model Name", value=st.session_state.deployment_name)
31
- api_version = '2024-02-15-preview' if model_type == 'GPTAgent' else ''
32
- st.session_state.temperature = st.sidebar.slider("Temperature", 0.0, 1.0, st.session_state.temperature, 0.01)
33
- st.session_state.max_tokens = st.sidebar.number_input("Max Tokens", 1, 1000, st.session_state.max_tokens)
34
-
35
- if st.sidebar.button("Reset Model Info"):
36
- initialize_state() # Reset all state to defaults
37
- st.experimental_rerun()
38
-
39
- if st.sidebar.button("Submit Model Info"):
40
- st.session_state.model_submitted = True
41
-
42
- # Ensure experiment settings are only shown if model info is submitted
43
- if st.session_state.model_submitted:
44
- df = None
45
- file_options = st.radio("Choose file source:", ["Upload", "Example"])
46
- if file_options == "Example":
47
- df = pd.read_csv("prompt_test.csv")
48
- else:
49
- st.session_state.uploaded_file = st.file_uploader("Choose a file")
50
- if st.session_state.uploaded_file is not None:
51
- data = StringIO(st.session_state.uploaded_file.getvalue().decode("utf-8"))
52
- df = pd.read_csv(data)
53
- if df is not None:
54
-
55
- st.write('Data:', df)
56
-
57
- # Button to add a new row
58
-
59
- st.session_state.occupation = st.text_input("Occupation", value=st.session_state.occupation)
60
- st.session_state.group_name = st.text_input("Group Name", value=st.session_state.group_name)
61
- st.session_state.privilege_label = st.text_input("Privilege Label", value=st.session_state.privilege_label)
62
- st.session_state.protect_label = st.text_input("Protect Label", value=st.session_state.protect_label)
63
- st.session_state.num_run = st.number_input("Number of Runs", 1, 10, st.session_state.num_run)
64
-
65
- if st.button('Process Data') and not st.session_state.data_processed:
66
- # Initialize the correct agent based on model type
67
- if model_type == 'AzureAgent':
68
- agent = AzureAgent(st.session_state.api_key, st.session_state.endpoint_url,
69
- st.session_state.deployment_name)
70
- else:
71
- agent = GPTAgent(st.session_state.api_key, st.session_state.endpoint_url,
72
- st.session_state.deployment_name, api_version)
73
-
74
- # Process data and display results
75
- with st.spinner('Processing data...'):
76
- parameters = {"temperature": st.session_state.temperature, "max_tokens": st.session_state.max_tokens}
77
- df = process_scores_multiple(df, st.session_state.num_run, parameters, st.session_state.privilege_label,st.session_state.protect_label, agent, st.session_state.group_name,st.session_state.occupation)
78
- st.session_state.data_processed = True # Mark as processed
79
-
80
- st.write('Processed Data:', df)
81
-
82
- # Allow downloading of the evaluation results
83
- st.download_button(
84
- label="Download Generation Results",
85
- data=df.to_csv().encode('utf-8'),
86
- file_name='generation_results.csv',
87
- mime='text/csv',
88
- )
89
-
90
- if st.button("Reset Experiment Settings"):
91
- st.session_state.occupation = "Programmer"
92
- st.session_state.group_name = "Gender"
93
- st.session_state.privilege_label = "Male"
94
- st.session_state.protect_label = "Female"
95
- st.session_state.num_run = 1
96
- st.session_state.data_processed = False
97
- st.session_state.uploaded_file = None
 
 
 
 
 
 
8
  st.title('Result Generation')
9
  st.sidebar.title('Model Settings')
10
 
11
+ def check_password():
12
+ def password_entered():
13
+ if password_input == os.getenv('PASSWORD'):
14
+ st.session_state['password_correct'] = True
15
+ else:
16
+ st.error("Incorrect Password, please try again.")
17
+
18
+ password_input = st.text_input("Enter Password:", type="password")
19
+ submit_button = st.button("Submit", on_click=password_entered)
20
+
21
+ if submit_button and not st.session_state.get('password_correct', False):
22
+ st.error("Please enter a valid password to access the demo.")
23
+
24
 
25
  # Define a function to manage state initialization
26
  def initialize_state():
 
34
  st.session_state[key] = default
35
 
36
 
37
+ if not st.session_state.get('password_correct', False):
38
+ check_password()
39
+ else:
40
+ st.sidebar.success("Password Verified. Proceed with the demo.")
41
+
42
+ initialize_state()
43
+
44
+ # Model selection and configuration
45
+ model_type = st.sidebar.radio("Select the type of agent", ('GPTAgent', 'AzureAgent'))
46
+ st.session_state.api_key = st.sidebar.text_input("API Key", type="password", value=st.session_state.api_key)
47
+ st.session_state.endpoint_url = st.sidebar.text_input("Endpoint URL", value=st.session_state.endpoint_url)
48
+ st.session_state.deployment_name = st.sidebar.text_input("Model Name", value=st.session_state.deployment_name)
49
+ api_version = '2024-02-15-preview' if model_type == 'GPTAgent' else ''
50
+ st.session_state.temperature = st.sidebar.slider("Temperature", 0.0, 1.0, st.session_state.temperature, 0.01)
51
+ st.session_state.max_tokens = st.sidebar.number_input("Max Tokens", 1, 1000, st.session_state.max_tokens)
52
+
53
+ if st.sidebar.button("Reset Model Info"):
54
+ initialize_state() # Reset all state to defaults
55
+ st.experimental_rerun()
56
+
57
+ if st.sidebar.button("Submit Model Info"):
58
+ st.session_state.model_submitted = True
59
+
60
+ # Ensure experiment settings are only shown if model info is submitted
61
+ if st.session_state.model_submitted:
62
+ df = None
63
+ file_options = st.radio("Choose file source:", ["Upload", "Example"])
64
+ if file_options == "Example":
65
+ df = pd.read_csv("prompt_test.csv")
66
+ else:
67
+ st.session_state.uploaded_file = st.file_uploader("Choose a file")
68
+ if st.session_state.uploaded_file is not None:
69
+ data = StringIO(st.session_state.uploaded_file.getvalue().decode("utf-8"))
70
+ df = pd.read_csv(data)
71
+ if df is not None:
72
+
73
+ st.write('Data:', df)
74
+
75
+ # Button to add a new row
76
+
77
+ st.session_state.occupation = st.text_input("Occupation", value=st.session_state.occupation)
78
+ st.session_state.group_name = st.text_input("Group Name", value=st.session_state.group_name)
79
+ st.session_state.privilege_label = st.text_input("Privilege Label", value=st.session_state.privilege_label)
80
+ st.session_state.protect_label = st.text_input("Protect Label", value=st.session_state.protect_label)
81
+ st.session_state.num_run = st.number_input("Number of Runs", 1, 10, st.session_state.num_run)
82
+
83
+ if st.button('Process Data') and not st.session_state.data_processed:
84
+ # Initialize the correct agent based on model type
85
+ if model_type == 'AzureAgent':
86
+ agent = AzureAgent(st.session_state.api_key, st.session_state.endpoint_url,
87
+ st.session_state.deployment_name)
88
+ else:
89
+ agent = GPTAgent(st.session_state.api_key, st.session_state.endpoint_url,
90
+ st.session_state.deployment_name, api_version)
91
+
92
+ # Process data and display results
93
+ with st.spinner('Processing data...'):
94
+ parameters = {"temperature": st.session_state.temperature, "max_tokens": st.session_state.max_tokens}
95
+ df = process_scores_multiple(df, st.session_state.num_run, parameters, st.session_state.privilege_label,st.session_state.protect_label, agent, st.session_state.group_name,st.session_state.occupation)
96
+ st.session_state.data_processed = True # Mark as processed
97
+
98
+ st.write('Processed Data:', df)
99
+
100
+ # Allow downloading of the evaluation results
101
+ st.download_button(
102
+ label="Download Generation Results",
103
+ data=df.to_csv().encode('utf-8'),
104
+ file_name='generation_results.csv',
105
+ mime='text/csv',
106
+ )
107
+
108
+ if st.button("Reset Experiment Settings"):
109
+ st.session_state.occupation = "Programmer"
110
+ st.session_state.group_name = "Gender"
111
+ st.session_state.privilege_label = "Male"
112
+ st.session_state.protect_label = "Female"
113
+ st.session_state.num_run = 1
114
+ st.session_state.data_processed = False
115
+ st.session_state.uploaded_file = None
pages/2_Evaluation.py CHANGED
@@ -1,63 +1,83 @@
 
 
1
  import streamlit as st
2
  import pandas as pd
3
  from io import StringIO
4
  from util.evaluation import statistical_tests,calculate_correlations,calculate_divergences
5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
  def app():
7
  st.title('Result Evaluation')
8
 
9
- # Allow users to upload a CSV file with processed results
10
- uploaded_file = st.file_uploader("Upload your processed CSV file", type="csv")
11
- if uploaded_file is not None:
12
- data = StringIO(uploaded_file.getvalue().decode('utf-8'))
13
- df = pd.read_csv(data)
14
-
15
- # Add ranks for each score within each row
16
- ranks = df[['Privilege_Avg_Score', 'Protect_Avg_Score', 'Neutral_Avg_Score']].rank(axis=1, ascending=False)
17
-
18
- df['Privilege_Rank'] = ranks['Privilege_Avg_Score']
19
- df['Protect_Rank'] = ranks['Protect_Avg_Score']
20
- df['Neutral_Rank'] = ranks['Neutral_Avg_Score']
21
-
22
- st.write('Uploaded Data:', df)
23
-
24
- if st.button('Evaluate Data'):
25
- with st.spinner('Evaluating data...'):
26
- # Existing statistical tests
27
- test_results = statistical_tests(df)
28
- st.write('Test Results:', test_results)
29
- # evaluation_results = result_evaluation(test_results)
30
- # st.write('Evaluation Results:', evaluation_results)
31
-
32
- # New correlation calculations
33
- correlation_results = calculate_correlations(df)
34
- st.write('Correlation Results:', correlation_results)
35
-
36
- # New divergence calculations
37
- divergence_results = calculate_divergences(df)
38
- st.write('Divergence Results:', divergence_results)
39
-
40
- # Flatten the results for combining
41
- flat_test_results = {f"{key1}_{key2}": value2 for key1, value1 in test_results.items() for key2, value2
42
- in (value1.items() if isinstance(value1, dict) else {key1: value1}.items())}
43
- flat_correlation_results = {f"Correlation_{key1}": value1 for key1, value1 in
44
- correlation_results.items()}
45
- flat_divergence_results = {f"Divergence_{key1}": value1 for key1, value1 in divergence_results.items()}
46
-
47
- # Combine all results
48
- results_combined = {**flat_test_results, **flat_correlation_results, **flat_divergence_results}
49
-
50
- # Convert to DataFrame for download
51
- results_df = pd.DataFrame(list(results_combined.items()), columns=['Metric', 'Value'])
52
-
53
- st.write('Combined Results:', results_df)
54
-
55
- st.download_button(
56
- label="Download Evaluation Results",
57
- data=results_df.to_csv(index=False).encode('utf-8'),
58
- file_name='evaluation_results.csv',
59
- mime='text/csv',
60
- )
 
 
 
 
 
61
 
62
  if __name__ == "__main__":
63
  app()
 
1
+ import os
2
+
3
  import streamlit as st
4
  import pandas as pd
5
  from io import StringIO
6
  from util.evaluation import statistical_tests,calculate_correlations,calculate_divergences
7
 
8
+ def check_password():
9
+ def password_entered():
10
+ if password_input == os.getenv('PASSWORD'):
11
+ st.session_state['password_correct'] = True
12
+ else:
13
+ st.error("Incorrect Password, please try again.")
14
+
15
+ password_input = st.text_input("Enter Password:", type="password")
16
+ submit_button = st.button("Submit", on_click=password_entered)
17
+
18
+ if submit_button and not st.session_state.get('password_correct', False):
19
+ st.error("Please enter a valid password to access the demo.")
20
+
21
  def app():
22
  st.title('Result Evaluation')
23
 
24
+ if not st.session_state.get('password_correct', False):
25
+ check_password()
26
+ else:
27
+ st.sidebar.success("Password Verified. Proceed with the demo.")
28
+
29
+ # Allow users to upload a CSV file with processed results
30
+ uploaded_file = st.file_uploader("Upload your processed CSV file", type="csv")
31
+ if uploaded_file is not None:
32
+ data = StringIO(uploaded_file.getvalue().decode('utf-8'))
33
+ df = pd.read_csv(data)
34
+
35
+ # Add ranks for each score within each row
36
+ ranks = df[['Privilege_Avg_Score', 'Protect_Avg_Score', 'Neutral_Avg_Score']].rank(axis=1, ascending=False)
37
+
38
+ df['Privilege_Rank'] = ranks['Privilege_Avg_Score']
39
+ df['Protect_Rank'] = ranks['Protect_Avg_Score']
40
+ df['Neutral_Rank'] = ranks['Neutral_Avg_Score']
41
+
42
+ st.write('Uploaded Data:', df)
43
+
44
+ if st.button('Evaluate Data'):
45
+ with st.spinner('Evaluating data...'):
46
+ # Existing statistical tests
47
+ test_results = statistical_tests(df)
48
+ st.write('Test Results:', test_results)
49
+ # evaluation_results = result_evaluation(test_results)
50
+ # st.write('Evaluation Results:', evaluation_results)
51
+
52
+ # New correlation calculations
53
+ correlation_results = calculate_correlations(df)
54
+ st.write('Correlation Results:', correlation_results)
55
+
56
+ # New divergence calculations
57
+ divergence_results = calculate_divergences(df)
58
+ st.write('Divergence Results:', divergence_results)
59
+
60
+ # Flatten the results for combining
61
+ flat_test_results = {f"{key1}_{key2}": value2 for key1, value1 in test_results.items() for key2, value2
62
+ in (value1.items() if isinstance(value1, dict) else {key1: value1}.items())}
63
+ flat_correlation_results = {f"Correlation_{key1}": value1 for key1, value1 in
64
+ correlation_results.items()}
65
+ flat_divergence_results = {f"Divergence_{key1}": value1 for key1, value1 in divergence_results.items()}
66
+
67
+ # Combine all results
68
+ results_combined = {**flat_test_results, **flat_correlation_results, **flat_divergence_results}
69
+
70
+ # Convert to DataFrame for download
71
+ results_df = pd.DataFrame(list(results_combined.items()), columns=['Metric', 'Value'])
72
+
73
+ st.write('Combined Results:', results_df)
74
+
75
+ st.download_button(
76
+ label="Download Evaluation Results",
77
+ data=results_df.to_csv(index=False).encode('utf-8'),
78
+ file_name='evaluation_results.csv',
79
+ mime='text/csv',
80
+ )
81
 
82
  if __name__ == "__main__":
83
  app()