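"""Streamlit app for comparing automated test results across multiple environments.

Users upload one or more CSV files per environment; the app summarises PASSED/FAILED
counts per environment and highlights scenarios that are present in some environments
but missing in others.
"""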
import pandas as pd
import streamlit as st
import numpy as np
from pre import preprocess_uploaded_file
from difflib import SequenceMatcher
import time
def similar(a, b, threshold=0.9):
    """Return True when two strings are at least `threshold` similar (SequenceMatcher ratio)."""
    return SequenceMatcher(None, a, b).ratio() > threshold
def find_different_scenarios(grouped_data, area):
    """Return (scenario, env, 'Present', other_env, 'Missing') tuples for one functional area."""
    # Filter data for the specific functional area
    area_data = grouped_data[grouped_data['Functional area'] == area]
    # Get scenarios for each environment
    scenarios_by_env = {env: set(area_data[area_data['Environment'] == env]['Scenario name'])
                        for env in area_data['Environment'].unique()}
    # Find scenarios that are in one environment but not the other
    diff_scenarios = []
    envs = list(scenarios_by_env.keys())
    for i in range(len(envs)):
        for j in range(i + 1, len(envs)):
            env1, env2 = envs[i], envs[j]
            diff = scenarios_by_env[env1] ^ scenarios_by_env[env2]  # symmetric difference
            for scenario in diff:
                if scenario in scenarios_by_env[env1]:
                    diff_scenarios.append((scenario, env1, 'Present', env2, 'Missing'))
                else:
                    diff_scenarios.append((scenario, env2, 'Present', env1, 'Missing'))
    return diff_scenarios
def perform_multi_env_analysis(uploaded_dataframes):
    """Compare scenario results across the selected environments and render the analysis in Streamlit."""
    # Concatenate all dataframes into a single dataframe
    combined_data = pd.concat(uploaded_dataframes, ignore_index=True)

    # Get unique environments and functional areas
    unique_environments = combined_data['Environment'].unique()
    unique_areas = np.append(combined_data['Functional area'].unique(), "All")

    # Select environments to display
    selected_environments = st.multiselect("Select environments to display", unique_environments, default=unique_environments)

    # Initialize session state for selected functional areas if it doesn't exist
    if 'selected_functional_areas' not in st.session_state:
        st.session_state.selected_functional_areas = ["All"]

    # Select functional areas to display, using session state
    selected_functional_areas = st.multiselect(
        "Select functional areas",
        unique_areas,
        default=st.session_state.selected_functional_areas,
        key="functional_areas_multiselect"
    )

    # Add a button to confirm the selection
    if st.button("Confirm Functional Area Selection"):
        # Update session state with the new selection
        st.session_state.selected_functional_areas = selected_functional_areas
        st.success("Functional area selection updated!")
        time.sleep(0.5)  # Small delay so the success message stays visible
        st.rerun()  # Rerun the app to reflect the changes

    if "All" in selected_functional_areas:
        selected_functional_areas = combined_data['Functional area'].unique()

    # Filter data based on selected environments and functional areas
    filtered_data = combined_data[
        (combined_data['Environment'].isin(selected_environments)) &
        (combined_data['Functional area'].isin(selected_functional_areas))
    ]

    # Group data by Environment, Functional area, Scenario name, and Status
    grouped_data = filtered_data.groupby(['Environment', 'Functional area', 'Scenario name', 'Status']).size().unstack(fill_value=0)

    # Ensure 'PASSED' and 'FAILED' columns exist
    if 'PASSED' not in grouped_data.columns:
        grouped_data['PASSED'] = 0
    if 'FAILED' not in grouped_data.columns:
        grouped_data['FAILED'] = 0

    # Calculate total scenarios
    grouped_data['Total'] = grouped_data['PASSED'] + grouped_data['FAILED']

    # Reset index to make Environment, Functional area, and Scenario name regular columns
    grouped_data = grouped_data.reset_index()

    # Reorder columns
    grouped_data = grouped_data[['Environment', 'Functional area', 'Scenario name', 'Total', 'PASSED', 'FAILED']]

    # Display summary statistics
    st.write("### Summary Statistics")
    summary = grouped_data.groupby('Environment').agg({
        'Total': 'sum',
        'PASSED': 'sum',
        'FAILED': 'sum'
    }).reset_index()
    # Add column names as the first row
    summary_with_headers = pd.concat([pd.DataFrame([summary.columns], columns=summary.columns), summary], ignore_index=True)
    # Display the DataFrame
    st.dataframe(summary_with_headers)

    # Scenario sets per environment (recomputed per functional area in the detailed analysis below)
    scenarios_by_env = {env: set(grouped_data[grouped_data['Environment'] == env]['Scenario name']) for env in selected_environments}
    missing_scenarios = []
    mismatched_scenarios = []

    st.write("### Inconsistent Scenario Count Analysis by Functional Area")
    if len(selected_environments) > 1:
        # Group data by Environment and Functional area, count scenarios
        scenario_counts = filtered_data.groupby(['Environment', 'Functional area'])['Scenario name'].nunique().unstack(fill_value=0)

        # Calculate the difference between max and min counts for each functional area
        count_diff = scenario_counts.max() - scenario_counts.min()

        # Sort functional areas by count difference, descending
        inconsistent_areas = count_diff.sort_values(ascending=False)

        st.write("Functional areas with inconsistent scenario counts across environments:")
        for area, diff in inconsistent_areas.items():
            if diff > 0:
                st.write(f"- {area}: Difference of {diff} scenarios")
                st.write(scenario_counts[area])
                st.write("\n")

        # Option to show detailed breakdown with a unique key
        if st.checkbox("Show detailed scenario count breakdown", key="show_detailed_breakdown"):
            st.write(scenario_counts)

        # Add a selectbox for choosing the functional area to analyze
        selected_area = st.selectbox("Select a functional area to analyze:",
                                     options=[area for area, diff in inconsistent_areas.items() if diff > 0])

        if selected_area:
            st.write(f"### Detailed Analysis of Different Scenarios for '{selected_area}'")
            # Get scenarios for each environment
            scenarios_by_env = {env: set(filtered_data[(filtered_data['Environment'] == env) &
                                                       (filtered_data['Functional area'] == selected_area)]['Scenario name'])
                                for env in selected_environments}

            # Find scenarios that are different between environments
            all_scenarios = set.union(*scenarios_by_env.values())
            diff_scenarios = [scenario for scenario in all_scenarios
                              if any(scenario not in env_scenarios for env_scenarios in scenarios_by_env.values())]

            # Create a DataFrame to show presence/absence of scenarios
            diff_df = pd.DataFrame(index=diff_scenarios, columns=selected_environments)
            for scenario in diff_scenarios:
                for env in selected_environments:
                    diff_df.at[scenario, env] = 'Present' if scenario in scenarios_by_env[env] else 'Missing'

            diff_df.reset_index(inplace=True)
            diff_df.rename(columns={'index': 'Scenario'}, inplace=True)

            # Sort the DataFrame to show scenarios with differences first
            diff_df['has_diff'] = diff_df.apply(lambda row: len(set(row.iloc[1:])) > 1, axis=1)
            diff_df = diff_df.sort_values('has_diff', ascending=False).drop('has_diff', axis=1)

            st.write(f"Number of scenarios that differ between environments: {len(diff_scenarios)}")
            # Display the DataFrame
            st.dataframe(diff_df)

            # Provide a download button for the DataFrame
            csv = diff_df.to_csv(index=False)
            st.download_button(
                label="Download CSV",
                data=csv,
                file_name=f"{selected_area}_scenario_comparison.csv",
                mime="text/csv",
            )
    else:
        st.write("Please select at least two environments for comparison.")
def multi_env_compare_main():
    """Entry point: collect CSV uploads per environment and run the comparison."""
    st.title("Multi-Environment Comparison")

    # Get the number of environments from the user
    num_environments = st.number_input("Enter the number of environments", min_value=1, value=1, step=1)

    # Initialize list to store uploaded dataframes
    uploaded_dataframes = []

    # Loop through the number of environments and create file uploaders
    for i in range(num_environments):
        uploaded_files = st.file_uploader(f"Upload CSV files for Environment {i + 1}", type="csv", accept_multiple_files=True)
        for uploaded_file in uploaded_files:
            # Preprocess the uploaded CSV file
            data = preprocess_uploaded_file(uploaded_file)
            # Append the dataframe to the list
            uploaded_dataframes.append(data)

    # Check if any files were uploaded
    if uploaded_dataframes:
        # Perform analysis for uploaded data
        perform_multi_env_analysis(uploaded_dataframes)
    else:
        st.write("Please upload at least one CSV file.")


if __name__ == "__main__":
    multi_env_compare_main()