|
import pandas as pd |
|
import streamlit as st |
|
import numpy as np |
|
from pre import preprocess_uploaded_file |
|
from difflib import SequenceMatcher |
|
import time |
|
|
|
def similar(a, b, threshold=0.9):
    """Tell whether two sequences are close enough to count as a match.

    Args:
        a: First sequence (typically a string) to compare.
        b: Second sequence, compared against ``a``.
        threshold: Ratio that must be strictly exceeded. Defaults to 0.9.

    Returns:
        True when the ``SequenceMatcher`` similarity ratio of ``a`` and ``b``
        is strictly greater than ``threshold``, otherwise False.
    """
    matcher = SequenceMatcher(None, a, b)
    score = matcher.ratio()
    return score > threshold
|
|
|
def find_different_scenarios(grouped_data, area):
    """List scenarios of one functional area that two environments do not share.

    Every pair of environments that has rows for ``area`` is compared, and
    each scenario found in exactly one environment of the pair is reported.

    Args:
        grouped_data: DataFrame with 'Functional area', 'Environment' and
            'Scenario name' columns.
        area: Value of 'Functional area' to restrict the comparison to.

    Returns:
        A list of ``(scenario, env_with, 'Present', env_without, 'Missing')``
        tuples. Environment pairs follow the order in which environments first
        appear in the filtered data; within a pair, scenarios are ordered by
        their string form so the result is the same on every run. The list is
        empty when fewer than two environments contain the area.
    """
    # Local import: keeps this block self-contained without editing the
    # file-level import section.
    from itertools import combinations

    area_data = grouped_data[grouped_data['Functional area'] == area]

    # Map each environment to the set of scenario names it contains.
    scenarios_by_env = {
        env: set(area_data[area_data['Environment'] == env]['Scenario name'])
        for env in area_data['Environment'].unique()
    }

    diff_scenarios = []
    for env1, env2 in combinations(scenarios_by_env, 2):
        in_env1 = scenarios_by_env[env1]
        only_in_one = in_env1 ^ scenarios_by_env[env2]
        # Set iteration order changes between interpreter runs, so sort for a
        # reproducible result; key=str keeps mixed-type names comparable.
        for scenario in sorted(only_in_one, key=str):
            if scenario in in_env1:
                diff_scenarios.append((scenario, env1, 'Present', env2, 'Missing'))
            else:
                diff_scenarios.append((scenario, env2, 'Present', env1, 'Missing'))

    return diff_scenarios
|
|
|
def _count_statuses_per_scenario(filtered_data):
    """Return one row per (environment, area, scenario) with Total/PASSED/FAILED counts."""
    counts = (
        filtered_data
        .groupby(['Environment', 'Functional area', 'Scenario name', 'Status'])
        .size()
        .unstack(fill_value=0)
    )

    # A status that never occurs yields no column after unstack; add it so the
    # Total arithmetic and the column selection below cannot fail.
    for status in ('PASSED', 'FAILED'):
        if status not in counts.columns:
            counts[status] = 0

    counts['Total'] = counts['PASSED'] + counts['FAILED']
    counts = counts.reset_index()
    return counts[['Environment', 'Functional area', 'Scenario name', 'Total', 'PASSED', 'FAILED']]


def _build_presence_table(filtered_data, area, environments):
    """Return a Present/Missing table of the scenarios in `area` that some environment lacks."""
    area_rows = filtered_data[filtered_data['Functional area'] == area]
    scenarios_by_env = {
        env: set(area_rows[area_rows['Environment'] == env]['Scenario name'])
        for env in environments
    }

    all_scenarios = set().union(*scenarios_by_env.values())
    # Keep only scenarios absent from at least one environment. Sorted by
    # string form because set iteration order is not stable between runs.
    diff_scenarios = sorted(
        (
            scenario for scenario in all_scenarios
            if any(scenario not in names for names in scenarios_by_env.values())
        ),
        key=str,
    )

    rows = [
        [scenario] + [
            'Present' if scenario in scenarios_by_env[env] else 'Missing'
            for env in environments
        ]
        for scenario in diff_scenarios
    ]
    return pd.DataFrame(rows, columns=['Scenario', *environments])


def perform_multi_env_analysis(uploaded_dataframes):
    """Render the multi-environment comparison widgets and tables in Streamlit.

    The frames are concatenated, filtered by the environments and functional
    areas chosen in the UI, and summarised per environment. When at least two
    environments are selected, functional areas whose distinct-scenario counts
    differ between environments are listed, and one of them can be picked to
    see (and download as CSV) which scenarios are present or missing in each
    environment.

    Args:
        uploaded_dataframes: DataFrames to combine. The code reads the columns
            'Environment', 'Functional area', 'Scenario name' and 'Status'.

    Returns:
        None. All output is written to the Streamlit page.
    """
    combined_data = pd.concat(uploaded_dataframes, ignore_index=True)

    unique_environments = combined_data['Environment'].unique()
    # "All" is a pseudo-area offered alongside the real ones.
    unique_areas = np.append(combined_data['Functional area'].unique(), "All")

    selected_environments = st.multiselect(
        "Select environments to display",
        unique_environments,
        default=unique_environments,
    )

    if 'selected_functional_areas' not in st.session_state:
        st.session_state.selected_functional_areas = ["All"]

    # The remembered selection may name areas that are not in the currently
    # uploaded data; st.multiselect rejects defaults missing from its options,
    # so only carry over the ones that still exist.
    available_areas = set(unique_areas)
    default_areas = [
        area for area in st.session_state.selected_functional_areas
        if area in available_areas
    ]

    selected_functional_areas = st.multiselect(
        "Select functional areas",
        unique_areas,
        default=default_areas,
        key="functional_areas_multiselect",
    )

    if st.button("Confirm Functional Area Selection"):
        st.session_state.selected_functional_areas = selected_functional_areas
        st.success("Functional area selection updated!")
        # NOTE(review): presumably pauses so the success message is visible
        # before the rerun clears it - confirm.
        time.sleep(0.5)
        st.rerun()

    if "All" in selected_functional_areas:
        selected_functional_areas = combined_data['Functional area'].unique()

    filtered_data = combined_data[
        combined_data['Environment'].isin(selected_environments)
        & combined_data['Functional area'].isin(selected_functional_areas)
    ]

    grouped_data = _count_statuses_per_scenario(filtered_data)

    st.write("### Summary Statistics")
    summary = grouped_data.groupby('Environment').agg({
        'Total': 'sum',
        'PASSED': 'sum',
        'FAILED': 'sum',
    }).reset_index()

    # NOTE(review): this prepends the column names as an extra first data row;
    # kept as-is, presumably a display workaround - confirm it is still wanted.
    summary_with_headers = pd.concat(
        [pd.DataFrame([summary.columns], columns=summary.columns), summary],
        ignore_index=True,
    )
    st.dataframe(summary_with_headers)

    st.write("### Inconsistent Scenario Count Analysis by Functional Area")

    if len(selected_environments) <= 1:
        st.write("Please select at least two environments for comparison.")
        return

    # Rows: environments, columns: functional areas, values: distinct scenarios.
    scenario_counts = (
        filtered_data
        .groupby(['Environment', 'Functional area'])['Scenario name']
        .nunique()
        .unstack(fill_value=0)
    )

    # Spread between the largest and smallest per-environment count per area.
    count_diff = scenario_counts.max() - scenario_counts.min()
    inconsistent_areas = count_diff.sort_values(ascending=False)
    areas_with_diff = [
        (area, diff) for area, diff in inconsistent_areas.items() if diff > 0
    ]

    st.write("Functional areas with inconsistent scenario counts across environments:")
    for area, diff in areas_with_diff:
        st.write(f"- {area}: Difference of {diff} scenarios")
        st.write(scenario_counts[area])
        st.write("\n")

    if st.checkbox("Show detailed scenario count breakdown", key="show_detailed_breakdown"):
        st.write(scenario_counts)

    selected_area = st.selectbox(
        "Select a functional area to analyze:",
        options=[area for area, _ in areas_with_diff],
    )

    # Nothing to analyse when no area is inconsistent (selectbox has no options).
    if not selected_area:
        return

    st.write(f"### Detailed Analysis of Different Scenarios for '{selected_area}'")

    diff_df = _build_presence_table(filtered_data, selected_area, selected_environments)

    st.write(f"Number of scenarios that differ between environments: {len(diff_df)}")
    st.dataframe(diff_df)

    csv = diff_df.to_csv(index=False)
    st.download_button(
        label="Download CSV",
        data=csv,
        file_name=f"{selected_area}_scenario_comparison.csv",
        mime="text/csv",
    )
|
|
|
def multi_env_compare_main():
    """Streamlit entry point for the multi-environment comparison page.

    Asks how many environments to compare, shows one multi-file CSV uploader
    per environment, passes every uploaded file through
    ``preprocess_uploaded_file`` and hands the resulting frames to
    ``perform_multi_env_analysis``. If nothing has been uploaded, a prompt is
    shown instead.
    """
    st.title("Multi-Environment Comparison")

    environment_count = st.number_input(
        "Enter the number of environments", min_value=1, value=1, step=1
    )

    frames = []
    for env_number in range(1, environment_count + 1):
        files_for_env = st.file_uploader(
            f"Upload CSV files for Environment {env_number}",
            type="csv",
            accept_multiple_files=True,
        )
        # Preprocess each file as soon as its uploader has been rendered.
        frames.extend(
            preprocess_uploaded_file(csv_file) for csv_file in files_for_env
        )

    if not frames:
        st.write("Please upload at least one CSV file.")
        return

    perform_multi_env_analysis(frames)
|
|
|
# Render the comparison page when this file is run as a script.
if __name__ == "__main__":
    multi_env_compare_main()