File size: 9,346 Bytes
62b007e
 
 
 
 
5f944ac
62b007e
 
 
 
fa82923
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
62b007e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5f944ac
 
62b007e
 
5f944ac
 
 
 
 
 
 
62b007e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fa82923
 
62b007e
 
fa82923
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
62b007e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
import pandas as pd
import streamlit as st
import numpy as np
from pre import preprocess_uploaded_file
from difflib import SequenceMatcher
import time

def similar(a, b, threshold=0.9):
    """Return True when *a* and *b* are more alike than *threshold*.

    The likeness score is ``difflib.SequenceMatcher.ratio()`` and the
    comparison is strict: a score exactly equal to *threshold* yields False.
    """
    matcher = SequenceMatcher(None, a, b)
    score = matcher.ratio()
    return score > threshold

def find_different_scenarios(grouped_data, area):
    """List scenarios of one functional area that are not shared by every environment.

    Args:
        grouped_data: DataFrame providing the columns 'Functional area',
            'Environment' and 'Scenario name'.
        area: Value of 'Functional area' the comparison is restricted to.

    Returns:
        A list of ``(scenario, present_env, 'Present', missing_env, 'Missing')``
        tuples, one for every pair of environments and every scenario found
        in exactly one environment of that pair. Pairs follow the order in
        which environments first appear in the data; within a pair the
        scenarios are ordered by their string form so the result is
        reproducible from run to run.
    """
    # Filter data for the specific functional area
    area_data = grouped_data[grouped_data['Functional area'] == area]

    # Get scenarios for each environment
    scenarios_by_env = {
        env: set(area_data.loc[area_data['Environment'] == env, 'Scenario name'])
        for env in area_data['Environment'].unique()
    }

    # Compare every unordered pair of environments exactly once
    diff_scenarios = []
    envs = list(scenarios_by_env)
    for position, env1 in enumerate(envs):
        for env2 in envs[position + 1:]:
            only_in_one = scenarios_by_env[env1] ^ scenarios_by_env[env2]  # symmetric difference
            # Sets have no stable order; sort (by str, so mixed types cannot
            # break the comparison) to keep the output deterministic.
            for scenario in sorted(only_in_one, key=str):
                if scenario in scenarios_by_env[env1]:
                    diff_scenarios.append((scenario, env1, 'Present', env2, 'Missing'))
                else:
                    diff_scenarios.append((scenario, env2, 'Present', env1, 'Missing'))

    return diff_scenarios

def perform_multi_env_analysis(uploaded_dataframes):
    """Render the multi-environment comparison widgets and tables in Streamlit.

    The uploaded frames are concatenated, filtered by the environments and
    functional areas chosen in the UI, summarised per environment, and then
    compared per functional area to list scenarios that are present in some
    environments but missing in others.

    Args:
        uploaded_dataframes: Sequence of DataFrames. The code reads the
            columns 'Environment', 'Functional area', 'Scenario name' and
            'Status' from their concatenation.

    Returns:
        None. All output is written through Streamlit calls.
    """
    # Concatenate all dataframes into a single dataframe
    combined_data = pd.concat(uploaded_dataframes, ignore_index=True)

    # Get unique environments and functional areas
    unique_environments = combined_data['Environment'].unique()
    unique_areas = np.append(combined_data['Functional area'].unique(), "All")

    # Select environments to display
    selected_environments = st.multiselect("Select environments to display", unique_environments, default=unique_environments)

    # Initialize session state for selected functional areas if it doesn't exist
    if 'selected_functional_areas' not in st.session_state:
        st.session_state.selected_functional_areas = ["All"]

    # The stored selection may come from an earlier upload whose functional
    # areas are no longer offered; Streamlit rejects defaults that are not
    # among the options, so keep only the ones that still exist.
    stored_areas = st.session_state.selected_functional_areas
    available_areas = set(unique_areas.tolist())
    default_areas = [stored for stored in stored_areas if stored in available_areas]
    if stored_areas and not default_areas:
        # Everything stored was stale: fall back to the initial default.
        default_areas = ["All"]

    # Select functional areas to display, using session state
    selected_functional_areas = st.multiselect(
        "Select functional areas",
        unique_areas,
        default=default_areas,
        key="functional_areas_multiselect"
    )

    # Add a button to confirm the selection
    if st.button("Confirm Functional Area Selection"):
        # Update session state with the new selection
        st.session_state.selected_functional_areas = selected_functional_areas
        st.success("Functional area selection updated!")
        time.sleep(0.5)  # Add a small delay for better user experience
        st.rerun()  # Rerun the app to reflect the changes

    if "All" in selected_functional_areas:
        selected_functional_areas = combined_data['Functional area'].unique()

    # Filter data based on selected environments and functional areas
    filtered_data = combined_data[
        (combined_data['Environment'].isin(selected_environments)) &
        (combined_data['Functional area'].isin(selected_functional_areas))
    ]

    # Count rows per Environment / Functional area / Scenario name, one column per Status
    grouped_data = filtered_data.groupby(['Environment', 'Functional area', 'Scenario name', 'Status']).size().unstack(fill_value=0)

    # Ensure 'PASSED' and 'FAILED' columns exist
    if 'PASSED' not in grouped_data.columns:
        grouped_data['PASSED'] = 0
    if 'FAILED' not in grouped_data.columns:
        grouped_data['FAILED'] = 0

    # Total counts only PASSED and FAILED rows; any other status is left out
    grouped_data['Total'] = grouped_data['PASSED'] + grouped_data['FAILED']

    # Reset index to make Environment, Functional area, and Scenario name as columns
    grouped_data = grouped_data.reset_index()

    # Reorder columns
    grouped_data = grouped_data[['Environment', 'Functional area', 'Scenario name', 'Total', 'PASSED', 'FAILED']]

    # Display summary statistics
    st.write("### Summary Statistics")
    summary = grouped_data.groupby('Environment').agg({
        'Total': 'sum',
        'PASSED': 'sum',
        'FAILED': 'sum'
    }).reset_index()

    # Add column names as the first row
    summary_with_headers = pd.concat([pd.DataFrame([summary.columns], columns=summary.columns), summary], ignore_index=True)

    # Display the DataFrame
    st.dataframe(summary_with_headers)

    st.write("### Inconsistent Scenario Count Analysis by Functional Area")

    # A comparison needs at least two environments
    if len(selected_environments) <= 1:
        st.write("Please select at least two environments for comparison.")
        return

    # Distinct scenario count per environment (rows) and functional area (columns)
    scenario_counts = filtered_data.groupby(['Environment', 'Functional area'])['Scenario name'].nunique().unstack(fill_value=0)

    # Calculate the difference between max and min counts for each functional area
    count_diff = scenario_counts.max() - scenario_counts.min()

    # Sort functional areas by count difference, descending
    inconsistent_areas = count_diff.sort_values(ascending=False)

    st.write("Functional areas with inconsistent scenario counts across environments:")
    for area, diff in inconsistent_areas.items():
        if diff > 0:
            st.write(f"- {area}: Difference of {diff} scenarios")
            st.write(scenario_counts[area])
            st.write("\n")

    # Option to show detailed breakdown with a unique key
    if st.checkbox("Show detailed scenario count breakdown", key="show_detailed_breakdown"):
        st.write(scenario_counts)

    # Add a selectbox for choosing the functional area to analyze
    selected_area = st.selectbox("Select a functional area to analyze:",
                                 options=[area for area, diff in inconsistent_areas.items() if diff > 0])

    # Nothing to analyze when no area is inconsistent (selectbox yields no value)
    if not selected_area:
        return

    st.write(f"### Detailed Analysis of Different Scenarios for '{selected_area}'")

    # Get scenarios for each environment
    scenarios_by_env = {env: set(filtered_data[(filtered_data['Environment'] == env) &
                                               (filtered_data['Functional area'] == selected_area)]['Scenario name'])
                        for env in selected_environments}

    # Keep the scenarios that are absent from at least one environment
    all_scenarios = set.union(*scenarios_by_env.values())
    diff_scenarios = [scenario for scenario in all_scenarios
                      if any(scenario not in env_scenarios for env_scenarios in scenarios_by_env.values())]

    # Create a DataFrame to show presence/absence of scenarios
    diff_df = pd.DataFrame(index=diff_scenarios, columns=selected_environments)
    for scenario in diff_scenarios:
        for env in selected_environments:
            diff_df.at[scenario, env] = 'Present' if scenario in scenarios_by_env[env] else 'Missing'

    diff_df.reset_index(inplace=True)
    diff_df.rename(columns={'index': 'Scenario'}, inplace=True)

    # Sort the DataFrame to show scenarios with differences first
    # (row[1:] skips the leading 'Scenario' column)
    diff_df['has_diff'] = diff_df.apply(lambda row: len(set(row[1:])) > 1, axis=1)
    diff_df = diff_df.sort_values('has_diff', ascending=False).drop('has_diff', axis=1)

    st.write(f"Number of scenarios that differ between environments: {len(diff_scenarios)}")

    # Display the DataFrame
    st.dataframe(diff_df)

    # Provide a download button for the DataFrame
    csv = diff_df.to_csv(index=False)
    st.download_button(
        label="Download CSV",
        data=csv,
        file_name=f"{selected_area}_scenario_comparison.csv",
        mime="text/csv",
    )

def multi_env_compare_main():
    """Streamlit entry point: gather CSV uploads per environment and analyse them.

    Asks for the number of environments, shows one multi-file CSV uploader
    per environment, runs every uploaded file through
    ``preprocess_uploaded_file`` and hands the collected frames to
    ``perform_multi_env_analysis``. When nothing has been uploaded a prompt
    is shown instead.
    """
    st.title("Multi-Environment Comparison")

    # How many uploaders to render
    num_environments = st.number_input("Enter the number of environments", min_value=1, value=1, step=1)

    # One uploader per environment; every file it returns is preprocessed
    # and collected into a single flat list.
    uploaded_dataframes = []
    for env_index in range(num_environments):
        files_for_env = st.file_uploader(
            f"Upload CSV files for Environment {env_index + 1}",
            type="csv",
            accept_multiple_files=True,
        )
        uploaded_dataframes.extend(
            preprocess_uploaded_file(csv_file) for csv_file in files_for_env
        )

    # Without any upload there is nothing to analyse
    if not uploaded_dataframes:
        st.write("Please upload at least one CSV file.")
        return

    perform_multi_env_analysis(uploaded_dataframes)

# Render the comparison page only when this file is run as the main module,
# not when it is imported.
if __name__ == "__main__":
    multi_env_compare_main()