Spaces:

BananaSauce
/

batch-run-csv-analyser

Sleeping

App Files Files Community

BananaSauce committed on Mar 7

Commit

d2ed71e

1 Parent(s): c6b142e

complete xlsx support

Browse files

Files changed (4) hide show

app.py +40 -20
multiple.py +58 -39
pre.py +100 -32
second.py +21 -3

app.py CHANGED Viewed

@@ -16,6 +16,11 @@ def single_main(uploaded_file):
        # Process the file with header
         data = preprocess_uploaded_file(uploaded_file)
        # Display scenarios with status "failed" grouped by functional area
         failed_scenarios = data[data['Status'] == 'FAILED']
         passed_scenarios = data[data['Status'] == 'PASSED']
@@ -44,6 +49,9 @@ def single_main(uploaded_file):
             # st.write(f"Scenarios with status '{selected_status}' grouped by functional area:")
             st.markdown(f"### Scenarios with status '{selected_status}' grouped by functional area:")
             # Select a range of functional areas to filter scenarios
             selected_functional_areas = st.multiselect("Select functional areas", unique_areas, ["All"])
@@ -55,7 +63,10 @@ def single_main(uploaded_file):
             if not selected_functional_areas:  # Check if the list is empty
                 st.error("Please select at least one functional area.")
             else:
-             # Calculate the average time spent for each functional area
                 average_time_spent_seconds = filtered_scenarios.groupby('Functional area')['Time spent'].mean().reset_index()
                 # Convert average time spent from seconds to minutes and seconds format
@@ -69,7 +80,11 @@ def single_main(uploaded_file):
                 # Filter scenarios based on selected functional area
                 if selected_status == 'Failed':
-                    grouped_filtered_scenarios = filtered_scenarios.groupby('Functional area')[['Scenario Name', 'Error Message','Time spent(m:s)']].apply(lambda x: x.reset_index(drop=True))
                 elif selected_status == 'Passed':
                     grouped_filtered_scenarios = filtered_scenarios.groupby('Functional area')[['Scenario Name', 'Time spent(m:s)']].apply(lambda x: x.reset_index(drop=True))
                 else:
@@ -96,24 +111,29 @@ def single_main(uploaded_file):
                     # Create and display bar graph of errors by functional area
                     st.write(f"### Bar graph showing number of '{selected_status}' scenarios in each functional area:")
                     error_counts = grouped_filtered_scenarios['Functional area'].value_counts()
-                    plt.figure(figsize=(10, 6))
-                    plt.bar(error_counts.index, error_counts.values)
-                    plt.xlabel('Functional Area')
-                    plt.ylabel('Number of Failures')
-                    plt.title(f"Number of '{selected_status}' scenarios by Functional Area")
-                    plt.xticks(rotation=45, ha='right')
-                    # Set y-axis limits and ticks for consistent interval of 1
-                    y_max = max(error_counts.values) + 1
-                    plt.ylim(0, y_max)
-                    plt.yticks(range(0, y_max, 1))
-                    # Display individual numbers on y-axis
-                    for i, count in enumerate(error_counts.values):
-                        plt.text(i, count, str(count), ha='center', va='bottom')
-                    plt.tight_layout()  # Add this line to adjust layout
-                    st.pyplot(plt)
         else:
             st.write("### No scenarios with status 'failed' found.")
     pass

        # Process the file with header
         data = preprocess_uploaded_file(uploaded_file)
+       # Display debugging information
+        st.write("Data shape:", data.shape)
+        st.write("Unique functional areas:", data['Functional area'].nunique())
+        st.write("Sample of data:", data.head())
        # Display scenarios with status "failed" grouped by functional area
         failed_scenarios = data[data['Status'] == 'FAILED']
         passed_scenarios = data[data['Status'] == 'PASSED']
             # st.write(f"Scenarios with status '{selected_status}' grouped by functional area:")
             st.markdown(f"### Scenarios with status '{selected_status}' grouped by functional area:")
+            # Display count of unique functional areas
+            # st.write(f"Number of unique functional areas: {len(unique_areas) - 1}")  # Subtract 1 for "All"
             # Select a range of functional areas to filter scenarios
             selected_functional_areas = st.multiselect("Select functional areas", unique_areas, ["All"])
             if not selected_functional_areas:  # Check if the list is empty
                 st.error("Please select at least one functional area.")
             else:
+                # Display count of filtered scenarios
+                st.write(f"Number of filtered scenarios: {len(filtered_scenarios)}")
+                # Calculate the average time spent for each functional area
                 average_time_spent_seconds = filtered_scenarios.groupby('Functional area')['Time spent'].mean().reset_index()
                 # Convert average time spent from seconds to minutes and seconds format
                 # Filter scenarios based on selected functional area
                 if selected_status == 'Failed':
+                    # Check if Failed Step column exists
+                    if 'Failed Step' in filtered_scenarios.columns:
+                        grouped_filtered_scenarios = filtered_scenarios.groupby('Functional area')[['Scenario Name', 'Error Message', 'Failed Step', 'Time spent(m:s)']].apply(lambda x: x.reset_index(drop=True))
+                    else:
+                        grouped_filtered_scenarios = filtered_scenarios.groupby('Functional area')[['Scenario Name', 'Error Message', 'Time spent(m:s)']].apply(lambda x: x.reset_index(drop=True))
                 elif selected_status == 'Passed':
                     grouped_filtered_scenarios = filtered_scenarios.groupby('Functional area')[['Scenario Name', 'Time spent(m:s)']].apply(lambda x: x.reset_index(drop=True))
                 else:
                     # Create and display bar graph of errors by functional area
                     st.write(f"### Bar graph showing number of '{selected_status}' scenarios in each functional area:")
                     error_counts = grouped_filtered_scenarios['Functional area'].value_counts()
+                    # Only create the graph if there are errors to display
+                    if not error_counts.empty:
+                        plt.figure(figsize=(10, 6))
+                        plt.bar(error_counts.index, error_counts.values)
+                        plt.xlabel('Functional Area')
+                        plt.ylabel('Number of Failures')
+                        plt.title(f"Number of '{selected_status}' scenarios by Functional Area")
+                        plt.xticks(rotation=45, ha='right')
+                        # Set y-axis limits and ticks for consistent interval of 1
+                        y_max = max(error_counts.values) + 1
+                        plt.ylim(0, y_max)
+                        plt.yticks(range(0, y_max, 1))
+                        # Display individual numbers on y-axis
+                        for i, count in enumerate(error_counts.values):
+                            plt.text(i, count, str(count), ha='center', va='bottom')
+                        plt.tight_layout()  # Add this line to adjust layout
+                        st.pyplot(plt)
+                    else:
+                        st.info(f"No '{selected_status}' scenarios found to display in the graph.")
         else:
             st.write("### No scenarios with status 'failed' found.")
     pass

multiple.py CHANGED Viewed

@@ -9,6 +9,11 @@ def perform_analysis(uploaded_dataframes):
     # Concatenate all dataframes into a single dataframe
     combined_data = pd.concat(uploaded_dataframes, ignore_index=True)
     # Display scenarios with status "failed" grouped by functional area
     failed_scenarios = combined_data[combined_data['Status'] == 'FAILED']
     passed_scenarios = combined_data[combined_data['Status'] == 'PASSED']
@@ -34,6 +39,9 @@ def perform_analysis(uploaded_dataframes):
         # st.write(f"Scenarios with status '{selected_status}' grouped by functional area:")
         st.markdown(f"### Scenarios with status '{selected_status}' grouped by functional area:")
         # Select a range of functional areas to filter scenarios
         selected_functional_areas = st.multiselect("Select functional areas", unique_areas, ["All"])
@@ -45,7 +53,10 @@ def perform_analysis(uploaded_dataframes):
         if not selected_functional_areas:  # Check if the list is empty
             st.error("Please select at least one functional area.")
         else:
-         # Calculate the average time spent for each functional area
             average_time_spent_seconds = filtered_scenarios.groupby('Functional area')['Time spent'].mean().reset_index()
             # Convert average time spent from seconds to minutes and seconds format
             average_time_spent_seconds['Time spent'] = pd.to_datetime(average_time_spent_seconds['Time spent'], unit='s').dt.strftime('%M:%S')
@@ -69,7 +80,11 @@ def perform_analysis(uploaded_dataframes):
              # Filter scenarios based on selected functional area
             if selected_status == 'Failed':
-                grouped_filtered_scenarios = filtered_scenarios.groupby('Environment')[['Functional area', 'Scenario Name', 'Error Message','Time spent(m:s)','Start datetime']].apply(lambda x: x.reset_index(drop=True))
             elif selected_status == 'Passed':
                 grouped_filtered_scenarios = filtered_scenarios.groupby('Functional area')[['Scenario Name', 'Time spent(m:s)']].apply(lambda x: x.reset_index(drop=True))
             else:
@@ -86,59 +101,63 @@ def perform_analysis(uploaded_dataframes):
             # Sort the average time spent table by start datetime
             average_time_spent_seconds = average_time_spent_seconds.sort_values(by='Start datetime')
-           # Display average time spent on each functional area in a table
-            st.markdown("### Total and Average Time Spent on Each Functional Area")
-            average_time_spent_seconds.index = average_time_spent_seconds.index + 1
-             # Rename the columns for clarity
-            average_time_spent_seconds.rename(columns={'Start datetime': 'Start Datetime', 'End datetime': 'End Datetime', 'Time spent':'Average Time Spent'}, inplace=True)
-            # Rearrange the columns
-            average_time_spent_seconds = average_time_spent_seconds[['Functional area', 'Total Time Spent', 'Start Datetime', 'End Datetime', 'Average Time Spent']]
-            st.dataframe(average_time_spent_seconds)
             # Check if selected_status is 'Failed' and grouped_filtered_scenarios length is less than or equal to 400
             if selected_status != 'Passed':
                 # Create and display bar graph of errors by functional area
                 st.write(f"### Bar graph showing number of '{selected_status}' scenarios in each functional area:")
                 error_counts = grouped_filtered_scenarios['Functional area'].value_counts()
-                plt.figure(figsize=(12, 10))
-                bars = plt.bar(error_counts.index, error_counts.values)
-                plt.xlabel('Functional Area')
-                plt.ylabel('Number of Failures')
-                plt.title(f"Number of '{selected_status}' scenarios by Functional Area")
-                plt.xticks(rotation=45, ha='right', fontsize=10)
-                # Set y-axis limits and ticks for consistent interval of 1
-                y_max = max(error_counts.values) + 1
-                plt.ylim(0, y_max)
-                plt.yticks(range(0, y_max, 1), fontsize=10)
-                # Display individual numbers on y-axis
-                for bar in bars:
-                    height = bar.get_height()
-                    plt.text(bar.get_x() + bar.get_width() / 2, height, str(int(height)),
-                             ha='center', va='bottom')  # Reduce font size of individual numbers
-                plt.tight_layout()  # Add this line to adjust layout
-                st.pyplot(plt)
     pass
 def multiple_main():
-    # Get the number of environments from the user
-    num_environments = st.number_input("Enter the number of environments", min_value=1, value=1, step=1)
     # Initialize list to store uploaded dataframes
     uploaded_dataframes = []
     # Loop through the number of environments and create file uploaders
-    for i in range(num_environments):
-        uploaded_files = st.file_uploader(f"Upload CSV or XLSX files for Environment {i + 1}", type=["csv", "xlsx"], accept_multiple_files=True)
-        for uploaded_file in uploaded_files:
-            # Preprocess the uploaded file
-            data = preprocess_uploaded_file(uploaded_file)
-            # Append the dataframe to the list
-            uploaded_dataframes.append(data)
     # Check if any files were uploaded
     if uploaded_dataframes:

     # Concatenate all dataframes into a single dataframe
     combined_data = pd.concat(uploaded_dataframes, ignore_index=True)
+    # Display debugging information
+    # st.write("Combined data shape:", combined_data.shape)
+    # st.write("Unique functional areas in combined data:", combined_data['Functional area'].nunique())
+    # st.write("Sample of combined data:", combined_data.head())
     # Display scenarios with status "failed" grouped by functional area
     failed_scenarios = combined_data[combined_data['Status'] == 'FAILED']
     passed_scenarios = combined_data[combined_data['Status'] == 'PASSED']
         # st.write(f"Scenarios with status '{selected_status}' grouped by functional area:")
         st.markdown(f"### Scenarios with status '{selected_status}' grouped by functional area:")
+        # # Display count of unique functional areas
+        # st.write(f"Number of unique functional areas: {len(unique_areas) - 1}")  # Subtract 1 for "All"
         # Select a range of functional areas to filter scenarios
         selected_functional_areas = st.multiselect("Select functional areas", unique_areas, ["All"])
         if not selected_functional_areas:  # Check if the list is empty
             st.error("Please select at least one functional area.")
         else:
+            # Display count of filtered scenarios
+            st.write(f"Number of filtered scenarios: {len(filtered_scenarios)}")
+            # Calculate the average time spent for each functional area
             average_time_spent_seconds = filtered_scenarios.groupby('Functional area')['Time spent'].mean().reset_index()
             # Convert average time spent from seconds to minutes and seconds format
             average_time_spent_seconds['Time spent'] = pd.to_datetime(average_time_spent_seconds['Time spent'], unit='s').dt.strftime('%M:%S')
              # Filter scenarios based on selected functional area
             if selected_status == 'Failed':
+                # Check if Failed Step column exists
+                if 'Failed Step' in filtered_scenarios.columns:
+                    grouped_filtered_scenarios = filtered_scenarios.groupby('Environment')[['Functional area', 'Scenario Name', 'Error Message', 'Failed Step', 'Time spent(m:s)', 'Start datetime']].apply(lambda x: x.reset_index(drop=True))
+                else:
+                    grouped_filtered_scenarios = filtered_scenarios.groupby('Environment')[['Functional area', 'Scenario Name', 'Error Message', 'Time spent(m:s)', 'Start datetime']].apply(lambda x: x.reset_index(drop=True))
             elif selected_status == 'Passed':
                 grouped_filtered_scenarios = filtered_scenarios.groupby('Functional area')[['Scenario Name', 'Time spent(m:s)']].apply(lambda x: x.reset_index(drop=True))
             else:
             # Sort the average time spent table by start datetime
             average_time_spent_seconds = average_time_spent_seconds.sort_values(by='Start datetime')
+        #    # Display average time spent on each functional area in a table
+        #     st.markdown("### Total and Average Time Spent on Each Functional Area")
+        #     average_time_spent_seconds.index = average_time_spent_seconds.index + 1
+        #      # Rename the columns for clarity
+        #     average_time_spent_seconds.rename(columns={'Start datetime': 'Start Datetime', 'End datetime': 'End Datetime', 'Time spent':'Average Time Spent'}, inplace=True)
+        #     # Rearrange the columns
+        #     average_time_spent_seconds = average_time_spent_seconds[['Functional area', 'Total Time Spent', 'Start Datetime', 'End Datetime', 'Average Time Spent']]
+        #     st.dataframe(average_time_spent_seconds)
             # Check if selected_status is 'Failed' and grouped_filtered_scenarios length is less than or equal to 400
             if selected_status != 'Passed':
                 # Create and display bar graph of errors by functional area
                 st.write(f"### Bar graph showing number of '{selected_status}' scenarios in each functional area:")
                 error_counts = grouped_filtered_scenarios['Functional area'].value_counts()
+                # Only create the graph if there are errors to display
+                if not error_counts.empty:
+                    plt.figure(figsize=(12, 10))
+                    bars = plt.bar(error_counts.index, error_counts.values)
+                    plt.xlabel('Functional Area')
+                    plt.ylabel('Number of Failures')
+                    plt.title(f"Number of '{selected_status}' scenarios by Functional Area")
+                    plt.xticks(rotation=45, ha='right', fontsize=10)
+                    # Set y-axis limits and ticks for consistent interval of 1
+                    y_max = max(error_counts.values) + 1
+                    plt.ylim(0, y_max)
+                    plt.yticks(range(0, y_max, 1), fontsize=10)
+                    # Display individual numbers on y-axis
+                    for bar in bars:
+                        height = bar.get_height()
+                        plt.text(bar.get_x() + bar.get_width() / 2, height, str(int(height)),
+                                ha='center', va='bottom')  # Reduce font size of individual numbers
+                    plt.tight_layout()  # Add this line to adjust layout
+                    st.pyplot(plt)
+                else:
+                    st.info(f"No '{selected_status}' scenarios found to display in the graph.")
     pass
 def multiple_main():
+    # num_environments = st.number_input("Enter the number of environments", min_value=1, value=1, step=1)
     # Initialize list to store uploaded dataframes
     uploaded_dataframes = []
     # Loop through the number of environments and create file uploaders
+    # for i in range(num_environments):
+    uploaded_files = st.file_uploader("Upload multiple XLSX files from different environments", type=["xlsx"], accept_multiple_files=True)
+    for uploaded_file in uploaded_files:
+        # Preprocess the uploaded file
+        data = preprocess_uploaded_file(uploaded_file)
+        # Append the dataframe to the list
+        uploaded_dataframes.append(data)
     # Check if any files were uploaded
     if uploaded_dataframes:

pre.py CHANGED Viewed

@@ -39,27 +39,63 @@ def preprocess_xlsx(uploaded_file):
     dtype_dict = {
         'Feature Name': 'string',
         'Scenario Name': 'string',
-        'Total Time Taken (ms)': 'float64'
     }
     # Read both the first sheet for error messages and "Time Taken" sheet
     excel_file = pd.ExcelFile(uploaded_file, engine='openpyxl')
-    # Read error messages from first sheet
     error_df = pd.read_excel(excel_file, sheet_name=0)
-    # Read time taken data
     df = pd.read_excel(
         excel_file,
         sheet_name='Time Taken',
         dtype=dtype_dict
     )
     # Convert Failed Scenario column to boolean after reading
-    df['Failed Scenario'] = df['Failed Scenario'].astype(str).map({'TRUE': True, 'FALSE': False})
-    # Get error messages from the first sheet
-    error_messages = error_df[['Scenario Name', 'Error Message']].copy()
     # Extract date from filename (e.g., RI2211_batch_20250225_27031.xlsx)
     filename = uploaded_file.name
@@ -81,12 +117,17 @@ def preprocess_xlsx(uploaded_file):
     result_df = pd.DataFrame({
         'Functional area': df['Feature Name'],
         'Scenario Name': df['Scenario Name'],
-        'Status': df['Failed Scenario'].map({True: 'FAILED', False: 'PASSED'}),
         'Time spent': df['Total Time Taken (ms)'] / 1000  # Convert ms to seconds
     })
     # Merge error messages with result dataframe
-    result_df = result_df.merge(error_messages, on='Scenario Name', how='left')
     # Add environment column
     result_df['Environment'] = environment
@@ -94,10 +135,37 @@ def preprocess_xlsx(uploaded_file):
     # Calculate formatted time spent
     result_df['Time spent(m:s)'] = pd.to_datetime(result_df['Time spent'], unit='s').dt.strftime('%M:%S')
-    # Add start datetime (using file date since actual start time isn't available in this sheet)
     result_df['Start datetime'] = pd.to_datetime(file_date)
     result_df['End datetime'] = result_df['Start datetime'] + pd.to_timedelta(result_df['Time spent'], unit='s')
     return result_df
 def fill_missing_data(data, column_index, value):
@@ -111,29 +179,29 @@ def to_camel_case(s):
 # Define the function to preprocess a file (CSV or XLSX)
 def preprocess_uploaded_file(uploaded_file):
-    with st.spinner(f'Processing {uploaded_file.name}...'):
-        # Determine file type based on extension
-        if uploaded_file.name.lower().endswith('.xlsx'):
-            data = preprocess_xlsx(uploaded_file)
-        else:
-            # Original CSV processing
-            file_content = uploaded_file.read()
-            processed_output = preprocess_csv(file_content)
-            processed_file = io.StringIO(processed_output.getvalue())
-            data = load_data(processed_file)
-            data = fill_missing_data(data, 4, 0)
-            data['Start datetime'] = pd.to_datetime(data['Start datetime'], dayfirst=True, errors='coerce')
-            data['End datetime'] = pd.to_datetime(data['End datetime'], dayfirst=True, errors='coerce')
-            data['Time spent'] = (data['End datetime'] - data['Start datetime']).dt.total_seconds()
-            data['Time spent(m:s)'] = pd.to_datetime(data['Time spent'], unit='s').dt.strftime('%M:%S')
-            # Extract environment name from filename
-            filename = uploaded_file.name
-            environment = filename.split('_Puppeteer')[0]
-            # Add environment column to the dataframe
-            data['Environment'] = environment
     return data
 def add_app_description():

     dtype_dict = {
         'Feature Name': 'string',
         'Scenario Name': 'string',
+        'Total Time Taken (ms)': 'float64',
+        'Failed Scenario': 'string'
     }
     # Read both the first sheet for error messages and "Time Taken" sheet
     excel_file = pd.ExcelFile(uploaded_file, engine='openpyxl')
+    # Read detailed step data from first sheet (contains error messages)
     error_df = pd.read_excel(excel_file, sheet_name=0)
+    # Read time taken data from the "Time Taken" sheet
     df = pd.read_excel(
         excel_file,
         sheet_name='Time Taken',
         dtype=dtype_dict
     )
+    # Print column names and sample values for debugging
+    # st.write("Excel columns:", df.columns.tolist())
+    # st.write("Sample data from Time Taken sheet:", df.head())
+    # st.write("Unique Feature Names:", df['Feature Name'].unique())
+    # st.write("Feature Name count:", df['Feature Name'].nunique())
+    # # Check for any empty or NaN values in Feature Name
+    # empty_features = df['Feature Name'].isna().sum()
+    # st.write(f"Empty Feature Names: {empty_features}")
     # Convert Failed Scenario column to boolean after reading
+    # Handle different possible values (TRUE/FALSE, True/False, etc.)
+    df['Failed Scenario'] = df['Failed Scenario'].astype(str).str.upper()
+    df['Status'] = df['Failed Scenario'].map(
+        lambda x: 'FAILED' if x in ['TRUE', 'YES', 'Y', '1'] else 'PASSED'
+    )
+    # Count failed and passed scenarios
+    failed_count = (df['Status'] == 'FAILED').sum()
+    passed_count = (df['Status'] == 'PASSED').sum()
+    # Extract error messages from the first sheet
+    # Find rows with FAILED result and group by Scenario Name to get the error message
+    if 'Result' in error_df.columns:
+        failed_steps = error_df[error_df['Result'] == 'FAILED'].copy()
+        # If there are failed steps, get the error messages
+        if not failed_steps.empty:
+            # Group by Scenario Name and get the first error message and step for each scenario
+            error_messages = failed_steps.groupby('Scenario Name').agg({
+                'Error Message': 'first',
+                'Step': 'first'  # Capture the step where it failed
+            }).reset_index()
+        else:
+            # Create empty DataFrame with required columns
+            error_messages = pd.DataFrame(columns=['Scenario Name', 'Error Message', 'Step'])
+    else:
+        # If Result column doesn't exist, create empty DataFrame
+        error_messages = pd.DataFrame(columns=['Scenario Name', 'Error Message', 'Step'])
     # Extract date from filename (e.g., RI2211_batch_20250225_27031.xlsx)
     filename = uploaded_file.name
     result_df = pd.DataFrame({
         'Functional area': df['Feature Name'],
         'Scenario Name': df['Scenario Name'],
+        'Status': df['Status'],
         'Time spent': df['Total Time Taken (ms)'] / 1000  # Convert ms to seconds
     })
+    # Fill any NaN values in Functional area
+    result_df['Functional area'] = result_df['Functional area'].fillna('Unknown')
     # Merge error messages with result dataframe
+    if not error_messages.empty:
+        result_df = result_df.merge(error_messages[['Scenario Name', 'Error Message', 'Step']],
+                                   on='Scenario Name', how='left')
     # Add environment column
     result_df['Environment'] = environment
     # Calculate formatted time spent
     result_df['Time spent(m:s)'] = pd.to_datetime(result_df['Time spent'], unit='s').dt.strftime('%M:%S')
     result_df['Start datetime'] = pd.to_datetime(file_date)
     result_df['End datetime'] = result_df['Start datetime'] + pd.to_timedelta(result_df['Time spent'], unit='s')
+    # Add failed step information if available
+    if 'Step' in result_df.columns:
+        result_df['Failed Step'] = result_df['Step']
+        result_df.drop('Step', axis=1, inplace=True)
+    # Extract start time from the first sheet
+    before_steps = error_df[error_df['Step'].str.contains('before', case=False, na=False)]
+    if not before_steps.empty:
+        # Get the first 'before' step for each scenario
+        before_steps['Time Stamp'] = pd.to_datetime(before_steps['Time Stamp'], format='%H:%M:%S', errors='coerce')
+        start_times = before_steps.groupby('Scenario Name').agg({'Time Stamp': 'first'}).reset_index()
+        # Store the timestamps in a variable for efficient reuse
+        result_df = result_df.merge(start_times, on='Scenario Name', how='left')
+        result_df.rename(columns={'Time Stamp': 'Scenario Start Time'}, inplace=True)
+        scenario_start_times = result_df['Scenario Start Time']
+        # Combine the date from the filename with the time stamp
+        result_df['Start datetime'] = pd.to_datetime(scenario_start_times.dt.strftime('%H:%M:%S') + ' ' + file_date.strftime('%Y-%m-%d'))
+    # Print counts for debugging
+    # st.write(f"Processed data - Failed: {len(result_df[result_df['Status'] == 'FAILED'])}, Passed: {len(result_df[result_df['Status'] == 'PASSED'])}")
+    # st.write(f"Unique functional areas in processed data: {result_df['Functional area'].nunique()}")
+    # st.write(f"Unique functional areas: {result_df['Functional area'].unique()}")
+    # Debugging: Print the columns of the first sheet
+    # st.write("Columns in the first sheet:", error_df.columns.tolist())
+    # st.write("Sample data from the first sheet:", error_df.head())
     return result_df
 def fill_missing_data(data, column_index, value):
 # Define the function to preprocess a file (CSV or XLSX)
 def preprocess_uploaded_file(uploaded_file):
+    # Commenting out the spinner to disable it
+    # with st.spinner(f'Processing {uploaded_file.name}...'):
+    # Determine file type based on extension
+    if uploaded_file.name.lower().endswith('.xlsx'):
+        data = preprocess_xlsx(uploaded_file)
+    else:
+        # Original CSV processing
+        file_content = uploaded_file.read()
+        processed_output = preprocess_csv(file_content)
+        processed_file = io.StringIO(processed_output.getvalue())
+        data = load_data(processed_file)
+        data = fill_missing_data(data, 4, 0)
+        data['Start datetime'] = pd.to_datetime(data['Start datetime'], dayfirst=True, errors='coerce')
+        data['End datetime'] = pd.to_datetime(data['End datetime'], dayfirst=True, errors='coerce')
+        data['Time spent'] = (data['End datetime'] - data['Start datetime']).dt.total_seconds()
+        data['Time spent(m:s)'] = pd.to_datetime(data['Time spent'], unit='s').dt.strftime('%M:%S')
+        # Extract environment name from filename
+        filename = uploaded_file.name
+        environment = filename.split('_Puppeteer')[0]
+        # Add environment column to the dataframe
+        data['Environment'] = environment
     return data
 def add_app_description():

second.py CHANGED Viewed

@@ -39,6 +39,17 @@ def double_main(uploaded_file1, uploaded_file2):
     # Merge dataframes on 'Scenario Name'
     merged_df = pd.merge(older_df, newer_df, on=['Functional area', 'Scenario Name'], suffixes=('_old', '_new'))
     # Filter scenarios
     fail_to_fail_scenarios = merged_df[(merged_df['Status_old'] == 'FAILED') & (merged_df['Status_new'] == 'FAILED')]
     pass_to_fail_scenarios = merged_df[(merged_df['Status_old'] == 'PASSED') & (merged_df['Status_new'] == 'FAILED')]
@@ -101,21 +112,28 @@ def double_main(uploaded_file1, uploaded_file2):
     with tab1:
         st.write(f"Failing scenarios Count: {fail_count}")
-        columns_to_display1 = ['Functional area', 'Scenario Name', 'Error message_old', 'Error message_new']
         st.dataframe(fail_to_fail_scenarios[columns_to_display1])
         csv = convert_df(fail_to_fail_scenarios[columns_to_display1])
         st.download_button("Download Consistent Failures as CSV", data=csv, file_name='consistent_failures.csv', mime='text/csv')
     with tab2:
         st.write(f"Failing scenarios Count: {pass_fail_count}")
-        columns_to_display2 = ['Functional area', 'Scenario Name', 'Error message_new', 'Time spent_old', 'Time spent_new']
         st.dataframe(pass_to_fail_scenarios[columns_to_display2])
         csv = convert_df(pass_to_fail_scenarios[columns_to_display2])
         st.download_button("Download New Failures as CSV", data=csv, file_name='new_failures.csv', mime='text/csv')
     with tab3:
         st.write(f"Passing scenarios Count: {pass_count}")
-        columns_to_display3 = ['Functional area', 'Scenario Name', 'Error message_old', 'Time spent_old', 'Time spent_new']
         st.dataframe(fail_to_pass_scenarios[columns_to_display3])
         csv = convert_df(fail_to_pass_scenarios[columns_to_display3])
         st.download_button("Download New Passes as CSV", data=csv, file_name='new_passes.csv', mime='text/csv')

     # Merge dataframes on 'Scenario Name'
     merged_df = pd.merge(older_df, newer_df, on=['Functional area', 'Scenario Name'], suffixes=('_old', '_new'))
+    # Ensure column naming consistency
+    # Rename columns if needed for consistency
+    column_mapping = {}
+    for col in merged_df.columns:
+        if col.startswith('Error message'):
+            new_col = col.replace('Error message', 'Error Message')
+            column_mapping[col] = new_col
+    if column_mapping:
+        merged_df.rename(columns=column_mapping, inplace=True)
     # Filter scenarios
     fail_to_fail_scenarios = merged_df[(merged_df['Status_old'] == 'FAILED') & (merged_df['Status_new'] == 'FAILED')]
     pass_to_fail_scenarios = merged_df[(merged_df['Status_old'] == 'PASSED') & (merged_df['Status_new'] == 'FAILED')]
     with tab1:
         st.write(f"Failing scenarios Count: {fail_count}")
+        # Check if Failed Step columns exist
+        columns_to_display1 = ['Functional area', 'Scenario Name', 'Error Message_old', 'Error Message_new']
+        if 'Failed Step_old' in fail_to_fail_scenarios.columns and 'Failed Step_new' in fail_to_fail_scenarios.columns:
+            columns_to_display1 = ['Functional area', 'Scenario Name', 'Error Message_old', 'Failed Step_old', 'Error Message_new', 'Failed Step_new']
         st.dataframe(fail_to_fail_scenarios[columns_to_display1])
         csv = convert_df(fail_to_fail_scenarios[columns_to_display1])
         st.download_button("Download Consistent Failures as CSV", data=csv, file_name='consistent_failures.csv', mime='text/csv')
     with tab2:
         st.write(f"Failing scenarios Count: {pass_fail_count}")
+        columns_to_display2 = ['Functional area', 'Scenario Name', 'Error Message_new', 'Time spent_old', 'Time spent_new']
+        if 'Failed Step_new' in pass_to_fail_scenarios.columns:
+            columns_to_display2 = ['Functional area', 'Scenario Name', 'Error Message_new', 'Failed Step_new', 'Time spent_old', 'Time spent_new']
         st.dataframe(pass_to_fail_scenarios[columns_to_display2])
         csv = convert_df(pass_to_fail_scenarios[columns_to_display2])
         st.download_button("Download New Failures as CSV", data=csv, file_name='new_failures.csv', mime='text/csv')
     with tab3:
         st.write(f"Passing scenarios Count: {pass_count}")
+        columns_to_display3 = ['Functional area', 'Scenario Name', 'Error Message_old', 'Time spent_old', 'Time spent_new']
+        if 'Failed Step_old' in fail_to_pass_scenarios.columns:
+            columns_to_display3 = ['Functional area', 'Scenario Name', 'Error Message_old', 'Failed Step_old', 'Time spent_old', 'Time spent_new']
         st.dataframe(fail_to_pass_scenarios[columns_to_display3])
         csv = convert_df(fail_to_pass_scenarios[columns_to_display3])
         st.download_button("Download New Passes as CSV", data=csv, file_name='new_passes.csv', mime='text/csv')