BananaSauce committed
Commit 3ff5801 · 1 Parent(s): 4e4d72e

changes xlsx

Files changed (8)
  1. README.md +96 -1
  2. app.py +11 -12
  3. multi_env_compare.py +11 -11
  4. multiple.py +5 -5
  5. pre.py +99 -20
  6. requirements.txt +6 -6
  7. second.py +19 -16
  8. weekly.py +40 -15
README.md CHANGED
@@ -7,4 +7,99 @@ sdk: streamlit
7
  sdk_version: 1.28.1
8
  app_file: app.py
9
  pinned: false
10
- ---
7
  sdk_version: 1.28.1
8
  app_file: app.py
9
  pinned: false
10
+ ---
11
+
12
+ # Batch Run Analyzer
13
+
14
+ A comprehensive Streamlit application for analyzing batch run results from CSV or XLSX files, visualizing pass/fail statistics, and comparing runs across different environments.
15
+
16
+ ## Features
17
+
18
+ - Support for both CSV and XLSX file formats
19
+ - Multiple analysis modes:
20
+ - **Multi**: Analyze multiple files from different environments
21
+ - **Compare**: Compare two files to identify differences in scenario outcomes
22
+ - **Weekly**: Generate weekly trend reports
23
+ - **Multi-Env Compare**: Compare scenarios across multiple environments
24
+ - Detailed statistics on passing and failing scenarios
25
+ - Visual charts for failure counts by functional area
26
+ - Interactive filtering by functional area and status
27
+ - Time spent analysis per functional area
28
+ - Error Message analysis
29
+
30
+ ## Setup and Installation
31
+
32
+ 1. Clone this repository:
33
+ ```
34
+ git clone <repository-url>
35
+ cd batch-run-csv-analyser
36
+ ```
37
+
38
+ 2. Install the required dependencies:
39
+ ```
40
+ pip install -r requirements.txt
41
+ ```
42
+
43
+ 3. Run the application:
44
+ ```
45
+ streamlit run app.py
46
+ ```
47
+
48
+ ## File Format Support
49
+
50
+ ### CSV Format (Legacy)
51
+ The application still supports the original CSV format with the following columns:
52
+ - Functional area
53
+ - Scenario Name
54
+ - Start datetime
55
+ - End datetime
56
+ - Status
57
+ - Error Message
58
+
59
+ ### XLSX Format (New)
60
+ The application now supports XLSX files with step-level data:
61
+ - Feature Name
62
+ - Scenario Name
63
+ - Step
64
+ - Result
65
+ - Time Stamp
66
+ - Duration (ms)
67
+ - Error Message
68
+
69
+ The application will automatically detect the file format based on the file extension and process it accordingly.
70
+
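For orientation, this is the shape of the extension check (a minimal sketch; `detect_format` is a hypothetical helper, while the real routing lives in `preprocess_uploaded_file` in `pre.py`):

```
from pathlib import Path

def detect_format(filename: str) -> str:
    # Mirrors pre.py: .xlsx is routed to the step-level Excel parser,
    # everything else falls back to the legacy CSV pipeline.
    return "xlsx" if Path(filename).suffix.lower() == ".xlsx" else "csv"

print(detect_format("RI2211_batch_20250225_27031.xlsx"))  # xlsx
print(detect_format("ENV1_Puppeteer_report.csv"))         # csv (hypothetical name)
```
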
71
+ ## Usage
72
+
73
+ 1. Start the application with `streamlit run app.py`
74
+ 2. Use the sidebar to select the desired analysis mode
75
+ 3. Upload the necessary files based on the selected mode
76
+ 4. Follow the on-screen instructions for filtering and analysis
77
+
78
+ ## Analysis Modes
79
+
80
+ ### Multi Mode
81
+ Upload files from multiple environments for individual analysis. View statistics, filter by functional area, and see charts of failing scenarios.
82
+
83
+ ### Compare Mode
84
+ Upload two files to compare scenario statuses between them. The application will identify (see the sketch after this list):
85
+ - Consistent failures (failed in both files)
86
+ - New failures (passed in the older file, failed in the newer)
87
+ - New passes (failed in the older file, passed in the newer)
88
+
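Under the hood this is a merge on scenario identity followed by status masks. A condensed, self-contained sketch of the logic in `second.py` (the toy rows are made up):

```
import pandas as pd

older = pd.DataFrame({'Functional area': ['Login', 'Login'],
                      'Scenario Name': ['A', 'B'],
                      'Status': ['FAILED', 'PASSED']})
newer = pd.DataFrame({'Functional area': ['Login', 'Login'],
                      'Scenario Name': ['A', 'B'],
                      'Status': ['FAILED', 'FAILED']})

# Align the two runs on scenario identity, keeping both statuses
merged = older.merge(newer, on=['Functional area', 'Scenario Name'],
                     suffixes=('_old', '_new'))

consistent_failures = merged[(merged['Status_old'] == 'FAILED') &
                             (merged['Status_new'] == 'FAILED')]  # scenario A
new_failures = merged[(merged['Status_old'] == 'PASSED') &
                      (merged['Status_new'] == 'FAILED')]         # scenario B
new_passes = merged[(merged['Status_old'] == 'FAILED') &
                    (merged['Status_new'] == 'PASSED')]           # none here
```
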
89
+ ### Weekly Mode
90
+ Upload files from multiple dates to see trend reports. Filter by environment and functional area, and view detailed statistics for each day.
91
+
92
+ ### Multi-Env Compare Mode
93
+ Compare scenarios across multiple environments to identify inconsistencies in test coverage, as sketched below.
94
+
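At its core this is set arithmetic over per-environment scenario-name sets, in the spirit of `find_different_scenarios` in `multi_env_compare.py` (the environment and scenario names below are made up):

```
# Scenarios present in other environments but missing from this one
scenarios_by_env = {
    'ENV1': {'Login A', 'Login B'},
    'ENV2': {'Login A'},
}
for env, scenarios in scenarios_by_env.items():
    others = set().union(*(s for e, s in scenarios_by_env.items() if e != env))
    print(env, 'is missing:', others - scenarios)
# ENV1 is missing: set()
# ENV2 is missing: {'Login B'}
```
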
95
+ ## Notes
96
+
97
+ - Filename format is important for date extraction in Weekly mode. The application tries patterns in order: `name_YYYYMMDD_HHMMSS`, then `name_YYYYMMDD`, then any 8-digit sequence resembling a date (see the sketch after these notes).
98
+ - For XLSX files, all steps within a scenario are aggregated to determine the overall scenario status.
99
+
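The fallback chain is easy to see in miniature. A condensed sketch of `extract_date_from_filename` from `weekly.py` (the second filename is hypothetical; the real function falls back to the current date with a warning instead of returning `None`):

```
import re
from datetime import datetime

def extract_date(filename):
    # Try name_YYYYMMDD_HHMMSS, then name_YYYYMMDD, then any 8-digit run.
    for pattern, fmt in [(r'_(\d{8})_(\d{6})', '%Y%m%d_%H%M%S'),
                         (r'_(\d{8})', '%Y%m%d'),
                         (r'(\d{8})', '%Y%m%d')]:
        match = re.search(pattern, filename)
        if match:
            try:
                return datetime.strptime('_'.join(match.groups()), fmt)
            except ValueError:
                continue  # e.g. digits that do not form a valid timestamp
    return None

print(extract_date('RI2211_batch_20250225_27031.xlsx'))  # 2025-02-25 00:00:00
print(extract_date('ENV1_20250224_276100.csv'))          # hour 27 is invalid, so the
                                                         # date-only pattern wins:
                                                         # 2025-02-24 00:00:00
```
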
100
+ ## Troubleshooting
101
+
102
+ If you encounter issues:
103
+ 1. Ensure the file format follows the expected structure
104
+ 2. Check the logs for specific error messages
105
+ 3. Try processing smaller files first to verify functionality
app.py CHANGED
@@ -13,9 +13,8 @@ from multi_env_compare import multi_env_compare_main
13
  def single_main(uploaded_file):
14
 
15
  if uploaded_file is not None:
16
- # Process the csv files with header
17
  data = preprocess_uploaded_file(uploaded_file)
18
- # st.write(data)
19
 
20
  # Display scenarios with status "failed" grouped by functional area
21
  failed_scenarios = data[data['Status'] == 'FAILED']
@@ -70,9 +69,9 @@ def single_main(uploaded_file):
70
 
71
  # Filter scenarios based on selected functional area
72
  if selected_status == 'Failed':
73
- grouped_filtered_scenarios = filtered_scenarios.groupby('Functional area')[['Scenario name', 'Error message','Time spent(m:s)']].apply(lambda x: x.reset_index(drop=True))
74
  elif selected_status == 'Passed':
75
- grouped_filtered_scenarios = filtered_scenarios.groupby('Functional area')[['Scenario name', 'Time spent(m:s)']].apply(lambda x: x.reset_index(drop=True))
76
  else:
77
  grouped_filtered_scenarios = None
78
 
@@ -81,7 +80,7 @@ def single_main(uploaded_file):
81
  grouped_filtered_scenarios.drop(columns=['level_1'], inplace=True)
82
  grouped_filtered_scenarios.index = grouped_filtered_scenarios.index + 1
83
 
84
- st.markdown(grouped_filtered_scenarios)
85
 
86
  # Sort the average time spent table by start datetime
87
  average_time_spent_seconds = average_time_spent_seconds.sort_values(by='Start datetime')
@@ -136,14 +135,14 @@ def main():
136
  if st.session_state["mode"] == "multi":
137
  multiple_main()
138
  elif st.session_state["mode"] == "compare":
139
- st.sidebar.markdown("### Upload CSV Files for Comparison")
140
  upload_option = st.sidebar.radio("Upload method", ["Single uploader", "Two separate uploaders"])
141
 
142
  if upload_option == "Single uploader":
143
- uploaded_files = st.sidebar.file_uploader("Upload CSV files for comparison", type="csv", accept_multiple_files=True)
144
  if uploaded_files:
145
  if len(uploaded_files) < 2:
146
- st.warning("Please upload at least two CSV files for comparison.")
147
  elif len(uploaded_files) > 2:
148
  st.warning("More than two files uploaded. Only the first two will be used for comparison.")
149
  else:
@@ -153,18 +152,18 @@ def main():
153
  else:
154
  col1, col2 = st.sidebar.columns(2)
155
  with col1:
156
- uploaded_file1 = st.file_uploader("Upload older CSV file", type="csv", key="file1")
157
  with col2:
158
- uploaded_file2 = st.file_uploader("Upload newer CSV file", type="csv", key="file2")
159
 
160
  if uploaded_file1 is not None and uploaded_file2 is not None:
161
  with st.spinner('Processing...'):
162
  double_main(uploaded_file1, uploaded_file2)
163
  st.success('Comparison Complete!')
164
  elif uploaded_file1 is not None or uploaded_file2 is not None:
165
- st.warning("Please upload both CSV files for comparison.")
166
  elif st.session_state["mode"] == "weekly":
167
- uploaded_files = st.sidebar.file_uploader("Upload CSV files for Weekly Report", type="csv", accept_multiple_files=True)
168
  if uploaded_files:
169
  generate_weekly_report(uploaded_files)
170
  elif st.session_state["mode"] == "multi-env compare":
 
13
  def single_main(uploaded_file):
14
 
15
  if uploaded_file is not None:
16
+ # Process the file with header
17
  data = preprocess_uploaded_file(uploaded_file)
 
18
 
19
  # Display scenarios with status "failed" grouped by functional area
20
  failed_scenarios = data[data['Status'] == 'FAILED']
 
69
 
70
  # Filter scenarios based on selected functional area
71
  if selected_status == 'Failed':
72
+ grouped_filtered_scenarios = filtered_scenarios.groupby('Functional area')[['Scenario Name', 'Error Message','Time spent(m:s)']].apply(lambda x: x.reset_index(drop=True))
73
  elif selected_status == 'Passed':
74
+ grouped_filtered_scenarios = filtered_scenarios.groupby('Functional area')[['Scenario Name', 'Time spent(m:s)']].apply(lambda x: x.reset_index(drop=True))
75
  else:
76
  grouped_filtered_scenarios = None
77
 
 
80
  grouped_filtered_scenarios.drop(columns=['level_1'], inplace=True)
81
  grouped_filtered_scenarios.index = grouped_filtered_scenarios.index + 1
82
 
83
+ st.dataframe(grouped_filtered_scenarios)
84
 
85
  # Sort the average time spent table by start datetime
86
  average_time_spent_seconds = average_time_spent_seconds.sort_values(by='Start datetime')
 
135
  if st.session_state["mode"] == "multi":
136
  multiple_main()
137
  elif st.session_state["mode"] == "compare":
138
+ st.sidebar.markdown("### Upload Files for Comparison")
139
  upload_option = st.sidebar.radio("Upload method", ["Single uploader", "Two separate uploaders"])
140
 
141
  if upload_option == "Single uploader":
142
+ uploaded_files = st.sidebar.file_uploader("Upload CSV or XLSX files for comparison", type=["csv", "xlsx"], accept_multiple_files=True)
143
  if uploaded_files:
144
  if len(uploaded_files) < 2:
145
+ st.warning("Please upload at least two files for comparison.")
146
  elif len(uploaded_files) > 2:
147
  st.warning("More than two files uploaded. Only the first two will be used for comparison.")
148
  else:
 
152
  else:
153
  col1, col2 = st.sidebar.columns(2)
154
  with col1:
155
+ uploaded_file1 = st.file_uploader("Upload older CSV/XLSX file", type=["csv", "xlsx"], key="file1")
156
  with col2:
157
+ uploaded_file2 = st.file_uploader("Upload newer CSV/XLSX file", type=["csv", "xlsx"], key="file2")
158
 
159
  if uploaded_file1 is not None and uploaded_file2 is not None:
160
  with st.spinner('Processing...'):
161
  double_main(uploaded_file1, uploaded_file2)
162
  st.success('Comparison Complete!')
163
  elif uploaded_file1 is not None or uploaded_file2 is not None:
164
+ st.warning("Please upload both files for comparison.")
165
  elif st.session_state["mode"] == "weekly":
166
+ uploaded_files = st.sidebar.file_uploader("Upload CSV or XLSX files for Weekly Report", type=["csv", "xlsx"], accept_multiple_files=True)
167
  if uploaded_files:
168
  generate_weekly_report(uploaded_files)
169
  elif st.session_state["mode"] == "multi-env compare":
multi_env_compare.py CHANGED
@@ -13,7 +13,7 @@ def find_different_scenarios(grouped_data, area):
13
  area_data = grouped_data[grouped_data['Functional area'] == area]
14
 
15
  # Get scenarios for each environment
16
- scenarios_by_env = {env: set(area_data[area_data['Environment'] == env]['Scenario name'])
17
  for env in area_data['Environment'].unique()}
18
 
19
  # Find scenarios that are in one environment but not the other
@@ -71,8 +71,8 @@ def perform_multi_env_analysis(uploaded_dataframes):
71
  (combined_data['Functional area'].isin(selected_functional_areas))
72
  ]
73
 
74
- # Group data by Environment, Functional area, Scenario name, and Status
75
- grouped_data = filtered_data.groupby(['Environment', 'Functional area', 'Scenario name', 'Status']).size().unstack(fill_value=0)
76
 
77
  # Ensure 'PASSED' and 'FAILED' columns exist
78
  if 'PASSED' not in grouped_data.columns:
@@ -83,11 +83,11 @@ def perform_multi_env_analysis(uploaded_dataframes):
83
  # Calculate total scenarios
84
  grouped_data['Total'] = grouped_data['PASSED'] + grouped_data['FAILED']
85
 
86
- # Reset index to make Environment, Functional area, and Scenario name as columns
87
  grouped_data = grouped_data.reset_index()
88
 
89
  # Reorder columns
90
- grouped_data = grouped_data[['Environment', 'Functional area', 'Scenario name', 'Total', 'PASSED', 'FAILED']]
91
 
92
  # Display summary statistics
93
  st.write("### Summary Statistics")
@@ -103,7 +103,7 @@ def perform_multi_env_analysis(uploaded_dataframes):
103
  # Display the DataFrame
104
  st.dataframe(summary_with_headers)
105
  # Define scenarios_by_env here
106
- scenarios_by_env = {env: set(grouped_data[grouped_data['Environment'] == env]['Scenario name']) for env in selected_environments}
107
 
108
  missing_scenarios = []
109
  mismatched_scenarios = []
@@ -112,7 +112,7 @@ def perform_multi_env_analysis(uploaded_dataframes):
112
 
113
  if len(selected_environments) > 1:
114
  # Group data by Environment and Functional area, count scenarios
115
- scenario_counts = filtered_data.groupby(['Environment', 'Functional area'])['Scenario name'].nunique().unstack(fill_value=0)
116
 
117
  # Calculate the difference between max and min counts for each functional area
118
  count_diff = scenario_counts.max() - scenario_counts.min()
@@ -140,7 +140,7 @@ def perform_multi_env_analysis(uploaded_dataframes):
140
 
141
  # Get scenarios for each environment
142
  scenarios_by_env = {env: set(filtered_data[(filtered_data['Environment'] == env) &
143
- (filtered_data['Functional area'] == selected_area)]['Scenario name'])
144
  for env in selected_environments}
145
 
146
  # Find scenarios that are different between environments
@@ -188,10 +188,10 @@ def multi_env_compare_main():
188
 
189
  # Loop through the number of environments and create file uploaders
190
  for i in range(num_environments):
191
- uploaded_files = st.file_uploader(f"Upload CSV files for Environment {i + 1}", type="csv", accept_multiple_files=True)
192
 
193
  for uploaded_file in uploaded_files:
194
- # Preprocess the uploaded CSV file
195
  data = preprocess_uploaded_file(uploaded_file)
196
 
197
  # Append the dataframe to the list
@@ -202,7 +202,7 @@ def multi_env_compare_main():
202
  # Perform analysis for uploaded data
203
  perform_multi_env_analysis(uploaded_dataframes)
204
  else:
205
- st.write("Please upload at least one CSV file.")
206
 
207
  if __name__ == "__main__":
208
  multi_env_compare_main()
 
13
  area_data = grouped_data[grouped_data['Functional area'] == area]
14
 
15
  # Get scenarios for each environment
16
+ scenarios_by_env = {env: set(area_data[area_data['Environment'] == env]['Scenario Name'])
17
  for env in area_data['Environment'].unique()}
18
 
19
  # Find scenarios that are in one environment but not the other
 
71
  (combined_data['Functional area'].isin(selected_functional_areas))
72
  ]
73
 
74
+ # Group data by Environment, Functional area, Scenario Name, and Status
75
+ grouped_data = filtered_data.groupby(['Environment', 'Functional area', 'Scenario Name', 'Status']).size().unstack(fill_value=0)
76
 
77
  # Ensure 'PASSED' and 'FAILED' columns exist
78
  if 'PASSED' not in grouped_data.columns:
 
83
  # Calculate total scenarios
84
  grouped_data['Total'] = grouped_data['PASSED'] + grouped_data['FAILED']
85
 
86
+ # Reset index to make Environment, Functional area, and Scenario Name as columns
87
  grouped_data = grouped_data.reset_index()
88
 
89
  # Reorder columns
90
+ grouped_data = grouped_data[['Environment', 'Functional area', 'Scenario Name', 'Total', 'PASSED', 'FAILED']]
91
 
92
  # Display summary statistics
93
  st.write("### Summary Statistics")
 
103
  # Display the DataFrame
104
  st.dataframe(summary_with_headers)
105
  # Define scenarios_by_env here
106
+ scenarios_by_env = {env: set(grouped_data[grouped_data['Environment'] == env]['Scenario Name']) for env in selected_environments}
107
 
108
  missing_scenarios = []
109
  mismatched_scenarios = []
 
112
 
113
  if len(selected_environments) > 1:
114
  # Group data by Environment and Functional area, count scenarios
115
+ scenario_counts = filtered_data.groupby(['Environment', 'Functional area'])['Scenario Name'].nunique().unstack(fill_value=0)
116
 
117
  # Calculate the difference between max and min counts for each functional area
118
  count_diff = scenario_counts.max() - scenario_counts.min()
 
140
 
141
  # Get scenarios for each environment
142
  scenarios_by_env = {env: set(filtered_data[(filtered_data['Environment'] == env) &
143
+ (filtered_data['Functional area'] == selected_area)]['Scenario Name'])
144
  for env in selected_environments}
145
 
146
  # Find scenarios that are different between environments
 
188
 
189
  # Loop through the number of environments and create file uploaders
190
  for i in range(num_environments):
191
+ uploaded_files = st.file_uploader(f"Upload CSV or XLSX files for Environment {i + 1}", type=["csv", "xlsx"], accept_multiple_files=True)
192
 
193
  for uploaded_file in uploaded_files:
194
+ # Preprocess the uploaded file
195
  data = preprocess_uploaded_file(uploaded_file)
196
 
197
  # Append the dataframe to the list
 
202
  # Perform analysis for uploaded data
203
  perform_multi_env_analysis(uploaded_dataframes)
204
  else:
205
+ st.write("Please upload at least one file.")
206
 
207
  if __name__ == "__main__":
208
  multi_env_compare_main()
multiple.py CHANGED
@@ -69,9 +69,9 @@ def perform_analysis(uploaded_dataframes):
69
 
70
  # Filter scenarios based on selected functional area
71
  if selected_status == 'Failed':
72
- grouped_filtered_scenarios = filtered_scenarios.groupby('Environment')[['Functional area', 'Scenario name', 'Error message','Time spent(m:s)','Start datetime']].apply(lambda x: x.reset_index(drop=True))
73
  elif selected_status == 'Passed':
74
- grouped_filtered_scenarios = filtered_scenarios.groupby('Functional area')[['Scenario name', 'Time spent(m:s)']].apply(lambda x: x.reset_index(drop=True))
75
  else:
76
  grouped_filtered_scenarios = None
77
  grouped_filtered_scenarios.reset_index(inplace=True)
@@ -127,10 +127,10 @@ def multiple_main():
127
 
128
  # Loop through the number of environments and create file uploaders
129
  for i in range(num_environments):
130
- uploaded_files = st.file_uploader(f"Upload CSV files for Environment {i + 1}", type="csv", accept_multiple_files=True)
131
 
132
  for uploaded_file in uploaded_files:
133
- # Preprocess the uploaded CSV file
134
  data = preprocess_uploaded_file(uploaded_file)
135
 
136
  # Append the dataframe to the list
@@ -141,6 +141,6 @@ def multiple_main():
141
  # Perform analysis for uploaded data
142
  perform_analysis(uploaded_dataframes)
143
  else:
144
- st.write("Please upload at least one CSV file.")
145
 
146
  pass
 
69
 
70
  # Filter scenarios based on selected functional area
71
  if selected_status == 'Failed':
72
+ grouped_filtered_scenarios = filtered_scenarios.groupby('Environment')[['Functional area', 'Scenario Name', 'Error Message','Time spent(m:s)','Start datetime']].apply(lambda x: x.reset_index(drop=True))
73
  elif selected_status == 'Passed':
74
+ grouped_filtered_scenarios = filtered_scenarios.groupby('Functional area')[['Scenario Name', 'Time spent(m:s)']].apply(lambda x: x.reset_index(drop=True))
75
  else:
76
  grouped_filtered_scenarios = None
77
  grouped_filtered_scenarios.reset_index(inplace=True)
 
127
 
128
  # Loop through the number of environments and create file uploaders
129
  for i in range(num_environments):
130
+ uploaded_files = st.file_uploader(f"Upload CSV or XLSX files for Environment {i + 1}", type=["csv", "xlsx"], accept_multiple_files=True)
131
 
132
  for uploaded_file in uploaded_files:
133
+ # Preprocess the uploaded file
134
  data = preprocess_uploaded_file(uploaded_file)
135
 
136
  # Append the dataframe to the list
 
141
  # Perform analysis for uploaded data
142
  perform_analysis(uploaded_dataframes)
143
  else:
144
+ st.write("Please upload at least one file.")
145
 
146
  pass
pre.py CHANGED
@@ -2,8 +2,12 @@ import pandas as pd
2
  import streamlit as st
3
  import csv
4
  import io
5
 
6
  def preprocess_csv(input_bytes):
 
7
  text = input_bytes.decode() # Decode bytes to text
8
  output = io.StringIO()
9
  writer = csv.writer(output)
@@ -28,6 +32,74 @@ def load_data(file):
28
  data = pd.read_csv(file, header=None, names=column_names)
29
  return data
30
 
31
  def fill_missing_data(data, column_index, value):
32
  data.iloc[:, column_index] = data.iloc[:, column_index].fillna(value)
33
  return data
@@ -37,23 +109,30 @@ def to_camel_case(s):
37
  parts = s.split('_')
38
  return ''.join([part.capitalize() for part in parts])
39
 
40
- # Define the function to preprocess a CSV file
41
  def preprocess_uploaded_file(uploaded_file):
42
- file_content = uploaded_file.read()
43
- processed_output = preprocess_csv(file_content)
44
- processed_file = io.StringIO(processed_output.getvalue())
45
- data = load_data(processed_file)
46
- data = fill_missing_data(data, 4, 0)
47
- data['Start datetime'] = pd.to_datetime(data['Start datetime'], dayfirst=True, errors='coerce')
48
- data['End datetime'] = pd.to_datetime(data['End datetime'], dayfirst=True, errors='coerce')
49
- data['Time spent'] = (data['End datetime'] - data['Start datetime']).dt.total_seconds()
50
- data['Time spent(m:s)'] = pd.to_datetime(data['Time spent'], unit='s').dt.strftime('%M:%S')
51
- # Extract environment name from filename
52
- filename = uploaded_file.name
53
- environment = filename.split('_Puppeteer')[0]
54
 
55
- # Add environment column to the dataframe
56
- data['Environment'] = environment
57
 
58
  return data
59
 
@@ -66,13 +145,13 @@ def add_app_description():
66
 
67
  if is_selected:
68
  with st.expander('Show App Description'):
69
- st.markdown("Welcome to DataLink Compare. This tool allows you to analyze CSV files containing scenarios' data and provides insights into their statuses, processing times, and more. You can also compare two CSV files to identify differences and similarities between them.")
70
 
71
  st.markdown("### Instructions:")
72
- st.write("1. Upload your CSV file using the file uploader on the sidebar.")
73
- st.write("2. Choose between 'Multi' and 'Compare' mode using the button on the sidebar.")
74
- st.write("3. In 'Multi' mode, you can upload and analyze multiple CSV files for individual environments.")
75
- st.write("4. In 'Compare' mode, you can upload two CSV files to compare them.")
76
 
77
  st.markdown("### Features:")
78
  st.write("- View statistics of passing and failing scenarios.")
 
2
  import streamlit as st
3
  import csv
4
  import io
5
+ import openpyxl # Add this import for Excel handling
6
+ from datetime import datetime
7
+ import re
8
 
9
  def preprocess_csv(input_bytes):
10
+ # Keep this for backward compatibility with CSV files
11
  text = input_bytes.decode() # Decode bytes to text
12
  output = io.StringIO()
13
  writer = csv.writer(output)
 
32
  data = pd.read_csv(file, header=None, names=column_names)
33
  return data
34
 
35
+ @st.cache_data
36
+ def preprocess_xlsx(uploaded_file):
37
+ """Process Excel file with step-level data and convert to scenario-level summary"""
38
+ # Define data types for columns
39
+ dtype_dict = {
40
+ 'Feature Name': 'string',
41
+ 'Scenario Name': 'string',
42
+ 'Total Time Taken (ms)': 'float64'
43
+ }
44
+
45
+ # Read both the first sheet for error messages and "Time Taken" sheet
46
+ excel_file = pd.ExcelFile(uploaded_file, engine='openpyxl')
47
+
48
+ # Read error messages from first sheet
49
+ error_df = pd.read_excel(excel_file, sheet_name=0)
50
+
51
+ # Read time taken data
52
+ df = pd.read_excel(
53
+ excel_file,
54
+ sheet_name='Time Taken',
55
+ dtype=dtype_dict
56
+ )
57
+
58
+ # Convert Failed Scenario column to boolean after reading
59
+ df['Failed Scenario'] = df['Failed Scenario'].astype(str).map({'TRUE': True, 'FALSE': False})
60
+
61
+ # Get error messages from the first sheet
62
+ error_messages = error_df[['Scenario Name', 'Error message']].copy()
63
+
64
+ # Extract date from filename (e.g., RI2211_batch_20250225_27031.xlsx)
65
+ filename = uploaded_file.name
66
+ date_match = re.search(r'_(\d{8})_', filename)
67
+ if date_match:
68
+ date_str = date_match.group(1)
69
+ file_date = datetime.strptime(date_str, '%Y%m%d').date()
70
+ else:
71
+ st.warning(f"Could not extract date from filename: {filename}. Using current date.")
72
+ file_date = datetime.now().date()
73
+
74
+ # Extract environment from filename
75
+ if any(pattern in filename for pattern in ['_batch_', '_fin_', '_priority_', '_Puppeteer_']):
76
+ environment = filename.split('_')[0]
77
+ else:
78
+ environment = filename.split('.')[0]
79
+
80
+ # Create result dataframe
81
+ result_df = pd.DataFrame({
82
+ 'Functional area': df['Feature Name'],
83
+ 'Scenario name': df['Scenario Name'],
84
+ 'Status': df['Failed Scenario'].map({True: 'FAILED', False: 'PASSED'}),
85
+ 'Time spent': df['Total Time Taken (ms)'] / 1000 # Convert ms to seconds
86
+ })
87
+
88
+ # Merge error messages with result dataframe
89
+ result_df = result_df.merge(error_messages, on='Scenario name', how='left')
90
+
91
+ # Add environment column
92
+ result_df['Environment'] = environment
93
+
94
+ # Calculate formatted time spent
95
+ result_df['Time spent(m:s)'] = pd.to_datetime(result_df['Time spent'], unit='s').dt.strftime('%M:%S')
96
+
97
+ # Add start datetime (using file date since actual start time isn't available in this sheet)
98
+ result_df['Start datetime'] = pd.to_datetime(file_date)
99
+ result_df['End datetime'] = result_df['Start datetime'] + pd.to_timedelta(result_df['Time spent'], unit='s')
100
+
101
+ return result_df
102
+
103
  def fill_missing_data(data, column_index, value):
104
  data.iloc[:, column_index] = data.iloc[:, column_index].fillna(value)
105
  return data
 
109
  parts = s.split('_')
110
  return ''.join([part.capitalize() for part in parts])
111
 
112
+ # Define the function to preprocess a file (CSV or XLSX)
113
  def preprocess_uploaded_file(uploaded_file):
114
+ with st.spinner(f'Processing {uploaded_file.name}...'):
115
+ # Determine file type based on extension
116
+ if uploaded_file.name.lower().endswith('.xlsx'):
117
+ data = preprocess_xlsx(uploaded_file)
118
+ else:
119
+ # Original CSV processing
120
+ file_content = uploaded_file.read()
121
+ processed_output = preprocess_csv(file_content)
122
+ processed_file = io.StringIO(processed_output.getvalue())
123
+ data = load_data(processed_file)
124
+ data = fill_missing_data(data, 4, 0)
125
+ data['Start datetime'] = pd.to_datetime(data['Start datetime'], dayfirst=True, errors='coerce')
126
+ data['End datetime'] = pd.to_datetime(data['End datetime'], dayfirst=True, errors='coerce')
127
+ data['Time spent'] = (data['End datetime'] - data['Start datetime']).dt.total_seconds()
128
+ data['Time spent(m:s)'] = pd.to_datetime(data['Time spent'], unit='s').dt.strftime('%M:%S')
129
+
130
+ # Extract environment name from filename
131
+ filename = uploaded_file.name
132
+ environment = filename.split('_Puppeteer')[0]
133
 
134
+ # Add environment column to the dataframe
135
+ data['Environment'] = environment
136
 
137
  return data
138
 
 
145
 
146
  if is_selected:
147
  with st.expander('Show App Description'):
148
+ st.markdown("Welcome to DataLink Compare. This tool allows you to analyze batch run reports and provides insights into their statuses, processing times, and more. You can also compare two files to identify differences and similarities between them.")
149
 
150
  st.markdown("### Instructions:")
151
+ st.write("1. Upload your CSV or XLSX file using the file uploader on the sidebar.")
152
+ st.write("2. Choose between 'Multi', 'Compare', 'Weekly', and 'Multi-Env Compare' mode using the dropdown on the sidebar.")
153
+ st.write("3. In 'Multi' mode, you can upload and analyze multiple files for individual environments.")
154
+ st.write("4. In 'Compare' mode, you can upload two files to compare them.")
155
 
156
  st.markdown("### Features:")
157
  st.write("- View statistics of passing and failing scenarios.")
requirements.txt CHANGED
@@ -1,6 +1,6 @@
1
- streamlit==1.28.1
2
- pandas
3
- matplotlib
4
- numpy
5
- XlsxWriter==3.0.8
6
- plotly
 
1
+ pandas>=2.0.0
2
+ streamlit>=1.0.0
3
+ plotly>=5.0.0
4
+ matplotlib>=3.0.0
5
+ numpy>=1.20.0
6
+ openpyxl>=3.0.0
second.py CHANGED
@@ -8,12 +8,15 @@ def convert_df(df):
8
 
9
  def double_main(uploaded_file1, uploaded_file2):
10
  if uploaded_file1 is None or uploaded_file2 is None:
11
- st.warning("Please upload both CSV files for comparison.")
12
  return
13
 
14
- # Preprocess the uploaded CSV files
15
- data_1 = preprocess_uploaded_file(uploaded_file1)
16
- data_2 = preprocess_uploaded_file(uploaded_file2)
 
 
18
  # Determine which file is older and newer
19
  if data_1['Start datetime'].min() < data_2['Start datetime'].min():
@@ -30,11 +33,11 @@ def double_main(uploaded_file1, uploaded_file2):
30
  newer_datetime = newer_df['Start datetime'].min()
31
 
32
  # Display start datetime of each file
33
- st.write(f"The older csv started on {older_datetime}")
34
- st.write(f"The newer csv started on {newer_datetime}")
35
 
36
- # Merge dataframes on 'scenario name'
37
- merged_df = pd.merge(older_df, newer_df, on=['Functional area', 'Scenario name'], suffixes=('_old', '_new'))
38
 
39
  # Filter scenarios
40
  fail_to_fail_scenarios = merged_df[(merged_df['Status_old'] == 'FAILED') & (merged_df['Status_new'] == 'FAILED')]
@@ -98,40 +101,40 @@ def double_main(uploaded_file1, uploaded_file2):
98
 
99
  with tab1:
100
  st.write(f"Failing scenarios Count: {fail_count}")
101
- columns_to_display1 = ['Functional area', 'Scenario name', 'Error message_old', 'Error message_new']
102
  st.dataframe(fail_to_fail_scenarios[columns_to_display1])
103
  csv = convert_df(fail_to_fail_scenarios[columns_to_display1])
104
  st.download_button("Download Consistent Failures as CSV", data=csv, file_name='consistent_failures.csv', mime='text/csv')
105
 
106
  with tab2:
107
  st.write(f"Failing scenarios Count: {pass_fail_count}")
108
- columns_to_display2 = ['Functional area', 'Scenario name', 'Error message_new', 'Time spent_old', 'Time spent_new']
109
  st.dataframe(pass_to_fail_scenarios[columns_to_display2])
110
  csv = convert_df(pass_to_fail_scenarios[columns_to_display2])
111
  st.download_button("Download New Failures as CSV", data=csv, file_name='new_failures.csv', mime='text/csv')
112
 
113
  with tab3:
114
  st.write(f"Passing scenarios Count: {pass_count}")
115
- columns_to_display3 = ['Functional area', 'Scenario name', 'Error message_old', 'Time spent_old', 'Time spent_new']
116
  st.dataframe(fail_to_pass_scenarios[columns_to_display3])
117
  csv = convert_df(fail_to_pass_scenarios[columns_to_display3])
118
  st.download_button("Download New Passes as CSV", data=csv, file_name='new_passes.csv', mime='text/csv')
119
 
120
  def main():
121
- st.title("CSV Comparison Tool")
122
 
123
  st.markdown("""
124
- This tool compares two CSV files and highlights the differences in the scenarios.
125
- Please upload the older and newer CSV files below.
126
  """)
127
 
128
  col1, col2 = st.columns(2)
129
 
130
  with col1:
131
- uploaded_file1 = st.file_uploader("Upload the older CSV file", type='csv', key='uploader1')
132
 
133
  with col2:
134
- uploaded_file2 = st.file_uploader("Upload the newer CSV file", type='csv', key='uploader2')
135
 
136
  if uploaded_file1 is not None and uploaded_file2 is not None:
137
  with st.spinner('Processing...'):
 
8
 
9
  def double_main(uploaded_file1, uploaded_file2):
10
  if uploaded_file1 is None or uploaded_file2 is None:
11
+ st.warning("Please upload both files for comparison.")
12
  return
13
 
14
+ # Preprocess the uploaded files (CSV or XLSX)
15
+ with st.spinner("Processing the first file..."):
16
+ data_1 = preprocess_uploaded_file(uploaded_file1)
17
+
18
+ with st.spinner("Processing the second file..."):
19
+ data_2 = preprocess_uploaded_file(uploaded_file2)
20
 
21
  # Determine which file is older and newer
22
  if data_1['Start datetime'].min() < data_2['Start datetime'].min():
 
33
  newer_datetime = newer_df['Start datetime'].min()
34
 
35
  # Display start datetime of each file
36
+ st.write(f"The older file started on {older_datetime}")
37
+ st.write(f"The newer file started on {newer_datetime}")
38
 
39
+ # Merge dataframes on 'Scenario Name'
40
+ merged_df = pd.merge(older_df, newer_df, on=['Functional area', 'Scenario Name'], suffixes=('_old', '_new'))
41
 
42
  # Filter scenarios
43
  fail_to_fail_scenarios = merged_df[(merged_df['Status_old'] == 'FAILED') & (merged_df['Status_new'] == 'FAILED')]
 
101
 
102
  with tab1:
103
  st.write(f"Failing scenarios Count: {fail_count}")
104
+ columns_to_display1 = ['Functional area', 'Scenario Name', 'Error message_old', 'Error message_new']
105
  st.dataframe(fail_to_fail_scenarios[columns_to_display1])
106
  csv = convert_df(fail_to_fail_scenarios[columns_to_display1])
107
  st.download_button("Download Consistent Failures as CSV", data=csv, file_name='consistent_failures.csv', mime='text/csv')
108
 
109
  with tab2:
110
  st.write(f"Failing scenarios Count: {pass_fail_count}")
111
+ columns_to_display2 = ['Functional area', 'Scenario Name', 'Error message_new', 'Time spent_old', 'Time spent_new']
112
  st.dataframe(pass_to_fail_scenarios[columns_to_display2])
113
  csv = convert_df(pass_to_fail_scenarios[columns_to_display2])
114
  st.download_button("Download New Failures as CSV", data=csv, file_name='new_failures.csv', mime='text/csv')
115
 
116
  with tab3:
117
  st.write(f"Passing scenarios Count: {pass_count}")
118
+ columns_to_display3 = ['Functional area', 'Scenario Name', 'Error message_old', 'Time spent_old', 'Time spent_new']
119
  st.dataframe(fail_to_pass_scenarios[columns_to_display3])
120
  csv = convert_df(fail_to_pass_scenarios[columns_to_display3])
121
  st.download_button("Download New Passes as CSV", data=csv, file_name='new_passes.csv', mime='text/csv')
122
 
123
  def main():
124
+ st.title("File Comparison Tool")
125
 
126
  st.markdown("""
127
+ This tool compares two files and highlights the differences in the scenarios.
128
+ Please upload the older and newer files below.
129
  """)
130
 
131
  col1, col2 = st.columns(2)
132
 
133
  with col1:
134
+ uploaded_file1 = st.file_uploader("Upload the older file", type=['csv', 'xlsx'], key='uploader1')
135
 
136
  with col2:
137
+ uploaded_file2 = st.file_uploader("Upload the newer file", type=['csv', 'xlsx'], key='uploader2')
138
 
139
  if uploaded_file1 is not None and uploaded_file2 is not None:
140
  with st.spinner('Processing...'):
weekly.py CHANGED
@@ -3,10 +3,44 @@ import streamlit as st
3
  import plotly.graph_objects as go
4
  from pre import preprocess_uploaded_file
5
  from datetime import datetime
6
 
7
  def generate_weekly_report(uploaded_files):
8
  if not uploaded_files:
9
- st.error("No files uploaded. Please upload CSV files for analysis.")
10
  return
11
 
12
  # Set pandas option to use Copy-on-Write
@@ -15,19 +49,10 @@ def generate_weekly_report(uploaded_files):
15
  combined_data = pd.DataFrame()
16
  for uploaded_file in uploaded_files:
17
  data = preprocess_uploaded_file(uploaded_file)
18
- # Extract date and time from filename
19
- filename_parts = uploaded_file.name.split('_')
20
- if len(filename_parts) >= 4:
21
- file_datetime_str = f"{filename_parts[-2]}_{filename_parts[-1].split('.')[0]}"
22
- try:
23
- file_datetime = datetime.strptime(file_datetime_str, '%Y%m%d_%H%M%S')
24
- file_date = file_datetime.date()
25
- except ValueError:
26
- st.error(f"Invalid date format in filename: {uploaded_file.name}")
27
- return
28
- else:
29
- st.error(f"Filename does not contain expected date format: {uploaded_file.name}")
30
- return
31
 
32
  data['File Date'] = file_date
33
  combined_data = pd.concat([combined_data, data], ignore_index=True)
@@ -140,7 +165,7 @@ def generate_weekly_report(uploaded_files):
140
  (filtered_data['Environment'] == selected_env)]
141
 
142
  if not day_scenarios.empty:
143
- st.dataframe(day_scenarios[['Functional area', 'Scenario name', 'Error message', 'Time spent(m:s)']])
144
  else:
145
  st.write("No failing scenarios found for the selected date and environment.")
146
 
 
3
  import plotly.graph_objects as go
4
  from pre import preprocess_uploaded_file
5
  from datetime import datetime
6
+ import re
7
+
8
+ def extract_date_from_filename(filename):
9
+ """Extract date from various filename formats"""
10
+ # Try pattern for "name_YYYYMMDD_HHMMSS" format
11
+ pattern1 = r'_(\d{8})_(\d{6})'
12
+ match1 = re.search(pattern1, filename)
13
+ if match1:
14
+ try:
15
+ return datetime.strptime(f"{match1.group(1)}_{match1.group(2)}", '%Y%m%d_%H%M%S')
16
+ except ValueError:
17
+ pass
18
+
19
+ # Try pattern for "name_YYYYMMDD" format
20
+ pattern2 = r'_(\d{8})'
21
+ match2 = re.search(pattern2, filename)
22
+ if match2:
23
+ try:
24
+ return datetime.strptime(match2.group(1), '%Y%m%d')
25
+ except ValueError:
26
+ pass
27
+
28
+ # Try pattern for "nameYYYYMMDD" format (e.g. batch_20250224)
29
+ pattern3 = r'(\d{8})'
30
+ match3 = re.search(pattern3, filename)
31
+ if match3:
32
+ try:
33
+ return datetime.strptime(match3.group(1), '%Y%m%d')
34
+ except ValueError:
35
+ pass
36
+
37
+ # If no patterns match, return current date with a warning
38
+ st.warning(f"Could not extract date from filename: {filename}. Using current date instead.")
39
+ return datetime.now()
40
 
41
  def generate_weekly_report(uploaded_files):
42
  if not uploaded_files:
43
+ st.error("No files uploaded. Please upload files for analysis.")
44
  return
45
 
46
  # Set pandas option to use Copy-on-Write
 
49
  combined_data = pd.DataFrame()
50
  for uploaded_file in uploaded_files:
51
  data = preprocess_uploaded_file(uploaded_file)
52
+
53
+ # Extract date from filename
54
+ file_datetime = extract_date_from_filename(uploaded_file.name)
55
+ file_date = file_datetime.date()
56
 
57
  data['File Date'] = file_date
58
  combined_data = pd.concat([combined_data, data], ignore_index=True)
 
165
  (filtered_data['Environment'] == selected_env)]
166
 
167
  if not day_scenarios.empty:
168
+ st.dataframe(day_scenarios[['Functional area', 'Scenario Name', 'Error Message', 'Time spent(m:s)']])
169
  else:
170
  st.write("No failing scenarios found for the selected date and environment.")
171