Commit 3ff5801 (parent 4e4d72e): changes xlsx

Changed files:
- README.md (+96, -1)
- app.py (+11, -12)
- multi_env_compare.py (+11, -11)
- multiple.py (+5, -5)
- pre.py (+99, -20)
- requirements.txt (+6, -6)
- second.py (+19, -16)
- weekly.py (+40, -15)
README.md
CHANGED
@@ -7,4 +7,99 @@ sdk: streamlit
 sdk_version: 1.28.1
 app_file: app.py
 pinned: false
----
+---
+
+# Batch Run Analyzer
+
+A comprehensive Streamlit application for analyzing batch run results from CSV or XLSX files, visualizing pass/fail statistics, and comparing runs across different environments.
+
+## Features
+
+- Support for both CSV and XLSX file formats
+- Multiple analysis modes:
+  - **Multi**: Analyze multiple files from different environments
+  - **Compare**: Compare two files to identify differences in scenario outcomes
+  - **Weekly**: Generate weekly trend reports
+  - **Multi-Env Compare**: Compare scenarios across multiple environments
+- Detailed statistics on passing and failing scenarios
+- Visual charts for failure counts by functional area
+- Interactive filtering by functional area and status
+- Time spent analysis per functional area
+- Error Message analysis
+
+## Setup and Installation
+
+1. Clone this repository:
+   ```
+   git clone <repository-url>
+   cd batch-run-csv-analyser
+   ```
+
+2. Install the required dependencies:
+   ```
+   pip install -r requirements.txt
+   ```
+
+3. Run the application:
+   ```
+   streamlit run app.py
+   ```
+
+## File Format Support
+
+### CSV Format (Legacy)
+The application still supports the original CSV format with the following columns:
+- Functional area
+- Scenario Name
+- Start datetime
+- End datetime
+- Status
+- Error Message
+
+### XLSX Format (New)
+The application now supports XLSX files with step-level data:
+- Feature Name
+- Scenario Name
+- Step
+- Result
+- Time Stamp
+- Duration (ms)
+- Error Message
+
+The application will automatically detect the file format based on the file extension and process it accordingly.
+
+## Usage
+
+1. Start the application with `streamlit run app.py`
+2. Use the sidebar to select the desired analysis mode
+3. Upload the necessary files based on the selected mode
+4. Follow the on-screen instructions for filtering and analysis
+
+## Analysis Modes
+
+### Multi Mode
+Upload files from multiple environments for individual analysis. View statistics, filter by functional area, and see charts of failing scenarios.
+
+### Compare Mode
+Upload two files to compare scenario statuses between them. The application will identify:
+- Consistent failures (failed in both files)
+- New failures (passed in the older file, failed in the newer)
+- New passes (failed in the older file, passed in the newer)
+
+### Weekly Mode
+Upload files from multiple dates to see trend reports. Filter by environment and functional area, and view detailed statistics for each day.
+
+### Multi-Env Compare Mode
+Compare scenarios across multiple environments to identify inconsistencies in test coverage.
+
+## Notes
+
+- Filename format is important for date extraction in Weekly mode. The application will try to extract dates using various patterns like `name_YYYYMMDD_HHMMSS`, `name_YYYYMMDD`, or any 8-digit sequence resembling a date.
+- For XLSX files, all steps within a scenario are aggregated to determine the overall scenario status.
+
+## Troubleshooting
+
+If you encounter issues:
+1. Ensure the file format follows the expected structure
+2. Check the logs for specific error messages
+3. Try processing smaller files first to verify functionality
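One behavior the README states but never shows is the step-to-scenario aggregation for XLSX files. Below is a minimal pandas sketch of that idea, using the step-level columns listed above with made-up rows; note the commit's actual implementation in pre.py reads a pre-aggregated "Time Taken" sheet rather than reducing raw step rows.

```
import pandas as pd

# Hypothetical step-level rows, using the XLSX columns listed in the README.
steps = pd.DataFrame({
    'Scenario Name': ['Login', 'Login', 'Checkout', 'Checkout'],
    'Result': ['PASSED', 'FAILED', 'PASSED', 'PASSED'],
    'Duration (ms)': [1200, 300, 800, 450],
})

# A scenario counts as FAILED if any of its steps failed; durations sum.
scenarios = steps.groupby('Scenario Name').agg(
    Status=('Result', lambda r: 'FAILED' if (r == 'FAILED').any() else 'PASSED'),
    total_ms=('Duration (ms)', 'sum'),
)
print(scenarios)  # Checkout -> PASSED, Login -> FAILED
```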
app.py
CHANGED
@@ -13,9 +13,8 @@ from multi_env_compare import multi_env_compare_main
 def single_main(uploaded_file):
 
     if uploaded_file is not None:
-        # Process the
+        # Process the file with header
         data = preprocess_uploaded_file(uploaded_file)
-        # st.write(data)
 
         # Display scenarios with status "failed" grouped by functional area
         failed_scenarios = data[data['Status'] == 'FAILED']
@@ -70,9 +69,9 @@ def single_main(uploaded_file):
 
     # Filter scenarios based on selected functional area
     if selected_status == 'Failed':
-        grouped_filtered_scenarios = filtered_scenarios.groupby('Functional area')[['Scenario
+        grouped_filtered_scenarios = filtered_scenarios.groupby('Functional area')[['Scenario Name', 'Error Message','Time spent(m:s)']].apply(lambda x: x.reset_index(drop=True))
     elif selected_status == 'Passed':
-        grouped_filtered_scenarios = filtered_scenarios.groupby('Functional area')[['Scenario
+        grouped_filtered_scenarios = filtered_scenarios.groupby('Functional area')[['Scenario Name', 'Time spent(m:s)']].apply(lambda x: x.reset_index(drop=True))
     else:
         grouped_filtered_scenarios = None
 
@@ -81,7 +80,7 @@ def single_main(uploaded_file):
         grouped_filtered_scenarios.drop(columns=['level_1'], inplace=True)
         grouped_filtered_scenarios.index = grouped_filtered_scenarios.index + 1
 
-        st.
+        st.dataframe(grouped_filtered_scenarios)
 
         # Sort the average time spent table by start datetime
         average_time_spent_seconds = average_time_spent_seconds.sort_values(by='Start datetime')
@@ -136,14 +135,14 @@ def main():
     if st.session_state["mode"] == "multi":
         multiple_main()
     elif st.session_state["mode"] == "compare":
-        st.sidebar.markdown("### Upload
+        st.sidebar.markdown("### Upload Files for Comparison")
        upload_option = st.sidebar.radio("Upload method", ["Single uploader", "Two separate uploaders"])
 
        if upload_option == "Single uploader":
-            uploaded_files = st.sidebar.file_uploader("Upload CSV files for comparison", type="csv", accept_multiple_files=True)
+            uploaded_files = st.sidebar.file_uploader("Upload CSV or XLSX files for comparison", type=["csv", "xlsx"], accept_multiple_files=True)
            if uploaded_files:
                if len(uploaded_files) < 2:
-                    st.warning("Please upload at least two
+                    st.warning("Please upload at least two files for comparison.")
                elif len(uploaded_files) > 2:
                    st.warning("More than two files uploaded. Only the first two will be used for comparison.")
                else:
@@ -153,18 +152,18 @@ def main():
        else:
            col1, col2 = st.sidebar.columns(2)
            with col1:
-                uploaded_file1 = st.file_uploader("Upload older CSV file", type="csv", key="file1")
+                uploaded_file1 = st.file_uploader("Upload older CSV/XLSX file", type=["csv", "xlsx"], key="file1")
            with col2:
-                uploaded_file2 = st.file_uploader("Upload newer CSV file", type="csv", key="file2")
+                uploaded_file2 = st.file_uploader("Upload newer CSV/XLSX file", type=["csv", "xlsx"], key="file2")
 
            if uploaded_file1 is not None and uploaded_file2 is not None:
                with st.spinner('Processing...'):
                    double_main(uploaded_file1, uploaded_file2)
                st.success('Comparison Complete!')
            elif uploaded_file1 is not None or uploaded_file2 is not None:
-                st.warning("Please upload both
+                st.warning("Please upload both files for comparison.")
    elif st.session_state["mode"] == "weekly":
-        uploaded_files = st.sidebar.file_uploader("Upload CSV files for Weekly Report", type="csv", accept_multiple_files=True)
+        uploaded_files = st.sidebar.file_uploader("Upload CSV or XLSX files for Weekly Report", type=["csv", "xlsx"], accept_multiple_files=True)
        if uploaded_files:
            generate_weekly_report(uploaded_files)
    elif st.session_state["mode"] == "multi-env compare":
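A side note on the `level_1` cleanup visible in the hunks above: `groupby(...)[cols].apply(lambda x: x.reset_index(drop=True))` returns a frame indexed by (group key, within-group position), so a later `reset_index()` surfaces that position as a `level_1` column, which the app drops before display. A toy reproduction with made-up data:

```
import pandas as pd

df = pd.DataFrame({
    'Functional area': ['A', 'A', 'B'],
    'Scenario Name': ['s1', 's2', 's3'],
    'Time spent(m:s)': ['01:10', '00:45', '02:03'],
})

grouped = df.groupby('Functional area')[['Scenario Name', 'Time spent(m:s)']] \
            .apply(lambda x: x.reset_index(drop=True))

# The result carries a MultiIndex of (Functional area, within-group row number).
# reset_index() turns the unnamed second level into a 'level_1' column,
# which the app then drops before calling st.dataframe.
flat = grouped.reset_index().drop(columns=['level_1'])
print(flat)
```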
multi_env_compare.py
CHANGED
@@ -13,7 +13,7 @@ def find_different_scenarios(grouped_data, area):
     area_data = grouped_data[grouped_data['Functional area'] == area]
 
     # Get scenarios for each environment
-    scenarios_by_env = {env: set(area_data[area_data['Environment'] == env]['Scenario
+    scenarios_by_env = {env: set(area_data[area_data['Environment'] == env]['Scenario Name'])
                         for env in area_data['Environment'].unique()}
 
     # Find scenarios that are in one environment but not the other
@@ -71,8 +71,8 @@ def perform_multi_env_analysis(uploaded_dataframes):
         (combined_data['Functional area'].isin(selected_functional_areas))
     ]
 
-    # Group data by Environment, Functional area, Scenario
-    grouped_data = filtered_data.groupby(['Environment', 'Functional area', 'Scenario
+    # Group data by Environment, Functional area, Scenario Name, and Status
+    grouped_data = filtered_data.groupby(['Environment', 'Functional area', 'Scenario Name', 'Status']).size().unstack(fill_value=0)
 
     # Ensure 'PASSED' and 'FAILED' columns exist
     if 'PASSED' not in grouped_data.columns:
@@ -83,11 +83,11 @@ def perform_multi_env_analysis(uploaded_dataframes):
     # Calculate total scenarios
     grouped_data['Total'] = grouped_data['PASSED'] + grouped_data['FAILED']
 
-    # Reset index to make Environment, Functional area, and Scenario
+    # Reset index to make Environment, Functional area, and Scenario Name as columns
     grouped_data = grouped_data.reset_index()
 
     # Reorder columns
-    grouped_data = grouped_data[['Environment', 'Functional area', 'Scenario
+    grouped_data = grouped_data[['Environment', 'Functional area', 'Scenario Name', 'Total', 'PASSED', 'FAILED']]
 
     # Display summary statistics
     st.write("### Summary Statistics")
@@ -103,7 +103,7 @@ def perform_multi_env_analysis(uploaded_dataframes):
     # Display the DataFrame
     st.dataframe(summary_with_headers)
     # Define scenarios_by_env here
-    scenarios_by_env = {env: set(grouped_data[grouped_data['Environment'] == env]['Scenario
+    scenarios_by_env = {env: set(grouped_data[grouped_data['Environment'] == env]['Scenario Name']) for env in selected_environments}
 
     missing_scenarios = []
     mismatched_scenarios = []
@@ -112,7 +112,7 @@ def perform_multi_env_analysis(uploaded_dataframes):
 
     if len(selected_environments) > 1:
         # Group data by Environment and Functional area, count scenarios
-        scenario_counts = filtered_data.groupby(['Environment', 'Functional area'])['Scenario
+        scenario_counts = filtered_data.groupby(['Environment', 'Functional area'])['Scenario Name'].nunique().unstack(fill_value=0)
 
         # Calculate the difference between max and min counts for each functional area
         count_diff = scenario_counts.max() - scenario_counts.min()
@@ -140,7 +140,7 @@ def perform_multi_env_analysis(uploaded_dataframes):
 
     # Get scenarios for each environment
     scenarios_by_env = {env: set(filtered_data[(filtered_data['Environment'] == env) &
-                                               (filtered_data['Functional area'] == selected_area)]['Scenario
+                                               (filtered_data['Functional area'] == selected_area)]['Scenario Name'])
                         for env in selected_environments}
 
     # Find scenarios that are different between environments
@@ -188,10 +188,10 @@ def multi_env_compare_main():
 
     # Loop through the number of environments and create file uploaders
     for i in range(num_environments):
-        uploaded_files = st.file_uploader(f"Upload CSV files for Environment {i + 1}", type="csv", accept_multiple_files=True)
+        uploaded_files = st.file_uploader(f"Upload CSV or XLSX files for Environment {i + 1}", type=["csv", "xlsx"], accept_multiple_files=True)
 
        for uploaded_file in uploaded_files:
-            # Preprocess the uploaded
+            # Preprocess the uploaded file
            data = preprocess_uploaded_file(uploaded_file)
 
            # Append the dataframe to the list
@@ -202,7 +202,7 @@ def multi_env_compare_main():
         # Perform analysis for uploaded data
         perform_multi_env_analysis(uploaded_dataframes)
     else:
-        st.write("Please upload at least one
+        st.write("Please upload at least one file.")
 
 if __name__ == "__main__":
     multi_env_compare_main()
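The pivot at the heart of perform_multi_env_analysis is `groupby([...]).size().unstack(fill_value=0)`, which turns the Status values into columns of counts; the "Ensure 'PASSED' and 'FAILED' columns exist" guard is needed because a run with no failures produces no FAILED column at all. A minimal sketch with made-up data:

```
import pandas as pd

runs = pd.DataFrame({
    'Environment': ['dev', 'dev', 'uat'],
    'Scenario Name': ['s1', 's2', 's1'],
    'Status': ['PASSED', 'PASSED', 'PASSED'],
})

# Pivot Status values into count columns, one row per (env, scenario).
counts = runs.groupby(['Environment', 'Scenario Name', 'Status']).size().unstack(fill_value=0)

# No scenario failed, so only a PASSED column exists; add the missing one
# before computing totals, mirroring the guard in perform_multi_env_analysis.
for col in ('PASSED', 'FAILED'):
    if col not in counts.columns:
        counts[col] = 0
counts['Total'] = counts['PASSED'] + counts['FAILED']
print(counts.reset_index())
```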
multiple.py
CHANGED
@@ -69,9 +69,9 @@ def perform_analysis(uploaded_dataframes):
 
     # Filter scenarios based on selected functional area
     if selected_status == 'Failed':
-        grouped_filtered_scenarios = filtered_scenarios.groupby('Environment')[['Functional area', 'Scenario
+        grouped_filtered_scenarios = filtered_scenarios.groupby('Environment')[['Functional area', 'Scenario Name', 'Error Message','Time spent(m:s)','Start datetime']].apply(lambda x: x.reset_index(drop=True))
     elif selected_status == 'Passed':
-        grouped_filtered_scenarios = filtered_scenarios.groupby('Functional area')[['Scenario
+        grouped_filtered_scenarios = filtered_scenarios.groupby('Functional area')[['Scenario Name', 'Time spent(m:s)']].apply(lambda x: x.reset_index(drop=True))
     else:
         grouped_filtered_scenarios = None
     grouped_filtered_scenarios.reset_index(inplace=True)
@@ -127,10 +127,10 @@ def multiple_main():
 
     # Loop through the number of environments and create file uploaders
     for i in range(num_environments):
-        uploaded_files = st.file_uploader(f"Upload CSV files for Environment {i + 1}", type="csv", accept_multiple_files=True)
+        uploaded_files = st.file_uploader(f"Upload CSV or XLSX files for Environment {i + 1}", type=["csv", "xlsx"], accept_multiple_files=True)
 
        for uploaded_file in uploaded_files:
-            # Preprocess the uploaded
+            # Preprocess the uploaded file
            data = preprocess_uploaded_file(uploaded_file)
 
            # Append the dataframe to the list
@@ -141,6 +141,6 @@ def multiple_main():
         # Perform analysis for uploaded data
         perform_analysis(uploaded_dataframes)
     else:
-        st.write("Please upload at least one
+        st.write("Please upload at least one file.")
 
     pass
pre.py
CHANGED
@@ -2,8 +2,12 @@ import pandas as pd
 import streamlit as st
 import csv
 import io
+import openpyxl  # Add this import for Excel handling
+from datetime import datetime
+import re
 
 def preprocess_csv(input_bytes):
+    # Keep this for backward compatibility with CSV files
     text = input_bytes.decode()  # Decode bytes to text
     output = io.StringIO()
     writer = csv.writer(output)
@@ -28,6 +32,74 @@ def load_data(file):
     data = pd.read_csv(file, header=None, names=column_names)
     return data
 
+@st.cache_data
+def preprocess_xlsx(uploaded_file):
+    """Process Excel file with step-level data and convert to scenario-level summary"""
+    # Define data types for columns
+    dtype_dict = {
+        'Feature Name': 'string',
+        'Scenario Name': 'string',
+        'Total Time Taken (ms)': 'float64'
+    }
+
+    # Read both the first sheet for error messages and the "Time Taken" sheet
+    excel_file = pd.ExcelFile(uploaded_file, engine='openpyxl')
+
+    # Read error messages from first sheet
+    error_df = pd.read_excel(excel_file, sheet_name=0)
+
+    # Read time taken data
+    df = pd.read_excel(
+        excel_file,
+        sheet_name='Time Taken',
+        dtype=dtype_dict
+    )
+
+    # Convert Failed Scenario column to boolean after reading
+    df['Failed Scenario'] = df['Failed Scenario'].astype(str).map({'TRUE': True, 'FALSE': False})
+
+    # Get error messages from the first sheet
+    error_messages = error_df[['Scenario Name', 'Error message']].copy()
+
+    # Extract date from filename (e.g., RI2211_batch_20250225_27031.xlsx)
+    filename = uploaded_file.name
+    date_match = re.search(r'_(\d{8})_', filename)
+    if date_match:
+        date_str = date_match.group(1)
+        file_date = datetime.strptime(date_str, '%Y%m%d').date()
+    else:
+        st.warning(f"Could not extract date from filename: {filename}. Using current date.")
+        file_date = datetime.now().date()
+
+    # Extract environment from filename
+    if any(pattern in filename for pattern in ['_batch_', '_fin_', '_priority_', '_Puppeteer_']):
+        environment = filename.split('_')[0]
+    else:
+        environment = filename.split('.')[0]
+
+    # Create result dataframe ('Scenario Name' matches the CSV path and the
+    # error-message sheet, so the merge below and downstream groupbys work)
+    result_df = pd.DataFrame({
+        'Functional area': df['Feature Name'],
+        'Scenario Name': df['Scenario Name'],
+        'Status': df['Failed Scenario'].map({True: 'FAILED', False: 'PASSED'}),
+        'Time spent': df['Total Time Taken (ms)'] / 1000  # Convert ms to seconds
+    })
+
+    # Merge error messages with result dataframe
+    result_df = result_df.merge(error_messages, on='Scenario Name', how='left')
+
+    # Add environment column
+    result_df['Environment'] = environment
+
+    # Calculate formatted time spent
+    result_df['Time spent(m:s)'] = pd.to_datetime(result_df['Time spent'], unit='s').dt.strftime('%M:%S')
+
+    # Add start datetime (using file date since actual start time isn't available in this sheet)
+    result_df['Start datetime'] = pd.to_datetime(file_date)
+    result_df['End datetime'] = result_df['Start datetime'] + pd.to_timedelta(result_df['Time spent'], unit='s')
+
+    return result_df
+
 def fill_missing_data(data, column_index, value):
     data.iloc[:, column_index] = data.iloc[:, column_index].fillna(value)
     return data
@@ -37,23 +109,30 @@ def to_camel_case(s):
     parts = s.split('_')
     return ''.join([part.capitalize() for part in parts])
 
-# Define the function to preprocess a CSV
+# Define the function to preprocess a file (CSV or XLSX)
 def preprocess_uploaded_file(uploaded_file):
+    with st.spinner(f'Processing {uploaded_file.name}...'):
+        # Determine file type based on extension
+        if uploaded_file.name.lower().endswith('.xlsx'):
+            data = preprocess_xlsx(uploaded_file)
+        else:
+            # Original CSV processing
+            file_content = uploaded_file.read()
+            processed_output = preprocess_csv(file_content)
+            processed_file = io.StringIO(processed_output.getvalue())
+            data = load_data(processed_file)
+            data = fill_missing_data(data, 4, 0)
+            data['Start datetime'] = pd.to_datetime(data['Start datetime'], dayfirst=True, errors='coerce')
+            data['End datetime'] = pd.to_datetime(data['End datetime'], dayfirst=True, errors='coerce')
+            data['Time spent'] = (data['End datetime'] - data['Start datetime']).dt.total_seconds()
+            data['Time spent(m:s)'] = pd.to_datetime(data['Time spent'], unit='s').dt.strftime('%M:%S')
+
+            # Extract environment name from filename
+            filename = uploaded_file.name
+            environment = filename.split('_Puppeteer')[0]
+
+            # Add environment column to the dataframe
+            data['Environment'] = environment
 
     return data
 
@@ -66,13 +145,13 @@ def add_app_description():
 
     if is_selected:
         with st.expander('Show App Description'):
-            st.markdown("Welcome to DataLink Compare. This tool allows you to analyze
+            st.markdown("Welcome to DataLink Compare. This tool allows you to analyze batch run reports and provides insights into their statuses, processing times, and more. You can also compare two files to identify differences and similarities between them.")
 
            st.markdown("### Instructions:")
-            st.write("1. Upload your CSV file using the file uploader on the sidebar.")
-            st.write("2. Choose between 'Multi' and 'Compare' mode using the
-            st.write("3. In 'Multi' mode, you can upload and analyze multiple
-            st.write("4. In 'Compare' mode, you can upload two
+            st.write("1. Upload your CSV or XLSX file using the file uploader on the sidebar.")
+            st.write("2. Choose between 'Multi', 'Compare', 'Weekly', and 'Multi-Env Compare' mode using the dropdown on the sidebar.")
+            st.write("3. In 'Multi' mode, you can upload and analyze multiple files for individual environments.")
+            st.write("4. In 'Compare' mode, you can upload two files to compare them.")
 
            st.markdown("### Features:")
            st.write("- View statistics of passing and failing scenarios.")
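The filename conventions preprocess_xlsx relies on can be exercised in isolation. This sketch mirrors the same regex and split logic on a made-up filename; parse_batch_filename is an illustrative helper, not a function in the repo:

```
import re
from datetime import datetime

def parse_batch_filename(filename):
    """Illustrative mirror of the date/environment extraction in preprocess_xlsx."""
    # Date: an 8-digit run date between underscores, e.g. RI2211_batch_20250225_27031.xlsx
    date_match = re.search(r'_(\d{8})_', filename)
    file_date = (datetime.strptime(date_match.group(1), '%Y%m%d').date()
                 if date_match else None)

    # Environment: prefix before the first underscore for known batch patterns,
    # otherwise the filename stem.
    if any(p in filename for p in ['_batch_', '_fin_', '_priority_', '_Puppeteer_']):
        environment = filename.split('_')[0]
    else:
        environment = filename.split('.')[0]
    return environment, file_date

print(parse_batch_filename('RI2211_batch_20250225_27031.xlsx'))
# ('RI2211', datetime.date(2025, 2, 25))
```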
requirements.txt
CHANGED
@@ -1,6 +1,6 @@
+pandas>=2.0.0
+streamlit>=1.0.0
+plotly>=5.0.0
+matplotlib>=3.0.0
+numpy>=1.20.0
+openpyxl>=3.0.0
second.py
CHANGED
@@ -8,12 +8,15 @@ def convert_df(df):
 
 def double_main(uploaded_file1, uploaded_file2):
     if uploaded_file1 is None or uploaded_file2 is None:
-        st.warning("Please upload both
+        st.warning("Please upload both files for comparison.")
         return
 
-    # Preprocess the uploaded CSV
+    # Preprocess the uploaded files (CSV or XLSX)
+    with st.spinner("Processing the first file..."):
+        data_1 = preprocess_uploaded_file(uploaded_file1)
+
+    with st.spinner("Processing the second file..."):
+        data_2 = preprocess_uploaded_file(uploaded_file2)
 
     # Determine which file is older and newer
     if data_1['Start datetime'].min() < data_2['Start datetime'].min():
@@ -30,11 +33,11 @@ def double_main(uploaded_file1, uploaded_file2):
     newer_datetime = newer_df['Start datetime'].min()
 
     # Display start datetime of each file
-    st.write(f"The older
-    st.write(f"The newer
+    st.write(f"The older file started on {older_datetime}")
+    st.write(f"The newer file started on {newer_datetime}")
 
-    # Merge dataframes on '
-    merged_df = pd.merge(older_df, newer_df, on=['Functional area', 'Scenario
+    # Merge dataframes on 'Scenario Name'
+    merged_df = pd.merge(older_df, newer_df, on=['Functional area', 'Scenario Name'], suffixes=('_old', '_new'))
 
     # Filter scenarios
     fail_to_fail_scenarios = merged_df[(merged_df['Status_old'] == 'FAILED') & (merged_df['Status_new'] == 'FAILED')]
@@ -98,40 +101,40 @@ def double_main(uploaded_file1, uploaded_file2):
 
     with tab1:
         st.write(f"Failing scenarios Count: {fail_count}")
-        columns_to_display1 = ['Functional area', 'Scenario
+        columns_to_display1 = ['Functional area', 'Scenario Name', 'Error message_old', 'Error message_new']
        st.dataframe(fail_to_fail_scenarios[columns_to_display1])
        csv = convert_df(fail_to_fail_scenarios[columns_to_display1])
        st.download_button("Download Consistent Failures as CSV", data=csv, file_name='consistent_failures.csv', mime='text/csv')
 
    with tab2:
        st.write(f"Failing scenarios Count: {pass_fail_count}")
-        columns_to_display2 = ['Functional area', 'Scenario
+        columns_to_display2 = ['Functional area', 'Scenario Name', 'Error message_new', 'Time spent_old', 'Time spent_new']
        st.dataframe(pass_to_fail_scenarios[columns_to_display2])
        csv = convert_df(pass_to_fail_scenarios[columns_to_display2])
        st.download_button("Download New Failures as CSV", data=csv, file_name='new_failures.csv', mime='text/csv')
 
    with tab3:
        st.write(f"Passing scenarios Count: {pass_count}")
-        columns_to_display3 = ['Functional area', 'Scenario
+        columns_to_display3 = ['Functional area', 'Scenario Name', 'Error message_old', 'Time spent_old', 'Time spent_new']
        st.dataframe(fail_to_pass_scenarios[columns_to_display3])
        csv = convert_df(fail_to_pass_scenarios[columns_to_display3])
        st.download_button("Download New Passes as CSV", data=csv, file_name='new_passes.csv', mime='text/csv')
 
 def main():
-    st.title("
+    st.title("File Comparison Tool")
 
     st.markdown("""
-    This tool compares two
-    Please upload the older and newer
+    This tool compares two files and highlights the differences in the scenarios.
+    Please upload the older and newer files below.
     """)
 
     col1, col2 = st.columns(2)
 
     with col1:
-        uploaded_file1 = st.file_uploader("Upload the older
+        uploaded_file1 = st.file_uploader("Upload the older file", type=['csv', 'xlsx'], key='uploader1')
 
     with col2:
-        uploaded_file2 = st.file_uploader("Upload the newer
+        uploaded_file2 = st.file_uploader("Upload the newer file", type=['csv', 'xlsx'], key='uploader2')
 
     if uploaded_file1 is not None and uploaded_file2 is not None:
         with st.spinner('Processing...'):
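The three comparison tabs in second.py all come from one merge keyed on Functional area and Scenario Name with _old/_new suffixes, followed by boolean masks over the two Status columns. A condensed sketch with made-up frames:

```
import pandas as pd

older = pd.DataFrame({'Functional area': ['A', 'A'],
                      'Scenario Name': ['s1', 's2'],
                      'Status': ['FAILED', 'PASSED']})
newer = pd.DataFrame({'Functional area': ['A', 'A'],
                      'Scenario Name': ['s1', 's2'],
                      'Status': ['FAILED', 'FAILED']})

# Suffixes keep both runs' Status columns side by side.
merged = pd.merge(older, newer, on=['Functional area', 'Scenario Name'],
                  suffixes=('_old', '_new'))

consistent_failures = merged[(merged['Status_old'] == 'FAILED') & (merged['Status_new'] == 'FAILED')]
new_failures = merged[(merged['Status_old'] == 'PASSED') & (merged['Status_new'] == 'FAILED')]
new_passes = merged[(merged['Status_old'] == 'FAILED') & (merged['Status_new'] == 'PASSED')]
print(len(consistent_failures), len(new_failures), len(new_passes))  # 1 1 0
```

Since pd.merge defaults to an inner join, scenarios present in only one of the two files drop out of all three tabs.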
weekly.py
CHANGED
@@ -3,10 +3,44 @@ import streamlit as st
 import plotly.graph_objects as go
 from pre import preprocess_uploaded_file
 from datetime import datetime
+import re
+
+def extract_date_from_filename(filename):
+    """Extract date from various filename formats"""
+    # Try pattern for "name_YYYYMMDD_HHMMSS" format
+    pattern1 = r'_(\d{8})_(\d{6})'
+    match1 = re.search(pattern1, filename)
+    if match1:
+        try:
+            return datetime.strptime(f"{match1.group(1)}_{match1.group(2)}", '%Y%m%d_%H%M%S')
+        except ValueError:
+            pass
+
+    # Try pattern for "name_YYYYMMDD" format
+    pattern2 = r'_(\d{8})'
+    match2 = re.search(pattern2, filename)
+    if match2:
+        try:
+            return datetime.strptime(match2.group(1), '%Y%m%d')
+        except ValueError:
+            pass
+
+    # Try pattern for "nameYYYYMMDD" format (e.g. batch_20250224)
+    pattern3 = r'(\d{8})'
+    match3 = re.search(pattern3, filename)
+    if match3:
+        try:
+            return datetime.strptime(match3.group(1), '%Y%m%d')
+        except ValueError:
+            pass
+
+    # If no patterns match, return current date with a warning
+    st.warning(f"Could not extract date from filename: {filename}. Using current date instead.")
+    return datetime.now()
 
 def generate_weekly_report(uploaded_files):
     if not uploaded_files:
-        st.error("No files uploaded. Please upload
+        st.error("No files uploaded. Please upload files for analysis.")
         return
 
     # Set pandas option to use Copy-on-Write
@@ -15,19 +49,10 @@ def generate_weekly_report(uploaded_files):
     combined_data = pd.DataFrame()
     for uploaded_file in uploaded_files:
         data = preprocess_uploaded_file(uploaded_file)
-            try:
-                file_datetime = datetime.strptime(file_datetime_str, '%Y%m%d_%H%M%S')
-                file_date = file_datetime.date()
-            except ValueError:
-                st.error(f"Invalid date format in filename: {uploaded_file.name}")
-                return
-        else:
-            st.error(f"Filename does not contain expected date format: {uploaded_file.name}")
-            return
+
+        # Extract date from filename
+        file_datetime = extract_date_from_filename(uploaded_file.name)
+        file_date = file_datetime.date()
 
         data['File Date'] = file_date
         combined_data = pd.concat([combined_data, data], ignore_index=True)
@@ -140,7 +165,7 @@ def generate_weekly_report(uploaded_files):
                                     (filtered_data['Environment'] == selected_env)]
 
        if not day_scenarios.empty:
-            st.dataframe(day_scenarios[['Functional area', 'Scenario
+            st.dataframe(day_scenarios[['Functional area', 'Scenario Name', 'Error Message', 'Time spent(m:s)']])
        else:
            st.write("No failing scenarios found for the selected date and environment.")
 
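Assuming weekly.py imports cleanly on its own (its dependencies are all in requirements.txt), the fallback chain in the new extract_date_from_filename can be spot-checked with made-up filenames:

```
from weekly import extract_date_from_filename

# Matched by the first, second, and third patterns respectively.
print(extract_date_from_filename('uat_batch_20250224_235959.xlsx'))  # 2025-02-24 23:59:59
print(extract_date_from_filename('uat_20250224.csv'))                # 2025-02-24 00:00:00
print(extract_date_from_filename('batch20250224.csv'))               # 2025-02-24 00:00:00
```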