BananaSauce committed on
Commit
d2ed71e
·
1 Parent(s): c6b142e

complete xlsx support

Browse files
Files changed (4) hide show
  1. app.py +40 -20
  2. multiple.py +58 -39
  3. pre.py +100 -32
  4. second.py +21 -3
app.py CHANGED
@@ -16,6 +16,11 @@ def single_main(uploaded_file):
16
  # Process the file with header
17
  data = preprocess_uploaded_file(uploaded_file)
18
 
 
 
 
 
 
19
  # Display scenarios with status "failed" grouped by functional area
20
  failed_scenarios = data[data['Status'] == 'FAILED']
21
  passed_scenarios = data[data['Status'] == 'PASSED']
@@ -44,6 +49,9 @@ def single_main(uploaded_file):
44
  # st.write(f"Scenarios with status '{selected_status}' grouped by functional area:")
45
  st.markdown(f"### Scenarios with status '{selected_status}' grouped by functional area:")
46
 
 
 
 
47
  # Select a range of functional areas to filter scenarios
48
  selected_functional_areas = st.multiselect("Select functional areas", unique_areas, ["All"])
49
 
@@ -55,7 +63,10 @@ def single_main(uploaded_file):
55
  if not selected_functional_areas: # Check if the list is empty
56
  st.error("Please select at least one functional area.")
57
  else:
58
- # Calculate the average time spent for each functional area
 
 
 
59
  average_time_spent_seconds = filtered_scenarios.groupby('Functional area')['Time spent'].mean().reset_index()
60
 
61
  # Convert average time spent from seconds to minutes and seconds format
@@ -69,7 +80,11 @@ def single_main(uploaded_file):
69
 
70
  # Filter scenarios based on selected functional area
71
  if selected_status == 'Failed':
72
- grouped_filtered_scenarios = filtered_scenarios.groupby('Functional area')[['Scenario Name', 'Error Message','Time spent(m:s)']].apply(lambda x: x.reset_index(drop=True))
 
 
 
 
73
  elif selected_status == 'Passed':
74
  grouped_filtered_scenarios = filtered_scenarios.groupby('Functional area')[['Scenario Name', 'Time spent(m:s)']].apply(lambda x: x.reset_index(drop=True))
75
  else:
@@ -96,24 +111,29 @@ def single_main(uploaded_file):
96
  # Create and display bar graph of errors by functional area
97
  st.write(f"### Bar graph showing number of '{selected_status}' scenarios in each functional area:")
98
  error_counts = grouped_filtered_scenarios['Functional area'].value_counts()
99
- plt.figure(figsize=(10, 6))
100
- plt.bar(error_counts.index, error_counts.values)
101
- plt.xlabel('Functional Area')
102
- plt.ylabel('Number of Failures')
103
- plt.title(f"Number of '{selected_status}' scenarios by Functional Area")
104
- plt.xticks(rotation=45, ha='right')
105
-
106
- # Set y-axis limits and ticks for consistent interval of 1
107
- y_max = max(error_counts.values) + 1
108
- plt.ylim(0, y_max)
109
- plt.yticks(range(0, y_max, 1))
110
-
111
- # Display individual numbers on y-axis
112
- for i, count in enumerate(error_counts.values):
113
- plt.text(i, count, str(count), ha='center', va='bottom')
114
-
115
- plt.tight_layout() # Add this line to adjust layout
116
- st.pyplot(plt)
 
 
 
 
 
117
  else:
118
  st.write("### No scenarios with status 'failed' found.")
119
  pass
 
16
  # Process the file with header
17
  data = preprocess_uploaded_file(uploaded_file)
18
 
19
+ # Display debugging information
20
+ st.write("Data shape:", data.shape)
21
+ st.write("Unique functional areas:", data['Functional area'].nunique())
22
+ st.write("Sample of data:", data.head())
23
+
24
  # Display scenarios with status "failed" grouped by functional area
25
  failed_scenarios = data[data['Status'] == 'FAILED']
26
  passed_scenarios = data[data['Status'] == 'PASSED']
 
49
  # st.write(f"Scenarios with status '{selected_status}' grouped by functional area:")
50
  st.markdown(f"### Scenarios with status '{selected_status}' grouped by functional area:")
51
 
52
+ # Display count of unique functional areas
53
+ # st.write(f"Number of unique functional areas: {len(unique_areas) - 1}") # Subtract 1 for "All"
54
+
55
  # Select a range of functional areas to filter scenarios
56
  selected_functional_areas = st.multiselect("Select functional areas", unique_areas, ["All"])
57
 
 
63
  if not selected_functional_areas: # Check if the list is empty
64
  st.error("Please select at least one functional area.")
65
  else:
66
+ # Display count of filtered scenarios
67
+ st.write(f"Number of filtered scenarios: {len(filtered_scenarios)}")
68
+
69
+ # Calculate the average time spent for each functional area
70
  average_time_spent_seconds = filtered_scenarios.groupby('Functional area')['Time spent'].mean().reset_index()
71
 
72
  # Convert average time spent from seconds to minutes and seconds format
 
80
 
81
  # Filter scenarios based on selected functional area
82
  if selected_status == 'Failed':
83
+ # Check if Failed Step column exists
84
+ if 'Failed Step' in filtered_scenarios.columns:
85
+ grouped_filtered_scenarios = filtered_scenarios.groupby('Functional area')[['Scenario Name', 'Error Message', 'Failed Step', 'Time spent(m:s)']].apply(lambda x: x.reset_index(drop=True))
86
+ else:
87
+ grouped_filtered_scenarios = filtered_scenarios.groupby('Functional area')[['Scenario Name', 'Error Message', 'Time spent(m:s)']].apply(lambda x: x.reset_index(drop=True))
88
  elif selected_status == 'Passed':
89
  grouped_filtered_scenarios = filtered_scenarios.groupby('Functional area')[['Scenario Name', 'Time spent(m:s)']].apply(lambda x: x.reset_index(drop=True))
90
  else:
 
111
  # Create and display bar graph of errors by functional area
112
  st.write(f"### Bar graph showing number of '{selected_status}' scenarios in each functional area:")
113
  error_counts = grouped_filtered_scenarios['Functional area'].value_counts()
114
+
115
+ # Only create the graph if there are errors to display
116
+ if not error_counts.empty:
117
+ plt.figure(figsize=(10, 6))
118
+ plt.bar(error_counts.index, error_counts.values)
119
+ plt.xlabel('Functional Area')
120
+ plt.ylabel('Number of Failures')
121
+ plt.title(f"Number of '{selected_status}' scenarios by Functional Area")
122
+ plt.xticks(rotation=45, ha='right')
123
+
124
+ # Set y-axis limits and ticks for consistent interval of 1
125
+ y_max = max(error_counts.values) + 1
126
+ plt.ylim(0, y_max)
127
+ plt.yticks(range(0, y_max, 1))
128
+
129
+ # Display individual numbers on y-axis
130
+ for i, count in enumerate(error_counts.values):
131
+ plt.text(i, count, str(count), ha='center', va='bottom')
132
+
133
+ plt.tight_layout() # Add this line to adjust layout
134
+ st.pyplot(plt)
135
+ else:
136
+ st.info(f"No '{selected_status}' scenarios found to display in the graph.")
137
  else:
138
  st.write("### No scenarios with status 'failed' found.")
139
  pass
multiple.py CHANGED
@@ -9,6 +9,11 @@ def perform_analysis(uploaded_dataframes):
9
  # Concatenate all dataframes into a single dataframe
10
  combined_data = pd.concat(uploaded_dataframes, ignore_index=True)
11
 
 
 
 
 
 
12
  # Display scenarios with status "failed" grouped by functional area
13
  failed_scenarios = combined_data[combined_data['Status'] == 'FAILED']
14
  passed_scenarios = combined_data[combined_data['Status'] == 'PASSED']
@@ -34,6 +39,9 @@ def perform_analysis(uploaded_dataframes):
34
  # st.write(f"Scenarios with status '{selected_status}' grouped by functional area:")
35
  st.markdown(f"### Scenarios with status '{selected_status}' grouped by functional area:")
36
 
 
 
 
37
  # Select a range of functional areas to filter scenarios
38
  selected_functional_areas = st.multiselect("Select functional areas", unique_areas, ["All"])
39
 
@@ -45,7 +53,10 @@ def perform_analysis(uploaded_dataframes):
45
  if not selected_functional_areas: # Check if the list is empty
46
  st.error("Please select at least one functional area.")
47
  else:
48
- # Calculate the average time spent for each functional area
 
 
 
49
  average_time_spent_seconds = filtered_scenarios.groupby('Functional area')['Time spent'].mean().reset_index()
50
  # Convert average time spent from seconds to minutes and seconds format
51
  average_time_spent_seconds['Time spent'] = pd.to_datetime(average_time_spent_seconds['Time spent'], unit='s').dt.strftime('%M:%S')
@@ -69,7 +80,11 @@ def perform_analysis(uploaded_dataframes):
69
 
70
  # Filter scenarios based on selected functional area
71
  if selected_status == 'Failed':
72
- grouped_filtered_scenarios = filtered_scenarios.groupby('Environment')[['Functional area', 'Scenario Name', 'Error Message','Time spent(m:s)','Start datetime']].apply(lambda x: x.reset_index(drop=True))
 
 
 
 
73
  elif selected_status == 'Passed':
74
  grouped_filtered_scenarios = filtered_scenarios.groupby('Functional area')[['Scenario Name', 'Time spent(m:s)']].apply(lambda x: x.reset_index(drop=True))
75
  else:
@@ -86,59 +101,63 @@ def perform_analysis(uploaded_dataframes):
86
  # Sort the average time spent table by start datetime
87
  average_time_spent_seconds = average_time_spent_seconds.sort_values(by='Start datetime')
88
 
89
- # Display average time spent on each functional area in a table
90
- st.markdown("### Total and Average Time Spent on Each Functional Area")
91
- average_time_spent_seconds.index = average_time_spent_seconds.index + 1
92
- # Rename the columns for clarity
93
- average_time_spent_seconds.rename(columns={'Start datetime': 'Start Datetime', 'End datetime': 'End Datetime', 'Time spent':'Average Time Spent'}, inplace=True)
94
- # Rearrange the columns
95
- average_time_spent_seconds = average_time_spent_seconds[['Functional area', 'Total Time Spent', 'Start Datetime', 'End Datetime', 'Average Time Spent']]
96
- st.dataframe(average_time_spent_seconds)
97
 
98
  # Check if selected_status is 'Failed' and grouped_filtered_scenarios length is less than or equal to 400
99
  if selected_status != 'Passed':
100
  # Create and display bar graph of errors by functional area
101
  st.write(f"### Bar graph showing number of '{selected_status}' scenarios in each functional area:")
102
  error_counts = grouped_filtered_scenarios['Functional area'].value_counts()
103
- plt.figure(figsize=(12, 10))
104
- bars = plt.bar(error_counts.index, error_counts.values)
105
- plt.xlabel('Functional Area')
106
- plt.ylabel('Number of Failures')
107
- plt.title(f"Number of '{selected_status}' scenarios by Functional Area")
108
- plt.xticks(rotation=45, ha='right', fontsize=10)
109
- # Set y-axis limits and ticks for consistent interval of 1
110
- y_max = max(error_counts.values) + 1
111
- plt.ylim(0, y_max)
112
- plt.yticks(range(0, y_max, 1), fontsize=10)
113
 
114
- # Display individual numbers on y-axis
115
- for bar in bars:
116
- height = bar.get_height()
117
- plt.text(bar.get_x() + bar.get_width() / 2, height, str(int(height)),
118
- ha='center', va='bottom') # Reduce font size of individual numbers
 
 
 
 
 
 
 
 
 
 
 
 
 
119
 
120
- plt.tight_layout() # Add this line to adjust layout
121
- st.pyplot(plt)
 
 
122
  pass
123
 
124
  def multiple_main():
125
 
126
- # Get the number of environments from the user
127
- num_environments = st.number_input("Enter the number of environments", min_value=1, value=1, step=1)
128
-
129
  # Initialize list to store uploaded dataframes
130
  uploaded_dataframes = []
131
-
132
  # Loop through the number of environments and create file uploaders
133
- for i in range(num_environments):
134
- uploaded_files = st.file_uploader(f"Upload CSV or XLSX files for Environment {i + 1}", type=["csv", "xlsx"], accept_multiple_files=True)
 
 
 
 
135
 
136
- for uploaded_file in uploaded_files:
137
- # Preprocess the uploaded file
138
- data = preprocess_uploaded_file(uploaded_file)
139
-
140
- # Append the dataframe to the list
141
- uploaded_dataframes.append(data)
142
 
143
  # Check if any files were uploaded
144
  if uploaded_dataframes:
 
9
  # Concatenate all dataframes into a single dataframe
10
  combined_data = pd.concat(uploaded_dataframes, ignore_index=True)
11
 
12
+ # Display debugging information
13
+ # st.write("Combined data shape:", combined_data.shape)
14
+ # st.write("Unique functional areas in combined data:", combined_data['Functional area'].nunique())
15
+ # st.write("Sample of combined data:", combined_data.head())
16
+
17
  # Display scenarios with status "failed" grouped by functional area
18
  failed_scenarios = combined_data[combined_data['Status'] == 'FAILED']
19
  passed_scenarios = combined_data[combined_data['Status'] == 'PASSED']
 
39
  # st.write(f"Scenarios with status '{selected_status}' grouped by functional area:")
40
  st.markdown(f"### Scenarios with status '{selected_status}' grouped by functional area:")
41
 
42
+ # # Display count of unique functional areas
43
+ # st.write(f"Number of unique functional areas: {len(unique_areas) - 1}") # Subtract 1 for "All"
44
+
45
  # Select a range of functional areas to filter scenarios
46
  selected_functional_areas = st.multiselect("Select functional areas", unique_areas, ["All"])
47
 
 
53
  if not selected_functional_areas: # Check if the list is empty
54
  st.error("Please select at least one functional area.")
55
  else:
56
+ # Display count of filtered scenarios
57
+ st.write(f"Number of filtered scenarios: {len(filtered_scenarios)}")
58
+
59
+ # Calculate the average time spent for each functional area
60
  average_time_spent_seconds = filtered_scenarios.groupby('Functional area')['Time spent'].mean().reset_index()
61
  # Convert average time spent from seconds to minutes and seconds format
62
  average_time_spent_seconds['Time spent'] = pd.to_datetime(average_time_spent_seconds['Time spent'], unit='s').dt.strftime('%M:%S')
 
80
 
81
  # Filter scenarios based on selected functional area
82
  if selected_status == 'Failed':
83
+ # Check if Failed Step column exists
84
+ if 'Failed Step' in filtered_scenarios.columns:
85
+ grouped_filtered_scenarios = filtered_scenarios.groupby('Environment')[['Functional area', 'Scenario Name', 'Error Message', 'Failed Step', 'Time spent(m:s)', 'Start datetime']].apply(lambda x: x.reset_index(drop=True))
86
+ else:
87
+ grouped_filtered_scenarios = filtered_scenarios.groupby('Environment')[['Functional area', 'Scenario Name', 'Error Message', 'Time spent(m:s)', 'Start datetime']].apply(lambda x: x.reset_index(drop=True))
88
  elif selected_status == 'Passed':
89
  grouped_filtered_scenarios = filtered_scenarios.groupby('Functional area')[['Scenario Name', 'Time spent(m:s)']].apply(lambda x: x.reset_index(drop=True))
90
  else:
 
101
  # Sort the average time spent table by start datetime
102
  average_time_spent_seconds = average_time_spent_seconds.sort_values(by='Start datetime')
103
 
104
+ # # Display average time spent on each functional area in a table
105
+ # st.markdown("### Total and Average Time Spent on Each Functional Area")
106
+ # average_time_spent_seconds.index = average_time_spent_seconds.index + 1
107
+ # # Rename the columns for clarity
108
+ # average_time_spent_seconds.rename(columns={'Start datetime': 'Start Datetime', 'End datetime': 'End Datetime', 'Time spent':'Average Time Spent'}, inplace=True)
109
+ # # Rearrange the columns
110
+ # average_time_spent_seconds = average_time_spent_seconds[['Functional area', 'Total Time Spent', 'Start Datetime', 'End Datetime', 'Average Time Spent']]
111
+ # st.dataframe(average_time_spent_seconds)
112
 
113
  # Check if selected_status is 'Failed' and grouped_filtered_scenarios length is less than or equal to 400
114
  if selected_status != 'Passed':
115
  # Create and display bar graph of errors by functional area
116
  st.write(f"### Bar graph showing number of '{selected_status}' scenarios in each functional area:")
117
  error_counts = grouped_filtered_scenarios['Functional area'].value_counts()
 
 
 
 
 
 
 
 
 
 
118
 
119
+ # Only create the graph if there are errors to display
120
+ if not error_counts.empty:
121
+ plt.figure(figsize=(12, 10))
122
+ bars = plt.bar(error_counts.index, error_counts.values)
123
+ plt.xlabel('Functional Area')
124
+ plt.ylabel('Number of Failures')
125
+ plt.title(f"Number of '{selected_status}' scenarios by Functional Area")
126
+ plt.xticks(rotation=45, ha='right', fontsize=10)
127
+ # Set y-axis limits and ticks for consistent interval of 1
128
+ y_max = max(error_counts.values) + 1
129
+ plt.ylim(0, y_max)
130
+ plt.yticks(range(0, y_max, 1), fontsize=10)
131
+
132
+ # Display individual numbers on y-axis
133
+ for bar in bars:
134
+ height = bar.get_height()
135
+ plt.text(bar.get_x() + bar.get_width() / 2, height, str(int(height)),
136
+ ha='center', va='bottom') # Reduce font size of individual numbers
137
 
138
+ plt.tight_layout() # Add this line to adjust layout
139
+ st.pyplot(plt)
140
+ else:
141
+ st.info(f"No '{selected_status}' scenarios found to display in the graph.")
142
  pass
143
 
144
  def multiple_main():
145
 
146
+ # num_environments = st.number_input("Enter the number of environments", min_value=1, value=1, step=1)
147
+
 
148
  # Initialize list to store uploaded dataframes
149
  uploaded_dataframes = []
150
+
151
  # Loop through the number of environments and create file uploaders
152
+ # for i in range(num_environments):
153
+ uploaded_files = st.file_uploader("Upload multiple XLSX files from different environments", type=["xlsx"], accept_multiple_files=True)
154
+
155
+ for uploaded_file in uploaded_files:
156
+ # Preprocess the uploaded file
157
+ data = preprocess_uploaded_file(uploaded_file)
158
 
159
+ # Append the dataframe to the list
160
+ uploaded_dataframes.append(data)
 
 
 
 
161
 
162
  # Check if any files were uploaded
163
  if uploaded_dataframes:
pre.py CHANGED
@@ -39,27 +39,63 @@ def preprocess_xlsx(uploaded_file):
39
  dtype_dict = {
40
  'Feature Name': 'string',
41
  'Scenario Name': 'string',
42
- 'Total Time Taken (ms)': 'float64'
 
43
  }
44
 
45
  # Read both the first sheet for error messages and "Time Taken" sheet
46
  excel_file = pd.ExcelFile(uploaded_file, engine='openpyxl')
47
 
48
- # Read error messages from first sheet
49
  error_df = pd.read_excel(excel_file, sheet_name=0)
50
 
51
- # Read time taken data
52
  df = pd.read_excel(
53
  excel_file,
54
  sheet_name='Time Taken',
55
  dtype=dtype_dict
56
  )
57
 
 
 
 
 
 
 
 
 
 
 
58
  # Convert Failed Scenario column to boolean after reading
59
- df['Failed Scenario'] = df['Failed Scenario'].astype(str).map({'TRUE': True, 'FALSE': False})
 
 
 
 
 
 
 
 
 
60
 
61
- # Get error messages from the first sheet
62
- error_messages = error_df[['Scenario Name', 'Error Message']].copy()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
63
 
64
  # Extract date from filename (e.g., RI2211_batch_20250225_27031.xlsx)
65
  filename = uploaded_file.name
@@ -81,12 +117,17 @@ def preprocess_xlsx(uploaded_file):
81
  result_df = pd.DataFrame({
82
  'Functional area': df['Feature Name'],
83
  'Scenario Name': df['Scenario Name'],
84
- 'Status': df['Failed Scenario'].map({True: 'FAILED', False: 'PASSED'}),
85
  'Time spent': df['Total Time Taken (ms)'] / 1000 # Convert ms to seconds
86
  })
87
 
 
 
 
88
  # Merge error messages with result dataframe
89
- result_df = result_df.merge(error_messages, on='Scenario Name', how='left')
 
 
90
 
91
  # Add environment column
92
  result_df['Environment'] = environment
@@ -94,10 +135,37 @@ def preprocess_xlsx(uploaded_file):
94
  # Calculate formatted time spent
95
  result_df['Time spent(m:s)'] = pd.to_datetime(result_df['Time spent'], unit='s').dt.strftime('%M:%S')
96
 
97
- # Add start datetime (using file date since actual start time isn't available in this sheet)
98
  result_df['Start datetime'] = pd.to_datetime(file_date)
99
  result_df['End datetime'] = result_df['Start datetime'] + pd.to_timedelta(result_df['Time spent'], unit='s')
100
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
101
  return result_df
102
 
103
  def fill_missing_data(data, column_index, value):
@@ -111,29 +179,29 @@ def to_camel_case(s):
111
 
112
  # Define the function to preprocess a file (CSV or XLSX)
113
  def preprocess_uploaded_file(uploaded_file):
114
- with st.spinner(f'Processing {uploaded_file.name}...'):
115
- # Determine file type based on extension
116
- if uploaded_file.name.lower().endswith('.xlsx'):
117
- data = preprocess_xlsx(uploaded_file)
118
- else:
119
- # Original CSV processing
120
- file_content = uploaded_file.read()
121
- processed_output = preprocess_csv(file_content)
122
- processed_file = io.StringIO(processed_output.getvalue())
123
- data = load_data(processed_file)
124
- data = fill_missing_data(data, 4, 0)
125
- data['Start datetime'] = pd.to_datetime(data['Start datetime'], dayfirst=True, errors='coerce')
126
- data['End datetime'] = pd.to_datetime(data['End datetime'], dayfirst=True, errors='coerce')
127
- data['Time spent'] = (data['End datetime'] - data['Start datetime']).dt.total_seconds()
128
- data['Time spent(m:s)'] = pd.to_datetime(data['Time spent'], unit='s').dt.strftime('%M:%S')
129
-
130
- # Extract environment name from filename
131
- filename = uploaded_file.name
132
- environment = filename.split('_Puppeteer')[0]
133
-
134
- # Add environment column to the dataframe
135
- data['Environment'] = environment
136
-
137
  return data
138
 
139
  def add_app_description():
 
39
  dtype_dict = {
40
  'Feature Name': 'string',
41
  'Scenario Name': 'string',
42
+ 'Total Time Taken (ms)': 'float64',
43
+ 'Failed Scenario': 'string'
44
  }
45
 
46
  # Read both the first sheet for error messages and "Time Taken" sheet
47
  excel_file = pd.ExcelFile(uploaded_file, engine='openpyxl')
48
 
49
+ # Read detailed step data from first sheet (contains error messages)
50
  error_df = pd.read_excel(excel_file, sheet_name=0)
51
 
52
+ # Read time taken data from the "Time Taken" sheet
53
  df = pd.read_excel(
54
  excel_file,
55
  sheet_name='Time Taken',
56
  dtype=dtype_dict
57
  )
58
 
59
+ # Print column names and sample values for debugging
60
+ # st.write("Excel columns:", df.columns.tolist())
61
+ # st.write("Sample data from Time Taken sheet:", df.head())
62
+ # st.write("Unique Feature Names:", df['Feature Name'].unique())
63
+ # st.write("Feature Name count:", df['Feature Name'].nunique())
64
+
65
+ # # Check for any empty or NaN values in Feature Name
66
+ # empty_features = df['Feature Name'].isna().sum()
67
+ # st.write(f"Empty Feature Names: {empty_features}")
68
+
69
  # Convert Failed Scenario column to boolean after reading
70
+ # Handle different possible values (TRUE/FALSE, True/False, etc.)
71
+ df['Failed Scenario'] = df['Failed Scenario'].astype(str).str.upper()
72
+ df['Status'] = df['Failed Scenario'].map(
73
+ lambda x: 'FAILED' if x in ['TRUE', 'YES', 'Y', '1'] else 'PASSED'
74
+ )
75
+
76
+ # Count failed and passed scenarios
77
+ failed_count = (df['Status'] == 'FAILED').sum()
78
+ passed_count = (df['Status'] == 'PASSED').sum()
79
+
80
 
81
+ # Extract error messages from the first sheet
82
+ # Find rows with FAILED result and group by Scenario Name to get the error message
83
+ if 'Result' in error_df.columns:
84
+ failed_steps = error_df[error_df['Result'] == 'FAILED'].copy()
85
+
86
+ # If there are failed steps, get the error messages
87
+ if not failed_steps.empty:
88
+ # Group by Scenario Name and get the first error message and step for each scenario
89
+ error_messages = failed_steps.groupby('Scenario Name').agg({
90
+ 'Error Message': 'first',
91
+ 'Step': 'first' # Capture the step where it failed
92
+ }).reset_index()
93
+ else:
94
+ # Create empty DataFrame with required columns
95
+ error_messages = pd.DataFrame(columns=['Scenario Name', 'Error Message', 'Step'])
96
+ else:
97
+ # If Result column doesn't exist, create empty DataFrame
98
+ error_messages = pd.DataFrame(columns=['Scenario Name', 'Error Message', 'Step'])
99
 
100
  # Extract date from filename (e.g., RI2211_batch_20250225_27031.xlsx)
101
  filename = uploaded_file.name
 
117
  result_df = pd.DataFrame({
118
  'Functional area': df['Feature Name'],
119
  'Scenario Name': df['Scenario Name'],
120
+ 'Status': df['Status'],
121
  'Time spent': df['Total Time Taken (ms)'] / 1000 # Convert ms to seconds
122
  })
123
 
124
+ # Fill any NaN values in Functional area
125
+ result_df['Functional area'] = result_df['Functional area'].fillna('Unknown')
126
+
127
  # Merge error messages with result dataframe
128
+ if not error_messages.empty:
129
+ result_df = result_df.merge(error_messages[['Scenario Name', 'Error Message', 'Step']],
130
+ on='Scenario Name', how='left')
131
 
132
  # Add environment column
133
  result_df['Environment'] = environment
 
135
  # Calculate formatted time spent
136
  result_df['Time spent(m:s)'] = pd.to_datetime(result_df['Time spent'], unit='s').dt.strftime('%M:%S')
137
 
138
+
139
  result_df['Start datetime'] = pd.to_datetime(file_date)
140
  result_df['End datetime'] = result_df['Start datetime'] + pd.to_timedelta(result_df['Time spent'], unit='s')
141
 
142
+ # Add failed step information if available
143
+ if 'Step' in result_df.columns:
144
+ result_df['Failed Step'] = result_df['Step']
145
+ result_df.drop('Step', axis=1, inplace=True)
146
+
147
+ # Extract start time from the first sheet
148
+ before_steps = error_df[error_df['Step'].str.contains('before', case=False, na=False)]
149
+ if not before_steps.empty:
150
+ # Get the first 'before' step for each scenario
151
+ before_steps['Time Stamp'] = pd.to_datetime(before_steps['Time Stamp'], format='%H:%M:%S', errors='coerce')
152
+ start_times = before_steps.groupby('Scenario Name').agg({'Time Stamp': 'first'}).reset_index()
153
+ # Store the timestamps in a variable for efficient reuse
154
+ result_df = result_df.merge(start_times, on='Scenario Name', how='left')
155
+ result_df.rename(columns={'Time Stamp': 'Scenario Start Time'}, inplace=True)
156
+ scenario_start_times = result_df['Scenario Start Time']
157
+ # Combine the date from the filename with the time stamp
158
+ result_df['Start datetime'] = pd.to_datetime(scenario_start_times.dt.strftime('%H:%M:%S') + ' ' + file_date.strftime('%Y-%m-%d'))
159
+
160
+ # Print counts for debugging
161
+ # st.write(f"Processed data - Failed: {len(result_df[result_df['Status'] == 'FAILED'])}, Passed: {len(result_df[result_df['Status'] == 'PASSED'])}")
162
+ # st.write(f"Unique functional areas in processed data: {result_df['Functional area'].nunique()}")
163
+ # st.write(f"Unique functional areas: {result_df['Functional area'].unique()}")
164
+
165
+ # Debugging: Print the columns of the first sheet
166
+ # st.write("Columns in the first sheet:", error_df.columns.tolist())
167
+ # st.write("Sample data from the first sheet:", error_df.head())
168
+
169
  return result_df
170
 
171
  def fill_missing_data(data, column_index, value):
 
179
 
180
  # Define the function to preprocess a file (CSV or XLSX)
181
  def preprocess_uploaded_file(uploaded_file):
182
+ # Commenting out the spinner to disable it
183
+ # with st.spinner(f'Processing {uploaded_file.name}...'):
184
+ # Determine file type based on extension
185
+ if uploaded_file.name.lower().endswith('.xlsx'):
186
+ data = preprocess_xlsx(uploaded_file)
187
+ else:
188
+ # Original CSV processing
189
+ file_content = uploaded_file.read()
190
+ processed_output = preprocess_csv(file_content)
191
+ processed_file = io.StringIO(processed_output.getvalue())
192
+ data = load_data(processed_file)
193
+ data = fill_missing_data(data, 4, 0)
194
+ data['Start datetime'] = pd.to_datetime(data['Start datetime'], dayfirst=True, errors='coerce')
195
+ data['End datetime'] = pd.to_datetime(data['End datetime'], dayfirst=True, errors='coerce')
196
+ data['Time spent'] = (data['End datetime'] - data['Start datetime']).dt.total_seconds()
197
+ data['Time spent(m:s)'] = pd.to_datetime(data['Time spent'], unit='s').dt.strftime('%M:%S')
198
+
199
+ # Extract environment name from filename
200
+ filename = uploaded_file.name
201
+ environment = filename.split('_Puppeteer')[0]
202
+
203
+ # Add environment column to the dataframe
204
+ data['Environment'] = environment
205
  return data
206
 
207
  def add_app_description():
second.py CHANGED
@@ -39,6 +39,17 @@ def double_main(uploaded_file1, uploaded_file2):
39
  # Merge dataframes on 'Scenario Name'
40
  merged_df = pd.merge(older_df, newer_df, on=['Functional area', 'Scenario Name'], suffixes=('_old', '_new'))
41
 
 
 
 
 
 
 
 
 
 
 
 
42
  # Filter scenarios
43
  fail_to_fail_scenarios = merged_df[(merged_df['Status_old'] == 'FAILED') & (merged_df['Status_new'] == 'FAILED')]
44
  pass_to_fail_scenarios = merged_df[(merged_df['Status_old'] == 'PASSED') & (merged_df['Status_new'] == 'FAILED')]
@@ -101,21 +112,28 @@ def double_main(uploaded_file1, uploaded_file2):
101
 
102
  with tab1:
103
  st.write(f"Failing scenarios Count: {fail_count}")
104
- columns_to_display1 = ['Functional area', 'Scenario Name', 'Error message_old', 'Error message_new']
 
 
 
105
  st.dataframe(fail_to_fail_scenarios[columns_to_display1])
106
  csv = convert_df(fail_to_fail_scenarios[columns_to_display1])
107
  st.download_button("Download Consistent Failures as CSV", data=csv, file_name='consistent_failures.csv', mime='text/csv')
108
 
109
  with tab2:
110
  st.write(f"Failing scenarios Count: {pass_fail_count}")
111
- columns_to_display2 = ['Functional area', 'Scenario Name', 'Error message_new', 'Time spent_old', 'Time spent_new']
 
 
112
  st.dataframe(pass_to_fail_scenarios[columns_to_display2])
113
  csv = convert_df(pass_to_fail_scenarios[columns_to_display2])
114
  st.download_button("Download New Failures as CSV", data=csv, file_name='new_failures.csv', mime='text/csv')
115
 
116
  with tab3:
117
  st.write(f"Passing scenarios Count: {pass_count}")
118
- columns_to_display3 = ['Functional area', 'Scenario Name', 'Error message_old', 'Time spent_old', 'Time spent_new']
 
 
119
  st.dataframe(fail_to_pass_scenarios[columns_to_display3])
120
  csv = convert_df(fail_to_pass_scenarios[columns_to_display3])
121
  st.download_button("Download New Passes as CSV", data=csv, file_name='new_passes.csv', mime='text/csv')
 
39
  # Merge dataframes on 'Scenario Name'
40
  merged_df = pd.merge(older_df, newer_df, on=['Functional area', 'Scenario Name'], suffixes=('_old', '_new'))
41
 
42
+ # Ensure column naming consistency
43
+ # Rename columns if needed for consistency
44
+ column_mapping = {}
45
+ for col in merged_df.columns:
46
+ if col.startswith('Error message'):
47
+ new_col = col.replace('Error message', 'Error Message')
48
+ column_mapping[col] = new_col
49
+
50
+ if column_mapping:
51
+ merged_df.rename(columns=column_mapping, inplace=True)
52
+
53
  # Filter scenarios
54
  fail_to_fail_scenarios = merged_df[(merged_df['Status_old'] == 'FAILED') & (merged_df['Status_new'] == 'FAILED')]
55
  pass_to_fail_scenarios = merged_df[(merged_df['Status_old'] == 'PASSED') & (merged_df['Status_new'] == 'FAILED')]
 
112
 
113
  with tab1:
114
  st.write(f"Failing scenarios Count: {fail_count}")
115
+ # Check if Failed Step columns exist
116
+ columns_to_display1 = ['Functional area', 'Scenario Name', 'Error Message_old', 'Error Message_new']
117
+ if 'Failed Step_old' in fail_to_fail_scenarios.columns and 'Failed Step_new' in fail_to_fail_scenarios.columns:
118
+ columns_to_display1 = ['Functional area', 'Scenario Name', 'Error Message_old', 'Failed Step_old', 'Error Message_new', 'Failed Step_new']
119
  st.dataframe(fail_to_fail_scenarios[columns_to_display1])
120
  csv = convert_df(fail_to_fail_scenarios[columns_to_display1])
121
  st.download_button("Download Consistent Failures as CSV", data=csv, file_name='consistent_failures.csv', mime='text/csv')
122
 
123
  with tab2:
124
  st.write(f"Failing scenarios Count: {pass_fail_count}")
125
+ columns_to_display2 = ['Functional area', 'Scenario Name', 'Error Message_new', 'Time spent_old', 'Time spent_new']
126
+ if 'Failed Step_new' in pass_to_fail_scenarios.columns:
127
+ columns_to_display2 = ['Functional area', 'Scenario Name', 'Error Message_new', 'Failed Step_new', 'Time spent_old', 'Time spent_new']
128
  st.dataframe(pass_to_fail_scenarios[columns_to_display2])
129
  csv = convert_df(pass_to_fail_scenarios[columns_to_display2])
130
  st.download_button("Download New Failures as CSV", data=csv, file_name='new_failures.csv', mime='text/csv')
131
 
132
  with tab3:
133
  st.write(f"Passing scenarios Count: {pass_count}")
134
+ columns_to_display3 = ['Functional area', 'Scenario Name', 'Error Message_old', 'Time spent_old', 'Time spent_new']
135
+ if 'Failed Step_old' in fail_to_pass_scenarios.columns:
136
+ columns_to_display3 = ['Functional area', 'Scenario Name', 'Error Message_old', 'Failed Step_old', 'Time spent_old', 'Time spent_new']
137
  st.dataframe(fail_to_pass_scenarios[columns_to_display3])
138
  csv = convert_df(fail_to_pass_scenarios[columns_to_display3])
139
  st.download_button("Download New Passes as CSV", data=csv, file_name='new_passes.csv', mime='text/csv')