BananaSauce committed on
Commit
cd8fcb3
·
verified ·
1 Parent(s): e24c1ad

Update weekly.py

Browse files
Files changed (1) hide show
  1. weekly.py +130 -55
weekly.py CHANGED
@@ -1,73 +1,148 @@
1
  import pandas as pd
2
  import streamlit as st
3
- import matplotlib.pyplot as plt
4
- import io
5
  from pre import preprocess_uploaded_file
6
 
7
- from collections import defaultdict
8
-
9
  def generate_weekly_report(uploaded_files):
 
 
 
10
 
11
- # Create a dictionary to store the number of failures for each environment and day
12
- environment_daily_failures = {}
13
-
14
  for uploaded_file in uploaded_files:
15
- # Preprocess the uploaded CSV file (you can use your existing preprocessing code)
16
  data = preprocess_uploaded_file(uploaded_file)
 
17
 
18
- # Extract the start date from the 'Start datetime' column for this file
19
- start_date = data['Start datetime'].dt.date.iloc[0]
20
-
21
- # Calculate the number of failures for this file
22
- num_failures = len(data[data['Status'] == 'FAILED'])
23
-
24
- # Get the environment variable from the data frame
25
- environment = data['Environment'].iloc[0]
26
-
27
- # Create a unique key for each environment and day
28
- key = (environment, start_date)
29
-
30
- # Add the number of failures to the corresponding environment and day in the dictionary
31
- if key in environment_daily_failures:
32
- environment_daily_failures[key] += num_failures
33
- else:
34
- environment_daily_failures[key] = num_failures
35
 
36
- # Create a list of unique environments
37
- unique_environments = list(set([key[0] for key in environment_daily_failures.keys()]))
38
 
39
- # Create a larger line chart with separate lines for each environment
40
- plt.figure(figsize=(12, 8))
 
41
 
42
- for environment in unique_environments:
43
- # Filter the data for the current environment
44
- environment_data = [(key[1], value) for key, value in environment_daily_failures.items() if key[0] == environment]
45
 
46
- # Sort the data by date
47
- environment_data.sort(key=lambda x: x[0])
 
48
 
49
- # Extract dates and failures for the current environment
50
- dates = [date.strftime("%d-%b") for date, _ in environment_data]
51
- failures = [count for _, count in environment_data]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
52
 
53
- # Plot the data as a line
54
- plt.plot(dates, failures, marker='o', linestyle='-', label=f'Environment: {environment}')
 
 
 
55
 
56
- plt.xlabel('Date', fontsize=14)
57
- plt.ylabel('Number of Failures', fontsize=14)
58
- plt.title('Trends in Failure Rates Over Days', fontsize=16)
59
- plt.xticks(rotation=45, fontsize=12)
60
- plt.yticks(fontsize=12)
61
- plt.grid(True)
62
- plt.legend(fontsize=12) # Add a legend to differentiate environments
63
-
64
- # Add labels with the number of failures at each data point with larger font
65
- for environment in unique_environments:
66
- environment_data = [(key[1], value) for key, value in environment_daily_failures.items() if key[0] == environment]
67
- for i in range(len(environment_data)):
68
- plt.text(environment_data[i][0].strftime("%d-%b"), environment_data[i][1], str(environment_data[i][1]), ha='center', va='bottom', fontsize=12)
69
 
70
- plt.tight_layout()
71
 
72
- # Display the larger line chart
73
- st.pyplot(plt)
 
 
1
  import pandas as pd
2
  import streamlit as st
3
+ import plotly.graph_objects as go
 
4
  from pre import preprocess_uploaded_file
5
 
 
 
6
def generate_weekly_report(uploaded_files):
    """Render an interactive weekly failure-trend report in Streamlit.

    Combines the uploaded CSV files (preprocessed by ``preprocess_uploaded_file``),
    keeps only rows with ``Status == 'FAILED'``, and lets the user filter by
    environment, functional area, and date range. Renders a Plotly line chart of
    daily failure counts, a per-day/per-environment scenario detail table, and
    per-environment summary statistics.

    Parameters
    ----------
    uploaded_files : list
        File-like objects as returned by ``st.file_uploader``. May be empty/None.

    Returns
    -------
    None — all output goes to the Streamlit page. Returns early (with an
    ``st.error``/``st.warning`` message) when there is nothing to report.
    """
    if not uploaded_files:
        st.error("No files uploaded. Please upload CSV files for analysis.")
        return

    combined_data = pd.DataFrame()
    for uploaded_file in uploaded_files:
        data = preprocess_uploaded_file(uploaded_file)
        combined_data = pd.concat([combined_data, data], ignore_index=True)

    if combined_data.empty:
        st.error("No data found in the uploaded files. Please check the file contents.")
        return

    # .copy() so the 'Date' column assignment below operates on an independent
    # frame rather than a view of combined_data (avoids chained-assignment
    # warnings and silent no-op writes).
    failed_data = combined_data[combined_data['Status'] == 'FAILED'].copy()

    if failed_data.empty:
        st.warning("No failed scenarios found in the uploaded data.")
        return

    failed_data['Date'] = pd.to_datetime(failed_data['Start datetime']).dt.date

    # UI for selecting environments and functional areas
    environments = combined_data['Environment'].unique()
    selected_environments = st.multiselect("Select Environments", options=environments, default=environments)

    all_functional_areas = failed_data['Functional area'].unique()
    area_choice = st.radio("Choose Functional Areas to Display", ['All', 'Select Functional Areas'])

    if area_choice == 'Select Functional Areas':
        selected_functional_areas = st.multiselect("Select Functional Areas", options=all_functional_areas)
        if not selected_functional_areas:
            st.error("Please select at least one functional area.")
            return
    else:
        selected_functional_areas = all_functional_areas

    # Date range selection
    min_date = failed_data['Date'].min()
    max_date = failed_data['Date'].max()
    col1, col2 = st.columns(2)
    with col1:
        start_date = st.date_input("Start Date", min_value=min_date, max_value=max_date, value=min_date)
    with col2:
        end_date = st.date_input("End Date", min_value=min_date, max_value=max_date, value=max_date)

    # Filter data based on selections and date range
    filtered_data = failed_data[
        (failed_data['Environment'].isin(selected_environments)) &
        (failed_data['Date'] >= start_date) &
        (failed_data['Date'] <= end_date)
    ]
    if area_choice == 'Select Functional Areas':
        filtered_data = filtered_data[filtered_data['Functional area'].isin(selected_functional_areas)]

    # Guard: with nothing left after filtering, the unstack below produces an
    # empty frame without the MultiIndex columns the plotting code relies on.
    if filtered_data.empty:
        st.warning("No failures match the selected environments, functional areas, and date range.")
        return

    # Group by Date, Environment, and Functional area; columns become a
    # (Environment, Functional area) MultiIndex with one row per date.
    daily_failures = filtered_data.groupby(['Date', 'Environment', 'Functional area']).size().unstack(level=[1, 2], fill_value=0)

    # Y-axis scaling option
    y_axis_scale = st.radio("Y-axis Scaling", ["Fixed", "Dynamic"])

    # Create an interactive plot using Plotly: one trace per environment
    # (or per environment/area pair when specific areas were chosen).
    fig = go.Figure()

    for env in selected_environments:
        if env in daily_failures.columns.levels[0]:
            env_data = daily_failures[env]
            if area_choice == 'All':
                total_failures = env_data.sum(axis=1)
                fig.add_trace(go.Scatter(x=daily_failures.index, y=total_failures,
                                         mode='lines+markers', name=f'{env} - All Areas'))
            else:
                for area in selected_functional_areas:
                    if area in env_data.columns:
                        fig.add_trace(go.Scatter(x=daily_failures.index, y=env_data[area],
                                                 mode='lines+markers', name=f'{env} - {area}'))

    fig.update_layout(
        title='Failure Rates Comparison Across Environments Over Time',
        xaxis_title='Date',
        yaxis_title='Number of Failures',
        legend_title='Environment - Functional Area',
        hovermode='closest'
    )

    # "Fixed" anchors the y-axis at zero; "Dynamic" keeps Plotly's autorange.
    if y_axis_scale == "Fixed":
        fig.update_yaxes(rangemode="tozero")

    # Use st.plotly_chart to display the interactive chart
    st.plotly_chart(fig, use_container_width=True)

    # Add interactivity for scenario details
    st.write("Select a date and environment to see detailed scenario information:")

    selected_date = st.date_input("Select a date", min_value=start_date, max_value=end_date, value=start_date)
    selected_env = st.selectbox("Select an environment", options=selected_environments)

    if selected_date and selected_env:
        st.write(f"### Detailed Scenarios for {selected_date} - {selected_env}")

        day_scenarios = filtered_data[(filtered_data['Date'] == selected_date) &
                                      (filtered_data['Environment'] == selected_env)]

        if not day_scenarios.empty:
            st.dataframe(day_scenarios[['Functional area', 'Scenario name', 'Error message', 'Time spent(m:s)']])
        else:
            st.write("No failing scenarios found for the selected date and environment.")

    # Summary Statistics
    st.write("### Summary Statistics")
    for env in selected_environments:
        env_data = filtered_data[filtered_data['Environment'] == env]
        total_failures = len(env_data)

        if len(daily_failures) > 0:
            # Average is per reporting day (rows of daily_failures), not per file.
            avg_daily_failures = total_failures / len(daily_failures)
            if env in daily_failures.columns.levels[0]:
                max_daily_failures = daily_failures[env].sum(axis=1).max()
                min_daily_failures = daily_failures[env].sum(axis=1).min()
            else:
                max_daily_failures = min_daily_failures = 0
        else:
            avg_daily_failures = max_daily_failures = min_daily_failures = 0

        st.write(f"**{env}**:")
        st.write(f" - Total Failures: {total_failures}")
        st.write(f" - Average Daily Failures: {avg_daily_failures:.2f}")
        st.write(f" - Max Daily Failures: {max_daily_failures}")
        st.write(f" - Min Daily Failures: {min_daily_failures}")

        if area_choice == 'Select Functional Areas':
            st.write("\n **Failures by Functional Area:**")
            for area in selected_functional_areas:
                area_total = len(env_data[env_data['Functional area'] == area])
                st.write(f" - {area}: {area_total}")

        st.write("---")

    # Display raw data for verification
    if st.checkbox("Show Raw Data"):
        st.write(daily_failures)