BananaSauce committed on
Commit
cd8fcb3
·
verified ·
1 Parent(s): e24c1ad

Update weekly.py

Browse files
Files changed (1) hide show
  1. weekly.py +130 -55
weekly.py CHANGED
@@ -1,73 +1,148 @@
1
  import pandas as pd
2
  import streamlit as st
3
- import matplotlib.pyplot as plt
4
- import io
5
  from pre import preprocess_uploaded_file
6
 
7
- from collections import defaultdict
8
-
9
  def generate_weekly_report(uploaded_files):
 
 
 
10
 
11
- # Create a dictionary to store the number of failures for each environment and day
12
- environment_daily_failures = {}
13
-
14
  for uploaded_file in uploaded_files:
15
- # Preprocess the uploaded CSV file (you can use your existing preprocessing code)
16
  data = preprocess_uploaded_file(uploaded_file)
 
17
 
18
- # Extract the start date from the 'Start datetime' column for this file
19
- start_date = data['Start datetime'].dt.date.iloc[0]
20
-
21
- # Calculate the number of failures for this file
22
- num_failures = len(data[data['Status'] == 'FAILED'])
23
-
24
- # Get the environment variable from the data frame
25
- environment = data['Environment'].iloc[0]
26
-
27
- # Create a unique key for each environment and day
28
- key = (environment, start_date)
29
-
30
- # Add the number of failures to the corresponding environment and day in the dictionary
31
- if key in environment_daily_failures:
32
- environment_daily_failures[key] += num_failures
33
- else:
34
- environment_daily_failures[key] = num_failures
35
 
36
- # Create a list of unique environments
37
- unique_environments = list(set([key[0] for key in environment_daily_failures.keys()]))
38
 
39
- # Create a larger line chart with separate lines for each environment
40
- plt.figure(figsize=(12, 8))
 
41
 
42
- for environment in unique_environments:
43
- # Filter the data for the current environment
44
- environment_data = [(key[1], value) for key, value in environment_daily_failures.items() if key[0] == environment]
45
 
46
- # Sort the data by date
47
- environment_data.sort(key=lambda x: x[0])
 
48
 
49
- # Extract dates and failures for the current environment
50
- dates = [date.strftime("%d-%b") for date, _ in environment_data]
51
- failures = [count for _, count in environment_data]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
52
 
53
- # Plot the data as a line
54
- plt.plot(dates, failures, marker='o', linestyle='-', label=f'Environment: {environment}')
 
 
 
55
 
56
- plt.xlabel('Date', fontsize=14)
57
- plt.ylabel('Number of Failures', fontsize=14)
58
- plt.title('Trends in Failure Rates Over Days', fontsize=16)
59
- plt.xticks(rotation=45, fontsize=12)
60
- plt.yticks(fontsize=12)
61
- plt.grid(True)
62
- plt.legend(fontsize=12) # Add a legend to differentiate environments
63
-
64
- # Add labels with the number of failures at each data point with larger font
65
- for environment in unique_environments:
66
- environment_data = [(key[1], value) for key, value in environment_daily_failures.items() if key[0] == environment]
67
- for i in range(len(environment_data)):
68
- plt.text(environment_data[i][0].strftime("%d-%b"), environment_data[i][1], str(environment_data[i][1]), ha='center', va='bottom', fontsize=12)
69
 
70
- plt.tight_layout()
71
 
72
- # Display the larger line chart
73
- st.pyplot(plt)
 
 
1
  import pandas as pd
2
  import streamlit as st
3
+ import plotly.graph_objects as go
 
4
  from pre import preprocess_uploaded_file
5
 
 
 
6
def generate_weekly_report(uploaded_files):
    """Render an interactive weekly failure-trend report in Streamlit.

    Combines the uploaded CSV files (preprocessed by ``preprocess_uploaded_file``),
    keeps only rows with ``Status == 'FAILED'``, and lets the user filter by
    environment, functional area, and date range. Renders a Plotly line chart of
    daily failure counts, a per-day/per-environment scenario detail table, and
    per-environment summary statistics.

    Parameters
    ----------
    uploaded_files : list
        File-like objects as returned by ``st.file_uploader``. May be empty/None.

    Returns
    -------
    None — all output goes to the Streamlit page. Returns early (with an
    ``st.error``/``st.warning`` message) when there is nothing to report.
    """
    if not uploaded_files:
        st.error("No files uploaded. Please upload CSV files for analysis.")
        return

    combined_data = pd.DataFrame()
    for uploaded_file in uploaded_files:
        data = preprocess_uploaded_file(uploaded_file)
        combined_data = pd.concat([combined_data, data], ignore_index=True)

    if combined_data.empty:
        st.error("No data found in the uploaded files. Please check the file contents.")
        return

    # .copy() so the 'Date' column assignment below operates on an independent
    # frame rather than a view of combined_data (avoids chained-assignment
    # warnings and silent no-op writes).
    failed_data = combined_data[combined_data['Status'] == 'FAILED'].copy()

    if failed_data.empty:
        st.warning("No failed scenarios found in the uploaded data.")
        return

    failed_data['Date'] = pd.to_datetime(failed_data['Start datetime']).dt.date

    # UI for selecting environments and functional areas
    environments = combined_data['Environment'].unique()
    selected_environments = st.multiselect("Select Environments", options=environments, default=environments)

    all_functional_areas = failed_data['Functional area'].unique()
    area_choice = st.radio("Choose Functional Areas to Display", ['All', 'Select Functional Areas'])

    if area_choice == 'Select Functional Areas':
        selected_functional_areas = st.multiselect("Select Functional Areas", options=all_functional_areas)
        if not selected_functional_areas:
            st.error("Please select at least one functional area.")
            return
    else:
        selected_functional_areas = all_functional_areas

    # Date range selection
    min_date = failed_data['Date'].min()
    max_date = failed_data['Date'].max()
    col1, col2 = st.columns(2)
    with col1:
        start_date = st.date_input("Start Date", min_value=min_date, max_value=max_date, value=min_date)
    with col2:
        end_date = st.date_input("End Date", min_value=min_date, max_value=max_date, value=max_date)

    # Filter data based on selections and date range
    filtered_data = failed_data[
        (failed_data['Environment'].isin(selected_environments)) &
        (failed_data['Date'] >= start_date) &
        (failed_data['Date'] <= end_date)
    ]
    if area_choice == 'Select Functional Areas':
        filtered_data = filtered_data[filtered_data['Functional area'].isin(selected_functional_areas)]

    # Guard: with nothing left after filtering, the unstack below produces an
    # empty frame without the MultiIndex columns the plotting code relies on.
    if filtered_data.empty:
        st.warning("No failures match the selected environments, functional areas, and date range.")
        return

    # Group by Date, Environment, and Functional area; columns become a
    # (Environment, Functional area) MultiIndex with one row per date.
    daily_failures = filtered_data.groupby(['Date', 'Environment', 'Functional area']).size().unstack(level=[1, 2], fill_value=0)

    # Y-axis scaling option
    y_axis_scale = st.radio("Y-axis Scaling", ["Fixed", "Dynamic"])

    # Create an interactive plot using Plotly: one trace per environment
    # (or per environment/area pair when specific areas were chosen).
    fig = go.Figure()

    for env in selected_environments:
        if env in daily_failures.columns.levels[0]:
            env_data = daily_failures[env]
            if area_choice == 'All':
                total_failures = env_data.sum(axis=1)
                fig.add_trace(go.Scatter(x=daily_failures.index, y=total_failures,
                                         mode='lines+markers', name=f'{env} - All Areas'))
            else:
                for area in selected_functional_areas:
                    if area in env_data.columns:
                        fig.add_trace(go.Scatter(x=daily_failures.index, y=env_data[area],
                                                 mode='lines+markers', name=f'{env} - {area}'))

    fig.update_layout(
        title='Failure Rates Comparison Across Environments Over Time',
        xaxis_title='Date',
        yaxis_title='Number of Failures',
        legend_title='Environment - Functional Area',
        hovermode='closest'
    )

    # "Fixed" anchors the y-axis at zero; "Dynamic" keeps Plotly's autorange.
    if y_axis_scale == "Fixed":
        fig.update_yaxes(rangemode="tozero")

    # Use st.plotly_chart to display the interactive chart
    st.plotly_chart(fig, use_container_width=True)

    # Add interactivity for scenario details
    st.write("Select a date and environment to see detailed scenario information:")

    selected_date = st.date_input("Select a date", min_value=start_date, max_value=end_date, value=start_date)
    selected_env = st.selectbox("Select an environment", options=selected_environments)

    if selected_date and selected_env:
        st.write(f"### Detailed Scenarios for {selected_date} - {selected_env}")

        day_scenarios = filtered_data[(filtered_data['Date'] == selected_date) &
                                      (filtered_data['Environment'] == selected_env)]

        if not day_scenarios.empty:
            st.dataframe(day_scenarios[['Functional area', 'Scenario name', 'Error message', 'Time spent(m:s)']])
        else:
            st.write("No failing scenarios found for the selected date and environment.")

    # Summary Statistics
    st.write("### Summary Statistics")
    for env in selected_environments:
        env_data = filtered_data[filtered_data['Environment'] == env]
        total_failures = len(env_data)

        if len(daily_failures) > 0:
            # Average is per reporting day (rows of daily_failures), not per file.
            avg_daily_failures = total_failures / len(daily_failures)
            if env in daily_failures.columns.levels[0]:
                max_daily_failures = daily_failures[env].sum(axis=1).max()
                min_daily_failures = daily_failures[env].sum(axis=1).min()
            else:
                max_daily_failures = min_daily_failures = 0
        else:
            avg_daily_failures = max_daily_failures = min_daily_failures = 0

        st.write(f"**{env}**:")
        st.write(f" - Total Failures: {total_failures}")
        st.write(f" - Average Daily Failures: {avg_daily_failures:.2f}")
        st.write(f" - Max Daily Failures: {max_daily_failures}")
        st.write(f" - Min Daily Failures: {min_daily_failures}")

        if area_choice == 'Select Functional Areas':
            st.write("\n **Failures by Functional Area:**")
            for area in selected_functional_areas:
                area_total = len(env_data[env_data['Functional area'] == area])
                st.write(f" - {area}: {area_total}")

        st.write("---")

    # Display raw data for verification
    if st.checkbox("Show Raw Data"):
        st.write(daily_failures)