Spaces:

BananaSauce
/

batch-run-csv-analyser

Sleeping

File size: 2,992 Bytes

cfad59e
 
 
 
cf4c4b7
cfad59e
 
 
 
cf4c4b7
 
 
cfad59e
 
 
 
 
 
 
 
 
 
 
cf4c4b7
 
cfad59e
cf4c4b7
 
cfad59e
cf4c4b7
 
 
 
 
cfad59e
cf4c4b7
 
cfad59e
cf4c4b7
 
cfad59e
cf4c4b7
 
 
cfad59e
cf4c4b7
 
cfad59e
cf4c4b7
 
 
 
 
 
 
 
 
 
 
 
 
 
cfad59e
 
cf4c4b7
 
 
 
cfad59e
 
cf4c4b7

import pandas as pd
import streamlit as st
import matplotlib.pyplot as plt
import io
from pre import preprocess_uploaded_file

from collections import defaultdict

def _collect_daily_failures(uploaded_files):
    """Count FAILED rows per (environment, start date) across the uploaded files.

    Each file is passed through ``preprocess_uploaded_file``; the environment
    and the start date are taken from the first row of the resulting frame.
    Files whose preprocessed frame has no rows are skipped, because there is
    no first row to read the environment and date from.

    Returns:
        A dict mapping ``(environment, start_date)`` to the summed number of
        rows whose ``Status`` equals ``'FAILED'``. Files without failures
        still contribute a key with a count of 0.
    """
    failures = defaultdict(int)

    for uploaded_file in uploaded_files:
        data = preprocess_uploaded_file(uploaded_file)

        # An empty frame would make the .iloc[0] lookups below raise IndexError.
        if data.empty:
            continue

        start_date = data['Start datetime'].dt.date.iloc[0]
        environment = data['Environment'].iloc[0]
        num_failures = int((data['Status'] == 'FAILED').sum())

        failures[(environment, start_date)] += num_failures

    return failures


def generate_weekly_report(uploaded_files):
    """Render a line chart of failures per day, one line per environment.

    Args:
        uploaded_files: Iterable of uploaded CSV files; each one is handed to
            ``preprocess_uploaded_file``.

    The chart is drawn on its own figure and displayed through ``st.pyplot``.
    Nothing is returned.
    """
    daily_failures = _collect_daily_failures(uploaded_files)

    # Group points per environment. A dict keeps first-seen order, so legend
    # entries and line colours are stable between runs (a set is not).
    points_by_environment = {}
    for (environment, day), count in daily_failures.items():
        points_by_environment.setdefault(environment, []).append((day, count))

    # Give every date a position on one shared, chronologically sorted axis.
    # Plotting the formatted strings directly lets matplotlib order the ticks
    # by first appearance, which is wrong when environments cover different
    # days, and it merges distinct dates that share the same "%d-%b" label.
    all_days = sorted({day for _, day in daily_failures})
    position_of = {day: index for index, day in enumerate(all_days)}

    fig, ax = plt.subplots(figsize=(12, 8))

    for environment, points in points_by_environment.items():
        points.sort(key=lambda point: point[0])
        xs = [position_of[day] for day, _ in points]
        ys = [count for _, count in points]

        ax.plot(xs, ys, marker='o', linestyle='-', label=f'Environment: {environment}')

        # Annotate each data point with its failure count.
        for x, y in zip(xs, ys):
            ax.text(x, y, str(y), ha='center', va='bottom', fontsize=12)

    ax.set_xlabel('Date', fontsize=14)
    ax.set_ylabel('Number of Failures', fontsize=14)
    ax.set_title('Trends in Failure Rates Over Days', fontsize=16)
    ax.set_xticks(range(len(all_days)))
    ax.set_xticklabels([day.strftime("%d-%b") for day in all_days], rotation=45, fontsize=12)
    ax.tick_params(axis='y', labelsize=12)
    ax.grid(True)

    # A legend without any plotted line only triggers a matplotlib warning.
    if points_by_environment:
        ax.legend(fontsize=12)

    fig.tight_layout()

    # Pass the figure itself rather than the pyplot module, then close it so
    # figures do not pile up in memory across Streamlit reruns.
    st.pyplot(fig)
    plt.close(fig)