Spaces:

BananaSauce
/

batch-run-csv-analyser

Sleeping

App Files Files Community

batch-run-csv-analyser / weekly.py

BananaSauce

Update weekly.py

cf4c4b7 over 1 year ago

raw

history blame

2.99 kB

	import pandas as pd
	import streamlit as st
	import matplotlib.pyplot as plt
	import io
	from pre import preprocess_uploaded_file

	from collections import defaultdict

	def _collect_daily_failures(uploaded_files):
	    """Sum the FAILED rows per (environment, start date) over all files.

	    Each file is preprocessed with ``preprocess_uploaded_file``; its
	    environment and start date are taken from the first row only, so a
	    file is treated as one environment on one day.

	    Args:
	        uploaded_files: Iterable of uploaded CSV files accepted by
	            ``preprocess_uploaded_file``.

	    Returns:
	        A dict mapping ``(environment, start_date)`` to the total number
	        of rows whose ``'Status'`` equals ``'FAILED'``.
	    """
	    daily_failures = defaultdict(int)

	    for uploaded_file in uploaded_files:
	        data = preprocess_uploaded_file(uploaded_file)

	        # A frame without rows has no first row to read the date and
	        # environment from; skip it instead of failing on .iloc[0].
	        if data is None or data.empty:
	            continue

	        start_date = data['Start datetime'].dt.date.iloc[0]
	        environment = data['Environment'].iloc[0]
	        failed_rows = int((data['Status'] == 'FAILED').sum())

	        # Several files may share an environment and day; add them up.
	        daily_failures[(environment, start_date)] += failed_rows

	    return daily_failures


	def generate_weekly_report(uploaded_files):
	    """Render a Streamlit line chart of daily failure counts per environment.

	    One line is drawn per environment, with every point labelled with its
	    failure count. All environments share a single x-axis built from the
	    chronologically sorted union of their dates, so the axis stays in date
	    order even when environments do not cover the same days.

	    Args:
	        uploaded_files: Iterable of uploaded CSV files accepted by
	            ``preprocess_uploaded_file``.

	    Returns:
	        None. The chart (or a warning when there is nothing to plot) is
	        written to the Streamlit page.
	    """
	    daily_failures = _collect_daily_failures(uploaded_files)

	    if not daily_failures:
	        st.warning('No data available to generate the weekly report.')
	        return

	    # Map every distinct date to a fixed slot on the shared x-axis.
	    # Plotting the formatted strings directly would let matplotlib order
	    # the categories by first appearance rather than by date.
	    all_dates = sorted({day for _, day in daily_failures})
	    slot_of = {day: slot for slot, day in enumerate(all_dates)}

	    # Group the (date, count) points by environment in a single pass.
	    points_by_environment = defaultdict(list)
	    for (environment, day), count in daily_failures.items():
	        points_by_environment[environment].append((day, count))

	    # Use an explicit figure instead of pyplot's global state.
	    fig, ax = plt.subplots(figsize=(12, 8))

	    # Sort by string form so the legend order is stable between runs.
	    for environment in sorted(points_by_environment, key=str):
	        points = sorted(points_by_environment[environment], key=lambda point: point[0])
	        xs = [slot_of[day] for day, _ in points]
	        ys = [count for _, count in points]

	        ax.plot(xs, ys, marker='o', linestyle='-', label=f'Environment: {environment}')

	        # Label each data point with its number of failures.
	        for x, y in zip(xs, ys):
	            ax.text(x, y, str(y), ha='center', va='bottom', fontsize=12)

	    ax.set_xticks(list(range(len(all_dates))))
	    ax.set_xticklabels(
	        [day.strftime("%d-%b") for day in all_dates],
	        rotation=45,
	        fontsize=12,
	    )
	    ax.tick_params(axis='y', labelsize=12)
	    ax.set_xlabel('Date', fontsize=14)
	    ax.set_ylabel('Number of Failures', fontsize=14)
	    ax.set_title('Trends in Failure Rates Over Days', fontsize=16)
	    ax.grid(True)
	    ax.legend(fontsize=12)  # Differentiates the environments

	    fig.tight_layout()

	    st.pyplot(fig)
	    # Release the figure so repeated reruns do not accumulate open figures.
	    plt.close(fig)