# Spaces: BananaSauce / batch-run-csv-analyser (Sleeping)
# File size: 5,678 Bytes

import pandas as pd 
import streamlit as st
import plotly.graph_objects as go
from pre import preprocess_uploaded_file

def convert_df(df):
    """Return *df* serialised as CSV (index column omitted), encoded as UTF-8 bytes."""
    csv_text = df.to_csv(index=False)
    return csv_text.encode('utf-8')

def _format_mm_ss(seconds):
    """Format a Series of durations in seconds as zero-padded ``MM:SS`` strings.

    Minutes are counted in total rather than modulo the hour, so a 3700 second
    run is shown as ``61:40`` instead of wrapping around to ``01:40``.
    Fractional seconds are truncated and missing values are passed through
    unchanged.
    """
    total_seconds = pd.to_numeric(seconds)

    def _format_one(value):
        if pd.isna(value):
            return value
        minutes, secs = divmod(int(value), 60)
        return f"{minutes:02d}:{secs:02d}"

    return total_seconds.map(_format_one)


def _render_summary_chart(status_counts):
    """Draw a bar chart with one bar per entry of the *status_counts* mapping."""
    status_df = pd.DataFrame.from_dict(status_counts, orient='index', columns=['Count'])

    # Create a bar chart using Plotly
    fig = go.Figure(data=[
        go.Bar(
            x=status_df.index,
            y=status_df['Count'],
            text=status_df['Count'],
            textposition='outside',
            textfont=dict(size=14),
            marker_color=['#1f77b4', '#ff7f0e', '#2ca02c'],  # Custom colors for each bar
            width=0.6  # Adjust bar width
        )
    ])

    # Extend the y-axis by 10% so the outside labels fit. The floor of 1 keeps
    # the axis from collapsing to [0, 0] when every count is zero.
    tallest_bar = max(int(status_df['Count'].max()), 1)
    fig.update_layout(
        yaxis=dict(
            title='Count',
            range=[0, tallest_bar * 1.1]
        ),
        xaxis_title="Status",
        hoverlabel=dict(bgcolor="white", font_size=16),
        margin=dict(l=20, r=20, t=40, b=20),
        uniformtext_minsize=8,
        uniformtext_mode='hide'
    )

    # Outline the bars so they stay visible against the background
    fig.update_traces(marker_line_width=1, marker_line_color="black", selector=dict(type="bar"))

    # Add hover text
    fig.update_traces(
        hovertemplate="<b>%{x}</b><br>Count: %{y}<extra></extra>"
    )

    st.plotly_chart(fig, use_container_width=True)


def _render_scenario_tab(tab, count_label, scenarios, columns, button_label, file_name):
    """Show the count, a table and a CSV download for *scenarios* inside *tab*."""
    with tab:
        st.write(f"{count_label}: {len(scenarios)}")
        visible = scenarios[columns]
        st.dataframe(visible)
        st.download_button(button_label, data=convert_df(visible), file_name=file_name, mime='text/csv')


def double_main(uploaded_file1, uploaded_file2):
    """Compare two uploaded batch-run CSV files and render the differences.

    Both files are passed through ``preprocess_uploaded_file``; the result is
    expected to provide the columns 'Start datetime', 'Time spent',
    'Functional area', 'Scenario name', 'Status' and 'Error message'. The file
    with the earlier minimum 'Start datetime' is treated as the older run.
    Scenarios present in both runs are matched on functional area and scenario
    name, then grouped into consistent failures, new failures and new passes.
    A summary chart and one tab per group (with a CSV download) are rendered.

    Args:
        uploaded_file1: First uploaded CSV file, or None.
        uploaded_file2: Second uploaded CSV file, or None.

    Returns:
        None. A warning is shown and nothing else is rendered when either
        upload is missing.
    """
    if uploaded_file1 is None or uploaded_file2 is None:
        st.warning("Please upload both CSV files for comparison.")
        return

    # Preprocess the uploaded CSV files
    data_1 = preprocess_uploaded_file(uploaded_file1)
    data_2 = preprocess_uploaded_file(uploaded_file2)

    # Determine which file is older and newer. On identical start times the
    # upload order is kept, so the first file stays the "older" one.
    start_1 = data_1['Start datetime'].min()
    start_2 = data_2['Start datetime'].min()
    if start_1 <= start_2:
        older_df, newer_df = data_1, data_2
        older_datetime, newer_datetime = start_1, start_2
    else:
        older_df, newer_df = data_2, data_1
        older_datetime, newer_datetime = start_2, start_1

    # Convert time columns to MM:SS format on fresh frames, so the objects
    # returned by the preprocessing step are not modified in place.
    older_df = older_df.assign(**{'Time spent': _format_mm_ss(older_df['Time spent'])})
    newer_df = newer_df.assign(**{'Time spent': _format_mm_ss(newer_df['Time spent'])})

    # Display start datetime of each file
    st.write(f"The older csv started on {older_datetime}")
    st.write(f"The newer csv started on {newer_datetime}")

    # Merge dataframes on functional area + scenario name; only scenarios
    # present in both runs survive the (inner) merge.
    merged_df = pd.merge(older_df, newer_df, on=['Functional area', 'Scenario name'], suffixes=('_old', '_new'))

    # Filter scenarios by status transition
    old_failed = merged_df['Status_old'] == 'FAILED'
    old_passed = merged_df['Status_old'] == 'PASSED'
    new_failed = merged_df['Status_new'] == 'FAILED'
    new_passed = merged_df['Status_new'] == 'PASSED'
    fail_to_fail_scenarios = merged_df[old_failed & new_failed]
    pass_to_fail_scenarios = merged_df[old_passed & new_failed]
    fail_to_pass_scenarios = merged_df[old_failed & new_passed]

    # Display summary chart
    st.subheader("Summary of Scenario Status Changes")
    _render_summary_chart({
        'Consistent Failures': len(fail_to_fail_scenarios),
        'New Failures': len(pass_to_fail_scenarios),
        'New Passes': len(fail_to_pass_scenarios),
    })

    # Use tabs to display data
    tab1, tab2, tab3 = st.tabs(["Consistent Failures", "New Failures", "New Passes"])

    _render_scenario_tab(
        tab1,
        "Failing scenarios Count",
        fail_to_fail_scenarios,
        ['Functional area', 'Scenario name', 'Error message_old', 'Error message_new'],
        "Download Consistent Failures as CSV",
        'consistent_failures.csv',
    )
    _render_scenario_tab(
        tab2,
        "Failing scenarios Count",
        pass_to_fail_scenarios,
        ['Functional area', 'Scenario name', 'Error message_new', 'Time spent_old', 'Time spent_new'],
        "Download New Failures as CSV",
        'new_failures.csv',
    )
    _render_scenario_tab(
        tab3,
        "Passing scenarios Count",
        fail_to_pass_scenarios,
        ['Functional area', 'Scenario name', 'Error message_old', 'Time spent_old', 'Time spent_new'],
        "Download New Passes as CSV",
        'new_passes.csv',
    )

def main():
    """Render the page: title, intro text, two CSV uploaders, and the comparison.

    The comparison runs only once both uploaders hold a file; until then the
    page shows just the upload widgets.
    """
    st.title("CSV Comparison Tool")

    st.markdown("""
    This tool compares two CSV files and highlights the differences in the scenarios.
    Please upload the older and newer CSV files below.
    """)

    left_column, right_column = st.columns(2)

    with left_column:
        first_upload = st.file_uploader("Upload the older CSV file", type='csv', key='uploader1')

    with right_column:
        second_upload = st.file_uploader("Upload the newer CSV file", type='csv', key='uploader2')

    # Nothing to compare until both files are present.
    if first_upload is None or second_upload is None:
        return

    with st.spinner('Processing...'):
        double_main(first_upload, second_upload)
    st.success('Comparison Complete!')

# Build the page only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()