Spaces:

EDS-lab
/

Transparency_Plus

Running

App Files Files Community

mmmapms commited on Aug 22

Commit

6872779

•

1 Parent(s): 7bf8667

Update app.py

Browse files

Files changed (1) hide show

app.py +674 -674

app.py CHANGED Viewed

@@ -1,675 +1,675 @@
-import requests
-import pandas as pd
-from io import StringIO
-import streamlit as st
-import os
-import plotly.express as px
-import plotly.graph_objects as go
-import plotly.colors as pc
-import numpy as np
-from sklearn.metrics import mean_squared_error
-from statsmodels.tsa.stattools import acf
-from statsmodels.graphics.tsaplots import plot_acf
-import matplotlib.pyplot as plt
-##GET ALL FILES FROM GITHUB
-def load_GitHub(github_token, file_name):
-    url = f'https://raw.githubusercontent.com/margaridamascarenhas/Transparency_Data/main/{file_name}'
-    headers = {'Authorization': f'token {github_token}'}
-    response = requests.get(url, headers=headers)
-    if response.status_code == 200:
-        csv_content = StringIO(response.text)
-        df = pd.read_csv(csv_content)
-        if 'Date' in df.columns:
-            df['Date'] = pd.to_datetime(df['Date'])  # Convert 'Date' column to datetime
-            df.set_index('Date', inplace=True)  # Set 'Date' column as the index
-            #df.to_csv(file_name)
-        return df
-    else:
-        print(f"Failed to download {file_name}. Status code: {response.status_code}")
-        return None
-def load_forecast(github_token):
-    predictions_dict = {}
-    for hour in range(24):
-        file_name = f'Predictions_{hour}h.csv'
-        df = load_GitHub(github_token, file_name)
-        if df is not None:
-            predictions_dict[file_name] = df
-    return predictions_dict
-def convert_European_time(data, time_zone):
-    data.index = pd.to_datetime(data.index, utc=True)
-    data.index = data.index.tz_convert(time_zone)
-    data.index = data.index.tz_localize(None)
-    return data
-github_token = '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'
-if github_token:
-    forecast_dict = load_forecast(github_token)
-    historical_forecast=load_GitHub(github_token, 'Historical_forecast.csv')
-    Data_BE=load_GitHub(github_token, 'BE_Elia_Entsoe_UTC.csv')
-    Data_FR=load_GitHub(github_token, 'FR_Entsoe_UTC.csv')
-    Data_NL=load_GitHub(github_token, 'NL_Entsoe_UTC.csv')
-    Data_DE=load_GitHub(github_token, 'DE_Entsoe_UTC.csv')
-    Data_BE=convert_European_time(Data_BE, 'Europe/Brussels')
-    Data_FR=convert_European_time(Data_FR, 'Europe/Paris')
-    Data_NL=convert_European_time(Data_NL, 'Europe/Amsterdam')
-    Data_DE=convert_European_time(Data_DE, 'Europe/Berlin')
-else:
-    print("Please enter your GitHub Personal Access Token to proceed.")
-def conformal_predictions(data, target, my_forecast):
-    data['Residuals'] = data[my_forecast] - data[actual_col]
-    data['Hour'] = data.index.hour
-    min_date = data.index.min()
-    for date in data.index.normalize().unique():
-        if date >= min_date + pd.DateOffset(days=30):
-            start_date = date - pd.DateOffset(days=30)
-            end_date = date
-            calculation_window = data[start_date:end_date-pd.DateOffset(hours=1)]
-            quantiles = calculation_window.groupby('Hour')['Residuals'].quantile(0.8)
-            # Use .loc to safely access and modify data
-            if date in data.index:
-                current_day_data = data.loc[date.strftime('%Y-%m-%d')]
-                for hour in current_day_data['Hour'].unique():
-                    if hour in quantiles.index:
-                        hour_quantile = quantiles[hour]
-                        idx = (data.index.normalize() == date) & (data.Hour == hour)
-                        data.loc[idx, 'Quantile_80'] = hour_quantile
-                        data.loc[idx, 'Lower_Interval'] = data.loc[idx, my_forecast] - hour_quantile
-                        data.loc[idx, 'Upper_Interval'] = data.loc[idx, my_forecast] + hour_quantile
-    #data.reset_index(inplace=True)
-    return data
-st.title("Transparency++")
-countries = {
-    'Belgium': 'BE',
-    'Netherlands': 'NL',
-    'Germany': 'DE',
-    'France': 'FR',
-}
-st.sidebar.header('Filters')
-selected_country = st.sidebar.selectbox('Select Country', list(countries.keys()))
-st.write()
-date_range = st.sidebar.date_input("Select Date Range for Metrics Calculation:",
-                                   value=(pd.to_datetime("2024-01-01"), pd.to_datetime(pd.Timestamp('today'))))
-# Ensure the date range provides two dates
-if len(date_range) == 2:
-    start_date = pd.Timestamp(date_range[0])
-    end_date = pd.Timestamp(date_range[1])
-else:
-    st.error("Please select a valid date range.")
-    st.stop()
-# Sidebar with radio buttons for different sections
-section = st.sidebar.radio('Section', ['Data', 'Forecasts', 'Insights'])
-country_code = countries[selected_country]
-if country_code == 'BE':
-    data = Data_BE
-    weather_columns = ['Temperature', 'Wind Speed Onshore', 'Wind Speed Offshore']
-    data['Temperature'] = data['temperature_2m_8']
-    data['Wind Speed Offshore'] = data['wind_speed_100m_4']
-    data['Wind Speed Onshore'] = data['wind_speed_100m_8']
-elif country_code == 'DE':
-    data = Data_DE
-    weather_columns = ['Temperature', 'Wind Speed']
-    data['Temperature'] = data['temperature_2m']
-    data['Wind Speed'] = data['wind_speed_100m']
-elif country_code == 'NL':
-    data = Data_NL
-    weather_columns = ['Temperature', 'Wind Speed']
-    data['Temperature'] = data['temperature_2m']
-    data['Wind Speed'] = data['wind_speed_100m']
-elif country_code == 'FR':
-    data = Data_FR
-    weather_columns = ['Temperature', 'Wind Speed']
-    data['Temperature'] = data['temperature_2m']
-    data['Wind Speed'] = data['wind_speed_100m']
-def add_feature(df2, df_main):
-    #df_main.index = pd.to_datetime(df_main.index)
-    #df2.index = pd.to_datetime(df2.index)
-    df_combined = df_main.combine_first(df2)
-    last_date_df1 = df_main.index.max()
-    first_date_df2 = df2.index.min()
-    if first_date_df2 == last_date_df1 + pd.Timedelta(hours=1):
-        df_combined = pd.concat([df_main, df2[df2.index > last_date_df1]], axis=0)
-    #df_combined.reset_index(inplace=True)
-    return df_combined
-#data.index = data.index.tz_localize('UTC')
-data = data.loc[start_date:end_date]
-forecast_columns = [
-    'Load_entsoe','Load_forecast_entsoe','Wind_onshore_entsoe','Wind_onshore_forecast_entsoe','Wind_offshore_entsoe','Wind_offshore_forecast_entsoe','Solar_entsoe','Solar_forecast_entsoe']
-if section == 'Data':
-    st.header("Data")
-    st.write("""
-    This section allows you to explore and upload your datasets.
-    You can visualize raw data, clean it, and prepare it for analysis.
-    """)
-    st.header('Data Quality')
-    output_text = f"The below percentages are calculated from the selected date range from {start_date.strftime('%Y-%m-%d')} to {end_date.strftime('%Y-%m-%d')}. This interval can be adjusted from the sidebar."
-    st.write(output_text)
-    # Report % of missing values
-    missing_values = data[forecast_columns].isna().mean() * 100
-    missing_values = missing_values.round(2)
-    installed_capacities = {
-        'FR': { 'Solar': 17419, 'Wind Offshore': 1483, 'Wind Onshore': 22134},
-        'DE': { 'Solar': 73821, 'Wind Offshore': 8386, 'Wind Onshore': 59915},
-        'BE': { 'Solar': 8789, 'Wind Offshore': 2262, 'Wind Onshore': 3053},
-        'NL': { 'Solar': 22590, 'Wind Offshore': 3220, 'Wind Onshore': 6190},
-    }
-    if country_code not in installed_capacities:
-        st.error(f"Installed capacities not defined for country code '{country_code}'.")
-        st.stop()
-    # Report % of extreme, impossible values for the selected country
-    capacities = installed_capacities[country_code]
-    extreme_values = {}
-    for col in forecast_columns:
-            if 'Solar_entsoe' in col:
-                extreme_values[col] = ((data[col] < 0) | (data[col] > capacities['Solar'])).mean() * 100
-            elif 'Solar_forecast_entsoe' in col:
-                extreme_values[col] = ((data[col] < 0) | (data[col] > capacities['Solar'])).mean() * 100
-            elif 'Wind_onshore_entsoe' in col:
-                extreme_values[col] = ((data[col] < 0) | (data[col] > capacities['Wind Onshore'])).mean() * 100
-            elif 'Wind_onshore_forecast_entsoe' in col:
-                extreme_values[col] = ((data[col] < 0) | (data[col] > capacities['Wind Onshore'])).mean() * 100
-            elif 'Wind_offshore_entsoe' in col:
-                extreme_values[col] = ((data[col] < 0) | (data[col] > capacities['Wind Offshore'])).mean() * 100
-            elif 'Wind_offshore_forecast_entsoe' in col:
-                extreme_values[col] = ((data[col] < 0) | (data[col] > capacities['Wind Offshore'])).mean() * 100
-            elif 'Load_entsoe' in col:
-                extreme_values[col] = ((data[col] < 0)).mean() * 100
-            elif 'Load_forecast_entsoe' in col:
-                extreme_values[col] = ((data[col] < 0)).mean() * 100
-    extreme_values = pd.Series(extreme_values).round(2)
-    # Combine all metrics into one DataFrame
-    metrics_df = pd.DataFrame({
-    'Missing Values (%)': missing_values,
-    'Extreme/Nonsensical Values (%)': extreme_values,
-    })
-    st.markdown(
-    """
-    <style>
-    .dataframe {font-size: 45px !important;}
-    </style>
-    """,
-    unsafe_allow_html=True
-    )
-    st.dataframe(metrics_df)
-    st.write('<b><u>Missing values (%)</u></b>: Percentage of missing values in the dataset', unsafe_allow_html=True)
-    st.write('<b><u>Extreme/Nonsensical values (%)</u></b>: Values that are considered implausible such as negative or out-of-bound values i.e., (generation<0) or (generation>capacity)', unsafe_allow_html=True)
-# Section 2: Forecasts
-elif section == 'Forecasts':
-    st.header('Forecast Quality')
-    # Time series for last 1 week
-    st.subheader('Time Series: Last 1 Week')
-    last_week = Data_BE.loc[Data_BE.index >= (data.index[-1] - pd.Timedelta(days=7))]
-    st.write('The below plots show the time series of forecasts vs. observations provided by the ENTSO-E Transparency platform between the selected data range.')
-    forecast_columns_operational = [
-    'Load_entsoe','Load_forecast_entsoe', 'Load_LightGBMModel.7D.TimeCov.Temp.Forecast_elia', 'Wind_onshore_entsoe','Wind_onshore_forecast_entsoe','Wind_onshore_LightGBMModel.1D.TimeCov.Temp.Forecast_elia','Wind_offshore_entsoe','Wind_offshore_forecast_entsoe','Wind_offshore_LightGBMModel.1D.TimeCov.Temp.Forecast_elia','Solar_entsoe','Solar_forecast_entsoe', 'Solar_LightGBMModel.1D.TimeCov.Temp.Forecast_elia']
-    forecast_columns = [
-    'Load_entsoe','Load_forecast_entsoe','Wind_onshore_entsoe','Wind_onshore_forecast_entsoe','Wind_offshore_entsoe','Wind_offshore_forecast_entsoe','Solar_entsoe','Solar_forecast_entsoe']
-    operation_forecast_load=forecast_dict['Predictions_10h.csv'].filter(like='Load_', axis=1)
-    operation_forecast_res=forecast_dict['Predictions_17h.csv'].filter(regex='^(?!Load_)')
-    operation_forecast_load.columns = [col.replace('_entsoe.', '_').replace('Naive.7D', 'WeeklyNaiveSeasonal') for col in operation_forecast_load.columns]
-    operation_forecast_res.columns = [col.replace('_entsoe.', '_').replace('Naive.1D', 'DailyNaiveSeasonal') for col in operation_forecast_res.columns]
-    Historical_and_Load=add_feature(operation_forecast_load, historical_forecast)
-    Historical_and_operational=add_feature(operation_forecast_res, Historical_and_Load)
-    #print(Historical_and_operational.filter(like='Forecast_elia', axis=1))
-    best_forecast = Historical_and_operational.filter(like='Forecast_elia', axis=1)
-    df_combined = Historical_and_operational.join(Data_BE, how='inner')
-    last_week_best_forecast = best_forecast.loc[best_forecast.index >= (best_forecast.index[-24] - pd.Timedelta(days=8))]
-    for i in range(0, len(forecast_columns_operational), 3):
-        actual_col = forecast_columns_operational[i]
-        forecast_col = forecast_columns_operational[i + 1]
-        my_forecast = forecast_columns_operational[i + 2]
-        if forecast_col in data.columns:
-            fig = go.Figure()
-            fig.add_trace(go.Scatter(x=last_week.index, y=last_week[actual_col], mode='lines', name='Actual'))
-            fig.add_trace(go.Scatter(x=last_week.index, y=last_week[forecast_col], mode='lines', name='Forecast ENTSO-E'))
-            if country_code=='BE':
-                conformal=conformal_predictions(df_combined, actual_col, my_forecast)
-                last_week_conformal = conformal.loc[conformal.index >= (conformal.index[-24] - pd.Timedelta(days=8))]
-                if actual_col =='Load_entsoe':
-                    last_week_conformal = conformal.loc[conformal.index >= (conformal.index[-24] - pd.Timedelta(days=5))]
-                fig.add_trace(go.Scatter(x=last_week_best_forecast.index, y=last_week_best_forecast[my_forecast], mode='lines', name='Forecast EDS'))
-                fig.add_trace(go.Scatter(
-                    x=last_week_conformal.index,
-                    y=last_week_conformal['Lower_Interval'],
-                    mode='lines',
-                    line=dict(width=0),
-                    showlegend=False
-                ))
-                # Add the upper interval trace and fill to the lower interval
-                fig.add_trace(go.Scatter(
-                    x=last_week_conformal.index,
-                    y=last_week_conformal['Upper_Interval'],
-                    mode='lines',
-                    line=dict(width=0),
-                    fill='tonexty',  # Fill between this trace and the previous one
-                    fillcolor='rgba(68, 68, 68, 0.3)',
-                    name='P10/P90 prediction intervals'
-                ))
-            fig.update_layout(title=f'Forecasts vs Actual for {actual_col}', xaxis_title='Date', yaxis_title='Value [MW]')
-            st.plotly_chart(fig)
-    def plot_category(df_dict, category_prefix, title):
-        fig = go.Figure()
-        # Define base colors for each model
-        model_colors = {
-            'LightGBMModel.TimeCov.Temp.Forecast_elia': '#1f77b4',  # Blue
-            'LightGBMModel.TimeCov.Temp': '#2ca02c',  # Green
-            'Naive': '#ff7f0e'  # Orange
-        }
-        # To keep track of which model has been added to the legend
-        legend_added = {'LightGBMModel.TimeCov.Temp.Forecast_elia': False, 'LightGBMModel.TimeCov.Temp': False, 'Naive': False}
-        for file_name, df in df_dict.items():
-            # Extract the hour from the filename, assuming the format is "Predictions_Xh.csv"
-            hour = int(file_name.split('_')[1].replace('h.csv', ''))
-            filtered_columns = [col for col in df.columns if col.startswith(category_prefix)]
-            for column in filtered_columns:
-                # Identify the model type with more precise logic
-                if 'LightGBMModel' in column:
-                    if 'Forecast_elia' in column:
-                        model_key = 'LightGBMModel.TimeCov.Temp.Forecast_elia'
-                    elif 'TimeCov' in column:
-                        model_key = 'LightGBMModel.TimeCov.Temp'
-                elif 'Naive' in column:
-                    model_key = 'Naive'
-                else:
-                    continue  # Skip if it doesn't match any model type
-                # Extract the relevant part of the model name
-                parts = column.split('.')
-                model_name_parts = parts[1:]  # Skip the variable prefix
-                model_name = '.'.join(model_name_parts)  # Rejoin the parts to form the model name
-                # Get the base color for the model
-                base_color = model_colors[model_key]
-                # Calculate the color shade based on the hour
-                color_scale = pc.hex_to_rgb(base_color)
-                scale_factor = 0.3 + (hour / 40)  # Adjust scale to ensure the gradient is visible
-                adjusted_color = tuple(int(c * scale_factor) for c in color_scale)
-                # Convert to RGBA with transparency for plot lines
-                line_color = f'rgba({adjusted_color[0]}, {adjusted_color[1]}, {adjusted_color[2]}, 0.1)'  # Transparent color for lines
-                # Combine the hour and the model name for the legend, but only add the legend entry once
-                show_legend = not legend_added[model_key]
-                fig.add_trace(go.Scatter(
-                    x=df.index,  # Assuming 'Date' is the index, use 'df.index' for x-axis
-                    y=df[column],
-                    mode='lines',
-                    name=model_name if show_legend else None,  # Use the model name for the legend, but only once
-                    line=dict(color=base_color if show_legend else line_color),  # Use opaque color for legend, transparent for lines
-                    showlegend=show_legend,  # Show legend only once per model
-                    legendgroup=model_key  # Grouping for consistent legend color
-                ))
-                # Mark that this model has been added to the legend
-                if show_legend:
-                    legend_added[model_key] = True
-            # Add real values as a separate trace, if provided
-            filtered_Data_BE_df = Data_BE.loc[df.index]
-        if filtered_Data_BE_df[f'{category_prefix}_entsoe'].notna().any():
-            fig.add_trace(go.Scatter(
-                x=filtered_Data_BE_df.index,
-                y=filtered_Data_BE_df[f'{category_prefix}_entsoe'],
-                mode='lines',
-                name=f'Actual {category_prefix}',
-                line=dict(color='black', width=2),  # Black line for real values
-                showlegend=True  # Always show this in the legend
-            ))
-        # Update layout to position the legend at the top, side by side
-        fig.update_layout(
-            title=dict(
-                text=title,
-                x=0,  # Center the title horizontally
-                y=1.00,  # Slightly lower the title to create more space
-                xanchor='left',
-                yanchor='top'
-            ),
-            xaxis_title='Date',
-            yaxis_title='Value',
-            legend=dict(
-                orientation="h",  # Horizontal legend
-                yanchor="bottom",  # Align to the bottom of the legend box
-                y=1,  # Increase y position to avoid overlap with the title
-                xanchor="center",  # Center the legend horizontally
-                x=0.5  # Position at the center of the plot
-            )
-        )
-        return fig
-    if country_code == "BE":
-        st.header('EDS Forecasts by Hour')
-        solar_fig = plot_category(forecast_dict, 'Solar', 'Solar Predictions')
-        st.plotly_chart(solar_fig)
-        wind_offshore_fig = plot_category(forecast_dict, 'Wind_offshore', 'Wind Offshore Predictions')
-        st.plotly_chart(wind_offshore_fig)
-        wind_onshore_fig = plot_category(forecast_dict, 'Wind_onshore', 'Wind Onshore Predictions')
-        st.plotly_chart(wind_onshore_fig)
-        load_fig = plot_category(forecast_dict, 'Load', 'Load Predictions')
-        st.plotly_chart(load_fig)
-    # Scatter plots for error distribution
-    st.subheader('Error Distribution')
-    st.write('The below scatter plots show the error distribution of all three fields: Solar, Wind and Load between the selected date range')
-    for i in range(0, len(forecast_columns), 2):
-        actual_col = forecast_columns[i]
-        forecast_col = forecast_columns[i + 1]
-        if forecast_col in data.columns:
-            obs = last_week[actual_col]
-            pred = last_week[forecast_col]
-            error = pred - obs
-            fig = px.scatter(x=obs, y=pred, labels={'x': 'Observed [MW]', 'y': 'Predicted by ENTSO-E [MW]'})
-            fig.update_layout(title=f'Error Distribution for {forecast_col}')
-            st.plotly_chart(fig)
-    st.subheader('Accuracy Metrics (Sorted by rMAE):')
-    if country_code == "BE":
-        # Combine the two DataFrames on their index
-        df_combined = Historical_and_operational.join(Data_BE, how='inner')
-        # List of model columns from historical_forecast
-        model_columns = historical_forecast.columns
-        # Initialize dictionaries to store MAE and RMSE results for each variable
-        results_wind_onshore = {}
-        results_wind_offshore = {}
-        results_load = {}
-        results_solar = {}
-        # Mapping of variables to their corresponding naive models
-        naive_models = {
-            'Wind_onshore': 'Wind_onshore_DailyNaiveSeasonal',
-            'Wind_offshore': 'Wind_offshore_DailyNaiveSeasonal',
-            'Load': 'Load_WeeklyNaiveSeasonal',
-            'Solar': 'Solar_DailyNaiveSeasonal'
-        }
-        # Step 1: Calculate MAE, RMSE, and rMAE for each model
-        for col in model_columns:
-            # Extract the variable name by taking everything before the first underscore
-            base_variable = col.split('_')[0]
-            # Handle cases where variable names might be combined with multiple parts (e.g., "Load_LightGBMModel...")
-            if base_variable in ['Wind', 'Load', 'Solar']:
-                if 'onshore' in col:
-                    variable_name = 'Wind_onshore'
-                    results_dict = results_wind_onshore
-                elif 'offshore' in col:
-                    variable_name = 'Wind_offshore'
-                    results_dict = results_wind_offshore
-                else:
-                    variable_name = base_variable
-                    results_dict = results_load if base_variable == 'Load' else results_solar
-            else:
-                variable_name = base_variable
-            # Construct the corresponding `variable_entsoe` column name
-            entsoe_column = f'{variable_name}_entsoe'
-            naive_model_col = naive_models.get(variable_name, None)
-            # Drop NaNs for the specific pair of columns before calculating MAE and RMSE
-            if entsoe_column in df_combined.columns and naive_model_col in df_combined.columns:
-                valid_data = df_combined[[col, entsoe_column]].dropna()
-                valid_naive_data = df_combined[[entsoe_column, naive_model_col]].dropna()
-                # Calculate MAE and RMSE for the model against the `variable_entsoe`
-                mae = np.mean(abs(valid_data[col] - valid_data[entsoe_column]))
-                rmse = np.sqrt(mean_squared_error(valid_data[col], valid_data[entsoe_column]))
-                # Calculate MAE for the Naive model
-                mae_naive = np.mean(abs(valid_naive_data[entsoe_column] - valid_naive_data[naive_model_col]))
-                # Calculate rMAE for the model
-                rMAE = mae / mae_naive if mae_naive != 0 else np.inf
-                # Store the results in the corresponding dictionary
-                results_dict[f'{col}'] = {'MAE': mae, 'RMSE': rmse, 'rMAE': rMAE}
-        # Step 2: Calculate MAE, RMSE, and rMAE for ENTSO-E forecasts specifically
-        for variable_name in naive_models.keys():
-            entsoe_column = f'{variable_name}_entsoe'
-            forecast_entsoe_column = f'{variable_name}_forecast_entsoe'
-            naive_model_col = naive_models[variable_name]
-            # Ensure that the ENTSO-E forecast is included in the results
-            if forecast_entsoe_column in df_combined.columns:
-                valid_data = df_combined[[forecast_entsoe_column, entsoe_column]].dropna()
-                valid_naive_data = df_combined[[entsoe_column, naive_model_col]].dropna()
-                # Calculate MAE and RMSE for the ENTSO-E forecast against the actuals
-                mae_entsoe = np.mean(abs(valid_data[forecast_entsoe_column] - valid_data[entsoe_column]))
-                rmse_entsoe = np.sqrt(mean_squared_error(valid_data[forecast_entsoe_column], valid_data[entsoe_column]))
-                # Calculate rMAE for the ENTSO-E forecast
-                mae_naive = np.mean(abs(valid_naive_data[entsoe_column] - valid_naive_data[naive_model_col]))
-                rMAE_entsoe = mae_entsoe / mae_naive if mae_naive != 0 else np.inf
-                # Add the ENTSO-E results to the corresponding dictionary
-                if variable_name == 'Wind_onshore':
-                    results_wind_onshore[forecast_entsoe_column] = {'MAE': mae_entsoe, 'RMSE': rmse_entsoe, 'rMAE': rMAE_entsoe}
-                elif variable_name == 'Wind_offshore':
-                    results_wind_offshore[forecast_entsoe_column] = {'MAE': mae_entsoe, 'RMSE': rmse_entsoe, 'rMAE': rMAE_entsoe}
-                elif variable_name == 'Load':
-                    results_load[forecast_entsoe_column] = {'MAE': mae_entsoe, 'RMSE': rmse_entsoe, 'rMAE': rMAE_entsoe}
-                elif variable_name == 'Solar':
-                    results_solar[forecast_entsoe_column] = {'MAE': mae_entsoe, 'RMSE': rmse_entsoe, 'rMAE': rMAE_entsoe}
-        # Convert the dictionaries to DataFrames and sort by rMAE
-        df_wind_onshore = pd.DataFrame.from_dict(results_wind_onshore, orient='index').sort_values(by='rMAE')
-        df_wind_offshore = pd.DataFrame.from_dict(results_wind_offshore, orient='index').sort_values(by='rMAE')
-        df_load = pd.DataFrame.from_dict(results_load, orient='index').sort_values(by='rMAE')
-        df_solar = pd.DataFrame.from_dict(results_solar, orient='index').sort_values(by='rMAE')
-        st.write("##### Wind Onshore:")
-        st.dataframe(df_wind_onshore)
-        st.write("##### Wind Offshore:")
-        st.dataframe(df_wind_offshore)
-        st.write("##### Load:")
-        st.dataframe(df_load)
-        st.write("##### Solar:")
-        st.dataframe(df_solar)
-    else:
-        accuracy_metrics = pd.DataFrame(columns=['MAE', 'rMAE'], index=['Load', 'Solar', 'Wind Onshore', 'Wind Offshore'])
-        for i in range(0, len(forecast_columns), 2):
-            actual_col = forecast_columns[i]
-            forecast_col = forecast_columns[i + 1]
-            if forecast_col in data.columns:
-                obs = data[actual_col]
-                pred = data[forecast_col]
-                error = pred - obs
-                mae = round(np.mean(np.abs(error)),2)
-                if 'Load' in actual_col:
-                    persistence = obs.shift(168)  # Weekly persistence
-                else:
-                    persistence = obs.shift(24)  # Daily persistence
-                # Using the whole year's data for rMAE calculations
-                rmae = round(mae / np.mean(np.abs(obs - persistence)),2)
-                row_label = 'Load' if 'Load' in actual_col else 'Solar' if 'Solar' in actual_col else 'Wind Offshore' if 'Wind_offshore' in actual_col else 'Wind Onshore'
-                accuracy_metrics.loc[row_label] = [mae, rmae]
-        accuracy_metrics.dropna(how='all', inplace=True)# Sort by rMAE (second column)
-        accuracy_metrics.sort_values(by=accuracy_metrics.columns[1], ascending=True, inplace=True)
-        accuracy_metrics = accuracy_metrics.round(4)
-        col1, col2 = st.columns([3, 2])
-        with col1:
-            st.dataframe(accuracy_metrics)
-        with col2:
-            st.markdown("""
-                <style>
-                .big-font {
-                    font-size: 20px;
-                    font-weight: 500;
-                }
-                </style>
-                <div class="big-font">
-                Equations
-                </div>
-                """, unsafe_allow_html=True)
-            st.markdown(r"""
-            $\text{MAE} = \frac{1}{n}\sum_{i=1}^{n}|y_i - \hat{y}_i|$
-            $\text{rMAE} = \frac{\text{MAE}}{MAE_{\text{Persistence Model}}}$
-            """)
-    st.subheader('ACF plots of Errors')
-    st.write('The below plots show the ACF (Auto-Correlation Function) for the errors of all three fields: Solar, Wind and Load.')
-    for i in range(0, len(forecast_columns), 2):
-        actual_col = forecast_columns[i]
-        forecast_col = forecast_columns[i + 1]
-        if forecast_col in data.columns:
-            obs = data[actual_col]
-            pred = data[forecast_col]
-            error = pred - obs
-            st.write(f"**ACF of Errors for {actual_col}**")
-            fig, ax = plt.subplots(figsize=(10, 5))
-            plot_acf(error.dropna(), ax=ax)
-            st.pyplot(fig)
-            acf_values = acf(error.dropna(), nlags=240)
-# Section 3: Insights
-elif section == 'Insights':
-    st.header("Insights")
-    st.write("""
-    This section provides insights derived from the data and forecasts.
-    You can visualize trends, anomalies, and other important findings.
-    """)
-    # Scatter plots for correlation between wind, solar, and load
-    st.subheader('Correlation between Wind, Solar, and Load')
-    st.write('The below scatter plots for correlation between all three fields: Solar, Wind and Load.')
-    combinations = [('Solar_entsoe', 'Load_entsoe'), ('Wind_onshore_entsoe', 'Load_entsoe'), ('Wind_offshore_entsoe', 'Load_entsoe'), ('Solar_entsoe', 'Wind_onshore_entsoe'), ('Solar_entsoe', 'Wind_offshore_entsoe')]
-    for x_col, y_col in combinations:
-        if x_col in data.columns and y_col in data.columns:
-            # For solar combinations, filter out zero values
-            if 'Solar_entsoe' in x_col:
-                filtered_data = data[data['Solar_entsoe'] > 0]
-                x_values = filtered_data[x_col]
-                y_values = filtered_data[y_col]
-            else:
-                x_values = data[x_col]
-                y_values = data[y_col]
-            corr_coef = x_values.corr(y_values)
-            fig = px.scatter(
-                x=x_values,
-                y=y_values,
-                labels={'x': f'{x_col} [MW]', 'y': f'{y_col} [MW]'},
-                title=f'{x_col} vs {y_col} (Correlation: {corr_coef:.2f})', color_discrete_sequence=['grey'])
-            st.plotly_chart(fig)
-    st.subheader('Weather vs. Generation/Demand')
-    st.write('The below scatter plots show the relation between weather parameters (i.e., Temperature, Wind Speed) and generation/demand.')
-    for weather_col in weather_columns:
-        for actual_col in ['Load_entsoe', 'Solar_entsoe', 'Wind_onshore_entsoe', 'Wind_offshore_entsoe']:
-            if weather_col in data.columns and actual_col in data.columns:
-                clean_label = actual_col.replace('_entsoe', '')
-                if weather_col == 'Temperature':
-                    fig = px.scatter(x=data[weather_col], y=data[actual_col], labels={'x': f'{weather_col} (°C)', 'y': f'{clean_label} Generation [MW]'}, color_discrete_sequence=['orange'])
-                else:
-                    fig = px.scatter(x=data[weather_col], y=data[actual_col], labels={'x': f'{weather_col} (km/h)', 'y': clean_label})
-                fig.update_layout(title=f'{weather_col} vs {actual_col}')
-                st.plotly_chart(fig)

+import requests
+import pandas as pd
+from io import StringIO
+import streamlit as st
+import os
+import plotly.express as px
+import plotly.graph_objects as go
+import plotly.colors as pc
+import numpy as np
+from sklearn.metrics import mean_squared_error
+from statsmodels.tsa.stattools import acf
+from statsmodels.graphics.tsaplots import plot_acf
+import matplotlib.pyplot as plt
+##GET ALL FILES FROM GITHUB
+def load_GitHub(github_token, file_name):
+    url = f'https://raw.githubusercontent.com/margaridamascarenhas/Transparency_Data/main/{file_name}'
+    headers = {'Authorization': f'token {github_token}'}
+    response = requests.get(url, headers=headers)
+    if response.status_code == 200:
+        csv_content = StringIO(response.text)
+        df = pd.read_csv(csv_content)
+        if 'Date' in df.columns:
+            df['Date'] = pd.to_datetime(df['Date'])  # Convert 'Date' column to datetime
+            df.set_index('Date', inplace=True)  # Set 'Date' column as the index
+            #df.to_csv(file_name)
+        return df
+    else:
+        print(f"Failed to download {file_name}. Status code: {response.status_code}")
+        return None
+def load_forecast(github_token):
+    predictions_dict = {}
+    for hour in range(24):
+        file_name = f'Predictions_{hour}h.csv'
+        df = load_GitHub(github_token, file_name)
+        if df is not None:
+            predictions_dict[file_name] = df
+    return predictions_dict
+def convert_European_time(data, time_zone):
+    data.index = pd.to_datetime(data.index, utc=True)
+    data.index = data.index.tz_convert(time_zone)
+    data.index = data.index.tz_localize(None)
+    return data
+github_token = st.secrets["GitHub_Token_KUL_Margarida"]
+if github_token:
+    forecast_dict = load_forecast(github_token)
+    historical_forecast=load_GitHub(github_token, 'Historical_forecast.csv')
+    Data_BE=load_GitHub(github_token, 'BE_Elia_Entsoe_UTC.csv')
+    Data_FR=load_GitHub(github_token, 'FR_Entsoe_UTC.csv')
+    Data_NL=load_GitHub(github_token, 'NL_Entsoe_UTC.csv')
+    Data_DE=load_GitHub(github_token, 'DE_Entsoe_UTC.csv')
+    Data_BE=convert_European_time(Data_BE, 'Europe/Brussels')
+    Data_FR=convert_European_time(Data_FR, 'Europe/Paris')
+    Data_NL=convert_European_time(Data_NL, 'Europe/Amsterdam')
+    Data_DE=convert_European_time(Data_DE, 'Europe/Berlin')
+else:
+    print("Please enter your GitHub Personal Access Token to proceed.")
+def conformal_predictions(data, target, my_forecast):
+    data['Residuals'] = data[my_forecast] - data[actual_col]
+    data['Hour'] = data.index.hour
+    min_date = data.index.min()
+    for date in data.index.normalize().unique():
+        if date >= min_date + pd.DateOffset(days=30):
+            start_date = date - pd.DateOffset(days=30)
+            end_date = date
+            calculation_window = data[start_date:end_date-pd.DateOffset(hours=1)]
+            quantiles = calculation_window.groupby('Hour')['Residuals'].quantile(0.8)
+            # Use .loc to safely access and modify data
+            if date in data.index:
+                current_day_data = data.loc[date.strftime('%Y-%m-%d')]
+                for hour in current_day_data['Hour'].unique():
+                    if hour in quantiles.index:
+                        hour_quantile = quantiles[hour]
+                        idx = (data.index.normalize() == date) & (data.Hour == hour)
+                        data.loc[idx, 'Quantile_80'] = hour_quantile
+                        data.loc[idx, 'Lower_Interval'] = data.loc[idx, my_forecast] - hour_quantile
+                        data.loc[idx, 'Upper_Interval'] = data.loc[idx, my_forecast] + hour_quantile
+    #data.reset_index(inplace=True)
+    return data
+st.title("Transparency++")
+countries = {
+    'Belgium': 'BE',
+    'Netherlands': 'NL',
+    'Germany': 'DE',
+    'France': 'FR',
+}
+st.sidebar.header('Filters')
+selected_country = st.sidebar.selectbox('Select Country', list(countries.keys()))
+st.write()
+date_range = st.sidebar.date_input("Select Date Range for Metrics Calculation:",
+                                   value=(pd.to_datetime("2024-01-01"), pd.to_datetime(pd.Timestamp('today'))))
+# Ensure the date range provides two dates
+if len(date_range) == 2:
+    start_date = pd.Timestamp(date_range[0])
+    end_date = pd.Timestamp(date_range[1])
+else:
+    st.error("Please select a valid date range.")
+    st.stop()
+# Sidebar with radio buttons for different sections
+section = st.sidebar.radio('Section', ['Data', 'Forecasts', 'Insights'])
+country_code = countries[selected_country]
+if country_code == 'BE':
+    data = Data_BE
+    weather_columns = ['Temperature', 'Wind Speed Onshore', 'Wind Speed Offshore']
+    data['Temperature'] = data['temperature_2m_8']
+    data['Wind Speed Offshore'] = data['wind_speed_100m_4']
+    data['Wind Speed Onshore'] = data['wind_speed_100m_8']
+elif country_code == 'DE':
+    data = Data_DE
+    weather_columns = ['Temperature', 'Wind Speed']
+    data['Temperature'] = data['temperature_2m']
+    data['Wind Speed'] = data['wind_speed_100m']
+elif country_code == 'NL':
+    data = Data_NL
+    weather_columns = ['Temperature', 'Wind Speed']
+    data['Temperature'] = data['temperature_2m']
+    data['Wind Speed'] = data['wind_speed_100m']
+elif country_code == 'FR':
+    data = Data_FR
+    weather_columns = ['Temperature', 'Wind Speed']
+    data['Temperature'] = data['temperature_2m']
+    data['Wind Speed'] = data['wind_speed_100m']
+def add_feature(df2, df_main):
+    #df_main.index = pd.to_datetime(df_main.index)
+    #df2.index = pd.to_datetime(df2.index)
+    df_combined = df_main.combine_first(df2)
+    last_date_df1 = df_main.index.max()
+    first_date_df2 = df2.index.min()
+    if first_date_df2 == last_date_df1 + pd.Timedelta(hours=1):
+        df_combined = pd.concat([df_main, df2[df2.index > last_date_df1]], axis=0)
+    #df_combined.reset_index(inplace=True)
+    return df_combined
+#data.index = data.index.tz_localize('UTC')
+data = data.loc[start_date:end_date]
+forecast_columns = [
+    'Load_entsoe','Load_forecast_entsoe','Wind_onshore_entsoe','Wind_onshore_forecast_entsoe','Wind_offshore_entsoe','Wind_offshore_forecast_entsoe','Solar_entsoe','Solar_forecast_entsoe']
+if section == 'Data':
+    st.header("Data")
+    st.write("""
+    This section allows you to explore and upload your datasets.
+    You can visualize raw data, clean it, and prepare it for analysis.
+    """)
+    st.header('Data Quality')
+    output_text = f"The below percentages are calculated from the selected date range from {start_date.strftime('%Y-%m-%d')} to {end_date.strftime('%Y-%m-%d')}. This interval can be adjusted from the sidebar."
+    st.write(output_text)
+    # Report % of missing values
+    missing_values = data[forecast_columns].isna().mean() * 100
+    missing_values = missing_values.round(2)
+    installed_capacities = {
+        'FR': { 'Solar': 17419, 'Wind Offshore': 1483, 'Wind Onshore': 22134},
+        'DE': { 'Solar': 73821, 'Wind Offshore': 8386, 'Wind Onshore': 59915},
+        'BE': { 'Solar': 8789, 'Wind Offshore': 2262, 'Wind Onshore': 3053},
+        'NL': { 'Solar': 22590, 'Wind Offshore': 3220, 'Wind Onshore': 6190},
+    }
+    if country_code not in installed_capacities:
+        st.error(f"Installed capacities not defined for country code '{country_code}'.")
+        st.stop()
+    # Report % of extreme, impossible values for the selected country
+    capacities = installed_capacities[country_code]
+    extreme_values = {}
+    for col in forecast_columns:
+            if 'Solar_entsoe' in col:
+                extreme_values[col] = ((data[col] < 0) | (data[col] > capacities['Solar'])).mean() * 100
+            elif 'Solar_forecast_entsoe' in col:
+                extreme_values[col] = ((data[col] < 0) | (data[col] > capacities['Solar'])).mean() * 100
+            elif 'Wind_onshore_entsoe' in col:
+                extreme_values[col] = ((data[col] < 0) | (data[col] > capacities['Wind Onshore'])).mean() * 100
+            elif 'Wind_onshore_forecast_entsoe' in col:
+                extreme_values[col] = ((data[col] < 0) | (data[col] > capacities['Wind Onshore'])).mean() * 100
+            elif 'Wind_offshore_entsoe' in col:
+                extreme_values[col] = ((data[col] < 0) | (data[col] > capacities['Wind Offshore'])).mean() * 100
+            elif 'Wind_offshore_forecast_entsoe' in col:
+                extreme_values[col] = ((data[col] < 0) | (data[col] > capacities['Wind Offshore'])).mean() * 100
+            elif 'Load_entsoe' in col:
+                extreme_values[col] = ((data[col] < 0)).mean() * 100
+            elif 'Load_forecast_entsoe' in col:
+                extreme_values[col] = ((data[col] < 0)).mean() * 100
+    extreme_values = pd.Series(extreme_values).round(2)
+    # Combine all metrics into one DataFrame
+    metrics_df = pd.DataFrame({
+    'Missing Values (%)': missing_values,
+    'Extreme/Nonsensical Values (%)': extreme_values,
+    })
+    st.markdown(
+    """
+    <style>
+    .dataframe {font-size: 45px !important;}
+    </style>
+    """,
+    unsafe_allow_html=True
+    )
+    st.dataframe(metrics_df)
+    st.write('<b><u>Missing values (%)</u></b>: Percentage of missing values in the dataset', unsafe_allow_html=True)
+    st.write('<b><u>Extreme/Nonsensical values (%)</u></b>: Values that are considered implausible such as negative or out-of-bound values i.e., (generation<0) or (generation>capacity)', unsafe_allow_html=True)
+# Section 2: Forecasts
+elif section == 'Forecasts':
+    st.header('Forecast Quality')
+    # Time series for last 1 week
+    st.subheader('Time Series: Last 1 Week')
+    last_week = Data_BE.loc[Data_BE.index >= (data.index[-1] - pd.Timedelta(days=7))]
+    st.write('The below plots show the time series of forecasts vs. observations provided by the ENTSO-E Transparency platform between the selected data range.')
+    forecast_columns_operational = [
+    'Load_entsoe','Load_forecast_entsoe', 'Load_LightGBMModel.7D.TimeCov.Temp.Forecast_elia', 'Wind_onshore_entsoe','Wind_onshore_forecast_entsoe','Wind_onshore_LightGBMModel.1D.TimeCov.Temp.Forecast_elia','Wind_offshore_entsoe','Wind_offshore_forecast_entsoe','Wind_offshore_LightGBMModel.1D.TimeCov.Temp.Forecast_elia','Solar_entsoe','Solar_forecast_entsoe', 'Solar_LightGBMModel.1D.TimeCov.Temp.Forecast_elia']
+    forecast_columns = [
+    'Load_entsoe','Load_forecast_entsoe','Wind_onshore_entsoe','Wind_onshore_forecast_entsoe','Wind_offshore_entsoe','Wind_offshore_forecast_entsoe','Solar_entsoe','Solar_forecast_entsoe']
+    operation_forecast_load=forecast_dict['Predictions_10h.csv'].filter(like='Load_', axis=1)
+    operation_forecast_res=forecast_dict['Predictions_17h.csv'].filter(regex='^(?!Load_)')
+    operation_forecast_load.columns = [col.replace('_entsoe.', '_').replace('Naive.7D', 'WeeklyNaiveSeasonal') for col in operation_forecast_load.columns]
+    operation_forecast_res.columns = [col.replace('_entsoe.', '_').replace('Naive.1D', 'DailyNaiveSeasonal') for col in operation_forecast_res.columns]
+    Historical_and_Load=add_feature(operation_forecast_load, historical_forecast)
+    Historical_and_operational=add_feature(operation_forecast_res, Historical_and_Load)
+    #print(Historical_and_operational.filter(like='Forecast_elia', axis=1))
+    best_forecast = Historical_and_operational.filter(like='Forecast_elia', axis=1)
+    df_combined = Historical_and_operational.join(Data_BE, how='inner')
+    last_week_best_forecast = best_forecast.loc[best_forecast.index >= (best_forecast.index[-24] - pd.Timedelta(days=8))]
+    for i in range(0, len(forecast_columns_operational), 3):
+        actual_col = forecast_columns_operational[i]
+        forecast_col = forecast_columns_operational[i + 1]
+        my_forecast = forecast_columns_operational[i + 2]
+        if forecast_col in data.columns:
+            fig = go.Figure()
+            fig.add_trace(go.Scatter(x=last_week.index, y=last_week[actual_col], mode='lines', name='Actual'))
+            fig.add_trace(go.Scatter(x=last_week.index, y=last_week[forecast_col], mode='lines', name='Forecast ENTSO-E'))
+            if country_code=='BE':
+                conformal=conformal_predictions(df_combined, actual_col, my_forecast)
+                last_week_conformal = conformal.loc[conformal.index >= (conformal.index[-24] - pd.Timedelta(days=8))]
+                if actual_col =='Load_entsoe':
+                    last_week_conformal = conformal.loc[conformal.index >= (conformal.index[-24] - pd.Timedelta(days=5))]
+                fig.add_trace(go.Scatter(x=last_week_best_forecast.index, y=last_week_best_forecast[my_forecast], mode='lines', name='Forecast EDS'))
+                fig.add_trace(go.Scatter(
+                    x=last_week_conformal.index,
+                    y=last_week_conformal['Lower_Interval'],
+                    mode='lines',
+                    line=dict(width=0),
+                    showlegend=False
+                ))
+                # Add the upper interval trace and fill to the lower interval
+                fig.add_trace(go.Scatter(
+                    x=last_week_conformal.index,
+                    y=last_week_conformal['Upper_Interval'],
+                    mode='lines',
+                    line=dict(width=0),
+                    fill='tonexty',  # Fill between this trace and the previous one
+                    fillcolor='rgba(68, 68, 68, 0.3)',
+                    name='P10/P90 prediction intervals'
+                ))
+            fig.update_layout(title=f'Forecasts vs Actual for {actual_col}', xaxis_title='Date', yaxis_title='Value [MW]')
+            st.plotly_chart(fig)
+    def plot_category(df_dict, category_prefix, title):
+        fig = go.Figure()
+        # Define base colors for each model
+        model_colors = {
+            'LightGBMModel.TimeCov.Temp.Forecast_elia': '#1f77b4',  # Blue
+            'LightGBMModel.TimeCov.Temp': '#2ca02c',  # Green
+            'Naive': '#ff7f0e'  # Orange
+        }
+        # To keep track of which model has been added to the legend
+        legend_added = {'LightGBMModel.TimeCov.Temp.Forecast_elia': False, 'LightGBMModel.TimeCov.Temp': False, 'Naive': False}
+        for file_name, df in df_dict.items():
+            # Extract the hour from the filename, assuming the format is "Predictions_Xh.csv"
+            hour = int(file_name.split('_')[1].replace('h.csv', ''))
+            filtered_columns = [col for col in df.columns if col.startswith(category_prefix)]
+            for column in filtered_columns:
+                # Identify the model type with more precise logic
+                if 'LightGBMModel' in column:
+                    if 'Forecast_elia' in column:
+                        model_key = 'LightGBMModel.TimeCov.Temp.Forecast_elia'
+                    elif 'TimeCov' in column:
+                        model_key = 'LightGBMModel.TimeCov.Temp'
+                elif 'Naive' in column:
+                    model_key = 'Naive'
+                else:
+                    continue  # Skip if it doesn't match any model type
+                # Extract the relevant part of the model name
+                parts = column.split('.')
+                model_name_parts = parts[1:]  # Skip the variable prefix
+                model_name = '.'.join(model_name_parts)  # Rejoin the parts to form the model name
+                # Get the base color for the model
+                base_color = model_colors[model_key]
+                # Calculate the color shade based on the hour
+                color_scale = pc.hex_to_rgb(base_color)
+                scale_factor = 0.3 + (hour / 40)  # Adjust scale to ensure the gradient is visible
+                adjusted_color = tuple(int(c * scale_factor) for c in color_scale)
+                # Convert to RGBA with transparency for plot lines
+                line_color = f'rgba({adjusted_color[0]}, {adjusted_color[1]}, {adjusted_color[2]}, 0.1)'  # Transparent color for lines
+                # Combine the hour and the model name for the legend, but only add the legend entry once
+                show_legend = not legend_added[model_key]
+                fig.add_trace(go.Scatter(
+                    x=df.index,  # Assuming 'Date' is the index, use 'df.index' for x-axis
+                    y=df[column],
+                    mode='lines',
+                    name=model_name if show_legend else None,  # Use the model name for the legend, but only once
+                    line=dict(color=base_color if show_legend else line_color),  # Use opaque color for legend, transparent for lines
+                    showlegend=show_legend,  # Show legend only once per model
+                    legendgroup=model_key  # Grouping for consistent legend color
+                ))
+                # Mark that this model has been added to the legend
+                if show_legend:
+                    legend_added[model_key] = True
+            # Add real values as a separate trace, if provided
+            filtered_Data_BE_df = Data_BE.loc[df.index]
+        if filtered_Data_BE_df[f'{category_prefix}_entsoe'].notna().any():
+            fig.add_trace(go.Scatter(
+                x=filtered_Data_BE_df.index,
+                y=filtered_Data_BE_df[f'{category_prefix}_entsoe'],
+                mode='lines',
+                name=f'Actual {category_prefix}',
+                line=dict(color='black', width=2),  # Black line for real values
+                showlegend=True  # Always show this in the legend
+            ))
+        # Update layout to position the legend at the top, side by side
+        fig.update_layout(
+            title=dict(
+                text=title,
+                x=0,  # Center the title horizontally
+                y=1.00,  # Slightly lower the title to create more space
+                xanchor='left',
+                yanchor='top'
+            ),
+            xaxis_title='Date',
+            yaxis_title='Value',
+            legend=dict(
+                orientation="h",  # Horizontal legend
+                yanchor="bottom",  # Align to the bottom of the legend box
+                y=1,  # Increase y position to avoid overlap with the title
+                xanchor="center",  # Center the legend horizontally
+                x=0.5  # Position at the center of the plot
+            )
+        )
+        return fig
+    if country_code == "BE":
+        st.header('EDS Forecasts by Hour')
+        solar_fig = plot_category(forecast_dict, 'Solar', 'Solar Predictions')
+        st.plotly_chart(solar_fig)
+        wind_offshore_fig = plot_category(forecast_dict, 'Wind_offshore', 'Wind Offshore Predictions')
+        st.plotly_chart(wind_offshore_fig)
+        wind_onshore_fig = plot_category(forecast_dict, 'Wind_onshore', 'Wind Onshore Predictions')
+        st.plotly_chart(wind_onshore_fig)
+        load_fig = plot_category(forecast_dict, 'Load', 'Load Predictions')
+        st.plotly_chart(load_fig)
+    # Scatter plots for error distribution
+    st.subheader('Error Distribution')
+    st.write('The below scatter plots show the error distribution of all three fields: Solar, Wind and Load between the selected date range')
+    for i in range(0, len(forecast_columns), 2):
+        actual_col = forecast_columns[i]
+        forecast_col = forecast_columns[i + 1]
+        if forecast_col in data.columns:
+            obs = last_week[actual_col]
+            pred = last_week[forecast_col]
+            error = pred - obs
+            fig = px.scatter(x=obs, y=pred, labels={'x': 'Observed [MW]', 'y': 'Predicted by ENTSO-E [MW]'})
+            fig.update_layout(title=f'Error Distribution for {forecast_col}')
+            st.plotly_chart(fig)
+    st.subheader('Accuracy Metrics (Sorted by rMAE):')
+    if country_code == "BE":
+        # Combine the two DataFrames on their index
+        df_combined = Historical_and_operational.join(Data_BE, how='inner')
+        # List of model columns from historical_forecast
+        model_columns = historical_forecast.columns
+        # Initialize dictionaries to store MAE and RMSE results for each variable
+        results_wind_onshore = {}
+        results_wind_offshore = {}
+        results_load = {}
+        results_solar = {}
+        # Mapping of variables to their corresponding naive models
+        naive_models = {
+            'Wind_onshore': 'Wind_onshore_DailyNaiveSeasonal',
+            'Wind_offshore': 'Wind_offshore_DailyNaiveSeasonal',
+            'Load': 'Load_WeeklyNaiveSeasonal',
+            'Solar': 'Solar_DailyNaiveSeasonal'
+        }
+        # Step 1: Calculate MAE, RMSE, and rMAE for each model
+        for col in model_columns:
+            # Extract the variable name by taking everything before the first underscore
+            base_variable = col.split('_')[0]
+            # Handle cases where variable names might be combined with multiple parts (e.g., "Load_LightGBMModel...")
+            if base_variable in ['Wind', 'Load', 'Solar']:
+                if 'onshore' in col:
+                    variable_name = 'Wind_onshore'
+                    results_dict = results_wind_onshore
+                elif 'offshore' in col:
+                    variable_name = 'Wind_offshore'
+                    results_dict = results_wind_offshore
+                else:
+                    variable_name = base_variable
+                    results_dict = results_load if base_variable == 'Load' else results_solar
+            else:
+                variable_name = base_variable
+            # Construct the corresponding `variable_entsoe` column name
+            entsoe_column = f'{variable_name}_entsoe'
+            naive_model_col = naive_models.get(variable_name, None)
+            # Drop NaNs for the specific pair of columns before calculating MAE and RMSE
+            if entsoe_column in df_combined.columns and naive_model_col in df_combined.columns:
+                valid_data = df_combined[[col, entsoe_column]].dropna()
+                valid_naive_data = df_combined[[entsoe_column, naive_model_col]].dropna()
+                # Calculate MAE and RMSE for the model against the `variable_entsoe`
+                mae = np.mean(abs(valid_data[col] - valid_data[entsoe_column]))
+                rmse = np.sqrt(mean_squared_error(valid_data[col], valid_data[entsoe_column]))
+                # Calculate MAE for the Naive model
+                mae_naive = np.mean(abs(valid_naive_data[entsoe_column] - valid_naive_data[naive_model_col]))
+                # Calculate rMAE for the model
+                rMAE = mae / mae_naive if mae_naive != 0 else np.inf
+                # Store the results in the corresponding dictionary
+                results_dict[f'{col}'] = {'MAE': mae, 'RMSE': rmse, 'rMAE': rMAE}
+        # Step 2: Calculate MAE, RMSE, and rMAE for ENTSO-E forecasts specifically
+        for variable_name in naive_models.keys():
+            entsoe_column = f'{variable_name}_entsoe'
+            forecast_entsoe_column = f'{variable_name}_forecast_entsoe'
+            naive_model_col = naive_models[variable_name]
+            # Ensure that the ENTSO-E forecast is included in the results
+            if forecast_entsoe_column in df_combined.columns:
+                valid_data = df_combined[[forecast_entsoe_column, entsoe_column]].dropna()
+                valid_naive_data = df_combined[[entsoe_column, naive_model_col]].dropna()
+                # Calculate MAE and RMSE for the ENTSO-E forecast against the actuals
+                mae_entsoe = np.mean(abs(valid_data[forecast_entsoe_column] - valid_data[entsoe_column]))
+                rmse_entsoe = np.sqrt(mean_squared_error(valid_data[forecast_entsoe_column], valid_data[entsoe_column]))
+                # Calculate rMAE for the ENTSO-E forecast
+                mae_naive = np.mean(abs(valid_naive_data[entsoe_column] - valid_naive_data[naive_model_col]))
+                rMAE_entsoe = mae_entsoe / mae_naive if mae_naive != 0 else np.inf
+                # Add the ENTSO-E results to the corresponding dictionary
+                if variable_name == 'Wind_onshore':
+                    results_wind_onshore[forecast_entsoe_column] = {'MAE': mae_entsoe, 'RMSE': rmse_entsoe, 'rMAE': rMAE_entsoe}
+                elif variable_name == 'Wind_offshore':
+                    results_wind_offshore[forecast_entsoe_column] = {'MAE': mae_entsoe, 'RMSE': rmse_entsoe, 'rMAE': rMAE_entsoe}
+                elif variable_name == 'Load':
+                    results_load[forecast_entsoe_column] = {'MAE': mae_entsoe, 'RMSE': rmse_entsoe, 'rMAE': rMAE_entsoe}
+                elif variable_name == 'Solar':
+                    results_solar[forecast_entsoe_column] = {'MAE': mae_entsoe, 'RMSE': rmse_entsoe, 'rMAE': rMAE_entsoe}
+        # Convert the dictionaries to DataFrames and sort by rMAE
+        df_wind_onshore = pd.DataFrame.from_dict(results_wind_onshore, orient='index').sort_values(by='rMAE')
+        df_wind_offshore = pd.DataFrame.from_dict(results_wind_offshore, orient='index').sort_values(by='rMAE')
+        df_load = pd.DataFrame.from_dict(results_load, orient='index').sort_values(by='rMAE')
+        df_solar = pd.DataFrame.from_dict(results_solar, orient='index').sort_values(by='rMAE')
+        st.write("##### Wind Onshore:")
+        st.dataframe(df_wind_onshore)
+        st.write("##### Wind Offshore:")
+        st.dataframe(df_wind_offshore)
+        st.write("##### Load:")
+        st.dataframe(df_load)
+        st.write("##### Solar:")
+        st.dataframe(df_solar)
+    else:
+        accuracy_metrics = pd.DataFrame(columns=['MAE', 'rMAE'], index=['Load', 'Solar', 'Wind Onshore', 'Wind Offshore'])
+        for i in range(0, len(forecast_columns), 2):
+            actual_col = forecast_columns[i]
+            forecast_col = forecast_columns[i + 1]
+            if forecast_col in data.columns:
+                obs = data[actual_col]
+                pred = data[forecast_col]
+                error = pred - obs
+                mae = round(np.mean(np.abs(error)),2)
+                if 'Load' in actual_col:
+                    persistence = obs.shift(168)  # Weekly persistence
+                else:
+                    persistence = obs.shift(24)  # Daily persistence
+                # Using the whole year's data for rMAE calculations
+                rmae = round(mae / np.mean(np.abs(obs - persistence)),2)
+                row_label = 'Load' if 'Load' in actual_col else 'Solar' if 'Solar' in actual_col else 'Wind Offshore' if 'Wind_offshore' in actual_col else 'Wind Onshore'
+                accuracy_metrics.loc[row_label] = [mae, rmae]
+        accuracy_metrics.dropna(how='all', inplace=True)# Sort by rMAE (second column)
+        accuracy_metrics.sort_values(by=accuracy_metrics.columns[1], ascending=True, inplace=True)
+        accuracy_metrics = accuracy_metrics.round(4)
+        col1, col2 = st.columns([3, 2])
+        with col1:
+            st.dataframe(accuracy_metrics)
+        with col2:
+            st.markdown("""
+                <style>
+                .big-font {
+                    font-size: 20px;
+                    font-weight: 500;
+                }
+                </style>
+                <div class="big-font">
+                Equations
+                </div>
+                """, unsafe_allow_html=True)
+            st.markdown(r"""
+            $\text{MAE} = \frac{1}{n}\sum_{i=1}^{n}|y_i - \hat{y}_i|$
+            $\text{rMAE} = \frac{\text{MAE}}{MAE_{\text{Persistence Model}}}$
+            """)
+    st.subheader('ACF plots of Errors')
+    st.write('The below plots show the ACF (Auto-Correlation Function) for the errors of all three fields: Solar, Wind and Load.')
+    for i in range(0, len(forecast_columns), 2):
+        actual_col = forecast_columns[i]
+        forecast_col = forecast_columns[i + 1]
+        if forecast_col in data.columns:
+            obs = data[actual_col]
+            pred = data[forecast_col]
+            error = pred - obs
+            st.write(f"**ACF of Errors for {actual_col}**")
+            fig, ax = plt.subplots(figsize=(10, 5))
+            plot_acf(error.dropna(), ax=ax)
+            st.pyplot(fig)
+            acf_values = acf(error.dropna(), nlags=240)
+# Section 3: Insights
+elif section == 'Insights':
+    st.header("Insights")
+    st.write("""
+    This section provides insights derived from the data and forecasts.
+    You can visualize trends, anomalies, and other important findings.
+    """)
+    # Scatter plots for correlation between wind, solar, and load
+    st.subheader('Correlation between Wind, Solar, and Load')
+    st.write('The below scatter plots for correlation between all three fields: Solar, Wind and Load.')
+    combinations = [('Solar_entsoe', 'Load_entsoe'), ('Wind_onshore_entsoe', 'Load_entsoe'), ('Wind_offshore_entsoe', 'Load_entsoe'), ('Solar_entsoe', 'Wind_onshore_entsoe'), ('Solar_entsoe', 'Wind_offshore_entsoe')]
+    for x_col, y_col in combinations:
+        if x_col in data.columns and y_col in data.columns:
+            # For solar combinations, filter out zero values
+            if 'Solar_entsoe' in x_col:
+                filtered_data = data[data['Solar_entsoe'] > 0]
+                x_values = filtered_data[x_col]
+                y_values = filtered_data[y_col]
+            else:
+                x_values = data[x_col]
+                y_values = data[y_col]
+            corr_coef = x_values.corr(y_values)
+            fig = px.scatter(
+                x=x_values,
+                y=y_values,
+                labels={'x': f'{x_col} [MW]', 'y': f'{y_col} [MW]'},
+                title=f'{x_col} vs {y_col} (Correlation: {corr_coef:.2f})', color_discrete_sequence=['grey'])
+            st.plotly_chart(fig)
+    st.subheader('Weather vs. Generation/Demand')
+    st.write('The below scatter plots show the relation between weather parameters (i.e., Temperature, Wind Speed) and generation/demand.')
+    for weather_col in weather_columns:
+        for actual_col in ['Load_entsoe', 'Solar_entsoe', 'Wind_onshore_entsoe', 'Wind_offshore_entsoe']:
+            if weather_col in data.columns and actual_col in data.columns:
+                clean_label = actual_col.replace('_entsoe', '')
+                if weather_col == 'Temperature':
+                    fig = px.scatter(x=data[weather_col], y=data[actual_col], labels={'x': f'{weather_col} (°C)', 'y': f'{clean_label} Generation [MW]'}, color_discrete_sequence=['orange'])
+                else:
+                    fig = px.scatter(x=data[weather_col], y=data[actual_col], labels={'x': f'{weather_col} (km/h)', 'y': clean_label})
+                fig.update_layout(title=f'{weather_col} vs {actual_col}')
+                st.plotly_chart(fig)