Spaces:

EDS-lab
/

Transparency_Plus

Sleeping

App Files Files Community

mmmapms committed on Sep 18

Commit

d709518

•

1 Parent(s): 993ece7

Update app.py

Browse files

Files changed (1) hide show

app.py +56 -272

app.py CHANGED Viewed

@@ -84,19 +84,17 @@ def simplify_model_names_in_index(df):
     return df
-current_hour, after_10_min = get_current_time()
-github_token = st.secrets["GitHub_Token_KUL_Margarida"]
 if github_token:
-    forecast_dict = load_forecast(github_token, current_hour, after_10_min)
-    historical_forecast = load_GitHub(github_token, 'Historical_forecast.csv', current_hour, after_10_min)
-    Data_BE = load_GitHub(github_token, 'BE_Elia_Entsoe_UTC.csv', current_hour, after_10_min)
-    Data_FR = load_GitHub(github_token, 'FR_Entsoe_UTC.csv', current_hour, after_10_min)
-    Data_NL = load_GitHub(github_token, 'NL_Entsoe_UTC.csv', current_hour, after_10_min)
-    Data_DE = load_GitHub(github_token, 'DE_Entsoe_UTC.csv', current_hour, after_10_min)
     Data_BE=convert_European_time(Data_BE, 'Europe/Brussels')
     Data_FR=convert_European_time(Data_FR, 'Europe/Paris')
@@ -107,29 +105,6 @@ if github_token:
 else:
     print("Please enter your GitHub Personal Access Token to proceed.")
-def conformal_predictions(data, target, my_forecast):
-    data['Residuals'] = data[my_forecast] - data[actual_col]
-    data['Hour'] = data.index.hour
-    min_date = data.index.min()
-    for date in data.index.normalize().unique():
-        if date >= min_date + pd.DateOffset(days=30):
-            start_date = date - pd.DateOffset(days=30)
-            end_date = date
-            calculation_window = data[start_date:end_date-pd.DateOffset(hours=1)]
-            quantiles = calculation_window.groupby('Hour')['Residuals'].quantile(0.8)
-            # Use .loc to safely access and modify data
-            if date in data.index:
-                current_day_data = data.loc[date.strftime('%Y-%m-%d')]
-                for hour in current_day_data['Hour'].unique():
-                    if hour in quantiles.index:
-                        hour_quantile = quantiles[hour]
-                        idx = (data.index.normalize() == date) & (data.Hour == hour)
-                        data.loc[idx, 'Quantile_80'] = hour_quantile
-                        data.loc[idx, 'Lower_Interval'] = data.loc[idx, my_forecast] - hour_quantile
-                        data.loc[idx, 'Upper_Interval'] = data.loc[idx, my_forecast] + hour_quantile
-    #data.reset_index(inplace=True)
-    return data
 # Main layout of the app
 col1, col2 = st.columns([5, 2])  # Adjust the ratio to better fit your layout needs
@@ -151,6 +126,7 @@ upper_space.markdown("""
 """, unsafe_allow_html=True)
 countries = {
     'Netherlands': 'NL',
     'Germany': 'DE',
@@ -242,9 +218,7 @@ if section == 'Data':
     st.write('The table below presents the data quality metrics for various energy-related datasets, focusing on the percentage of missing values and the occurrence of extreme or nonsensical values for the selected country.')
     data_quality=data.iloc[:-28]
-    if country_code=='BE':
-        data_quality=data.iloc[:-5*24]
-    print(data_quality.tail(48))
     # Report % of missing values
     missing_values = data_quality[forecast_columns].isna().mean() * 100
     missing_values = missing_values.round(2)
@@ -320,61 +294,16 @@ elif section == 'Forecasts':
     'Load_entsoe','Load_forecast_entsoe','Wind_onshore_entsoe','Wind_onshore_forecast_entsoe','Wind_offshore_entsoe','Wind_offshore_forecast_entsoe','Solar_entsoe','Solar_forecast_entsoe']
     num_per_var=2
-    if country_code=='BE':
-        operation_forecast_load=forecast_dict['Predictions_10h.csv'].filter(like='Load_', axis=1)
-        operation_forecast_res=forecast_dict['Predictions_17h.csv'].filter(regex='^(?!Load_)')
-        operation_forecast_load.columns = [col.replace('_entsoe.', '_').replace('Naive.7D', 'WeeklyNaiveSeasonal') for col in operation_forecast_load.columns]
-        operation_forecast_res.columns = [col.replace('_entsoe.', '_').replace('Naive.1D', 'DailyNaiveSeasonal') for col in operation_forecast_res.columns]
-        Historical_and_Load=add_feature(operation_forecast_load, historical_forecast)
-        Historical_and_operational=add_feature(operation_forecast_res, Historical_and_Load)
-        best_forecast = Historical_and_operational.filter(like='Forecast_elia', axis=1)
-        df_combined = Historical_and_operational.join(Data_BE, how='inner')
-        last_week_best_forecast = best_forecast.loc[best_forecast.index >= (best_forecast.index[-24] - pd.Timedelta(days=7))]
-        num_per_var=3
-        forecast_columns_line=['Load_entsoe','Load_forecast_entsoe', 'Load_LightGBMModel.7D.TimeCov.Temp.Forecast_elia', 'Wind_onshore_entsoe','Wind_onshore_forecast_entsoe','Wind_onshore_LightGBMModel.1D.TimeCov.Temp.Forecast_elia','Wind_offshore_entsoe','Wind_offshore_forecast_entsoe','Wind_offshore_LightGBMModel.1D.TimeCov.Temp.Forecast_elia','Solar_entsoe','Solar_forecast_entsoe', 'Solar_LightGBMModel.1D.TimeCov.Temp.Forecast_elia']
-    else:
-        forecast_columns_line=forecast_columns
     for i in range(0, len(forecast_columns_line), num_per_var):
         actual_col = forecast_columns_line[i]
         forecast_col = forecast_columns_line[i + 1]
-        if country_code=='BE':
-            my_forecast = forecast_columns_line[i + 2]
         if forecast_col in data.columns:
             fig = go.Figure()
             fig.add_trace(go.Scatter(x=last_week.index, y=last_week[actual_col], mode='lines', name='Actual'))
             fig.add_trace(go.Scatter(x=last_week.index, y=last_week[forecast_col], mode='lines', name='Forecast ENTSO-E'))
-            if country_code=='BE':
-                conformal=conformal_predictions(df_combined, actual_col, my_forecast)
-                last_week_conformal = conformal.loc[conformal.index >= (conformal.index[-24] - pd.Timedelta(days=7))]
-                if actual_col =='Load_entsoe':
-                    last_week_conformal = conformal.loc[conformal.index >= (conformal.index[-24] - pd.Timedelta(days=5))]
-                fig.add_trace(go.Scatter(x=last_week_best_forecast.index, y=last_week_best_forecast[my_forecast], mode='lines', name='Forecast EDS'))
-                fig.add_trace(go.Scatter(
-                    x=last_week_conformal.index,
-                    y=last_week_conformal['Lower_Interval'],
-                    mode='lines',
-                    line=dict(width=0),
-                    showlegend=False
-                ))
-                # Add the upper interval trace and fill to the lower interval
-                fig.add_trace(go.Scatter(
-                    x=last_week_conformal.index,
-                    y=last_week_conformal['Upper_Interval'],
-                    mode='lines',
-                    line=dict(width=0),
-                    fill='tonexty',  # Fill between this trace and the previous one
-                    fillcolor='rgba(68, 68, 68, 0.3)',
-                    name='P10/P90 prediction intervals'
-                ))
             fig.update_layout(title=f'Forecasts vs Actual for {actual_col}', xaxis_title='Date', yaxis_title='Value [MW]')
             st.plotly_chart(fig)
@@ -631,33 +560,6 @@ elif section == 'Forecasts':
         )
         return fig
-    if country_code == "BE":
-        st.header('MAE Ratio Comparison by Forecast Hour')
-        st.write("These clock-plots shows the relative Mean Absolute Error (rMAE) of different forecasting models compared to the ENTSO-E forecast, by the hour at which the forecast was made. "
-                "The rMAE is calculated as the ratio of the model's MAE to the ENTSO-E forecast's MAE.")
-        forecast_dict2 = forecast_dict.copy()
-        forecast_dict2 = {k: simplify_model_names(v) for k, v in forecast_dict.items()}
-        mae_comparison_fig = plot_mae_comparison_clock(forecast_dict2, 'Solar', 'rMAE Ratio Comparison for Solar', real_values_df=Data_BE)
-        st.plotly_chart(mae_comparison_fig)
-        mae_comparison_fig_wind_onshore = plot_mae_comparison_clock(forecast_dict2, 'Wind_onshore', 'MAE Ratio Comparison for Wind Onshore', real_values_df=Data_BE)
-        st.plotly_chart(mae_comparison_fig_wind_onshore)
-        mae_comparison_fig_wind_offshore = plot_mae_comparison_clock(forecast_dict2, 'Wind_offshore', 'MAE Ratio Comparison for Wind Offshore', real_values_df=Data_BE)
-        st.plotly_chart(mae_comparison_fig_wind_offshore)
-        mae_comparison_fig_load = plot_mae_comparison_clock(forecast_dict2, 'Load', 'MAE Ratio Comparison for Load', real_values_df=Data_BE)
-        st.plotly_chart(mae_comparison_fig_load)
     # Scatter plots for error distribution
@@ -683,177 +585,59 @@ elif section == 'Forecasts':
     output_text = f"The below metrics are calculated from the selected date range from {start_date.strftime('%Y-%m-%d')} to {end_date.strftime('%Y-%m-%d')}. This interval can be adjusted from the sidebar."
     st.write(output_text)
-    if country_code == "BE":
-        # Combine the two DataFrames on their index
-        df_combined = Historical_and_operational.join(Data_BE, how='inner')
-        # List of model columns from historical_forecast
-        model_columns = historical_forecast.columns
-        # Initialize dictionaries to store MAE and RMSE results for each variable
-        results_wind_onshore = {}
-        results_wind_offshore = {}
-        results_load = {}
-        results_solar = {}
-        # Mapping of variables to their corresponding naive models
-        naive_models = {
-            'Wind_onshore': 'Wind_onshore_DailyNaiveSeasonal',
-            'Wind_offshore': 'Wind_offshore_DailyNaiveSeasonal',
-            'Load': 'Load_WeeklyNaiveSeasonal',
-            'Solar': 'Solar_DailyNaiveSeasonal'
-        }
-        # Step 1: Calculate MAE, RMSE, and rMAE for each model
-        for col in model_columns:
-            # Extract the variable name by taking everything before the first underscore
-            base_variable = col.split('_')[0]
-            # Handle cases where variable names might be combined with multiple parts (e.g., "Load_LightGBMModel...")
-            if base_variable in ['Wind', 'Load', 'Solar']:
-                if 'onshore' in col:
-                    variable_name = 'Wind_onshore'
-                    results_dict = results_wind_onshore
-                elif 'offshore' in col:
-                    variable_name = 'Wind_offshore'
-                    results_dict = results_wind_offshore
-                else:
-                    variable_name = base_variable
-                    results_dict = results_load if base_variable == 'Load' else results_solar
             else:
-                variable_name = base_variable
-            # Construct the corresponding `variable_entsoe` column name
-            entsoe_column = f'{variable_name}_entsoe'
-            naive_model_col = naive_models.get(variable_name, None)
-            # Drop NaNs for the specific pair of columns before calculating MAE and RMSE
-            if entsoe_column in df_combined.columns and naive_model_col in df_combined.columns:
-                valid_data = df_combined[[col, entsoe_column]].dropna()
-                valid_naive_data = df_combined[[entsoe_column, naive_model_col]].dropna()
-                # Calculate MAE and RMSE for the model against the `variable_entsoe`
-                mae = np.mean(abs(valid_data[col] - valid_data[entsoe_column]))
-                rmse = np.sqrt(mean_squared_error(valid_data[col], valid_data[entsoe_column]))
-                # Calculate MAE for the Naive model
-                mae_naive = np.mean(abs(valid_naive_data[entsoe_column] - valid_naive_data[naive_model_col]))
-                # Calculate rMAE for the model
-                rMAE = mae / mae_naive if mae_naive != 0 else np.inf
-                # Store the results in the corresponding dictionary
-                results_dict[f'{col}'] = {'MAE': mae, 'RMSE': rmse, 'rMAE': rMAE}
-        # Step 2: Calculate MAE, RMSE, and rMAE for ENTSO-E forecasts specifically
-        for variable_name in naive_models.keys():
-            entsoe_column = f'{variable_name}_entsoe'
-            forecast_entsoe_column = f'{variable_name}_forecast_entsoe'
-            naive_model_col = naive_models[variable_name]
-            # Ensure that the ENTSO-E forecast is included in the results
-            if forecast_entsoe_column in df_combined.columns:
-                valid_data = df_combined[[forecast_entsoe_column, entsoe_column]].dropna()
-                valid_naive_data = df_combined[[entsoe_column, naive_model_col]].dropna()
-                # Calculate MAE and RMSE for the ENTSO-E forecast against the actuals
-                mae_entsoe = np.mean(abs(valid_data[forecast_entsoe_column] - valid_data[entsoe_column]))
-                rmse_entsoe = np.sqrt(mean_squared_error(valid_data[forecast_entsoe_column], valid_data[entsoe_column]))
-                # Calculate rMAE for the ENTSO-E forecast
-                mae_naive = np.mean(abs(valid_naive_data[entsoe_column] - valid_naive_data[naive_model_col]))
-                rMAE_entsoe = mae_entsoe / mae_naive if mae_naive != 0 else np.inf
-                # Add the ENTSO-E results to the corresponding dictionary
-                if variable_name == 'Wind_onshore':
-                    results_wind_onshore[forecast_entsoe_column] = {'MAE': mae_entsoe, 'RMSE': rmse_entsoe, 'rMAE': rMAE_entsoe}
-                elif variable_name == 'Wind_offshore':
-                    results_wind_offshore[forecast_entsoe_column] = {'MAE': mae_entsoe, 'RMSE': rmse_entsoe, 'rMAE': rMAE_entsoe}
-                elif variable_name == 'Load':
-                    results_load[forecast_entsoe_column] = {'MAE': mae_entsoe, 'RMSE': rmse_entsoe, 'rMAE': rMAE_entsoe}
-                elif variable_name == 'Solar':
-                    results_solar[forecast_entsoe_column] = {'MAE': mae_entsoe, 'RMSE': rmse_entsoe, 'rMAE': rMAE_entsoe}
-        # Convert the dictionaries to DataFrames and sort by rMAE
-        df_wind_onshore = pd.DataFrame.from_dict(results_wind_onshore, orient='index').sort_values(by='rMAE')
-        df_wind_offshore = pd.DataFrame.from_dict(results_wind_offshore, orient='index').sort_values(by='rMAE')
-        df_load = pd.DataFrame.from_dict(results_load, orient='index').sort_values(by='rMAE')
-        df_solar = pd.DataFrame.from_dict(results_solar, orient='index').sort_values(by='rMAE')
-        st.write("##### Wind Onshore:")
-        df_wind_onshore = simplify_model_names_in_index(df_wind_onshore)
-        st.dataframe(df_wind_onshore)
-        st.write("##### Wind Offshore:")
-        df_wind_offshore2 = simplify_model_names_in_index(df_wind_offshore)
-        st.dataframe(df_wind_offshore)
-        st.write("##### Load:")
-        df_load = simplify_model_names_in_index(df_load)
-        st.dataframe(df_load)
-        st.write("##### Solar:")
-        df_solar = simplify_model_names_in_index(df_solar)
-        st.dataframe(df_solar)
-    else:
-        data = data.loc[start_date:end_date]
-        accuracy_metrics = pd.DataFrame(columns=['MAE', 'rMAE'], index=['Load', 'Solar', 'Wind Onshore', 'Wind Offshore'])
-        for i in range(0, len(forecast_columns), 2):
-            actual_col = forecast_columns[i]
-            forecast_col = forecast_columns[i + 1]
-            if forecast_col in data.columns:
-                obs = data[actual_col]
-                pred = data[forecast_col]
-                error = pred - obs
-                mae = round(np.mean(np.abs(error)),2)
-                if 'Load' in actual_col:
-                    persistence = obs.shift(168)  # Weekly persistence
-                else:
-                    persistence = obs.shift(24)  # Daily persistence
-                # Using the whole year's data for rMAE calculations
-                rmae = round(mae / np.mean(np.abs(obs - persistence)),2)
-                row_label = 'Load' if 'Load' in actual_col else 'Solar' if 'Solar' in actual_col else 'Wind Offshore' if 'Wind_offshore' in actual_col else 'Wind Onshore'
-                accuracy_metrics.loc[row_label] = [mae, rmae]
-        accuracy_metrics.dropna(how='all', inplace=True)# Sort by rMAE (second column)
-        accuracy_metrics.sort_values(by=accuracy_metrics.columns[1], ascending=True, inplace=True)
-        accuracy_metrics = accuracy_metrics.round(4)
-        col1, col2 = st.columns([3, 2])
-        with col1:
-            st.dataframe(accuracy_metrics)
-        with col2:
-            st.markdown("""
-                <style>
-                .big-font {
-                    font-size: 20px;
-                    font-weight: 500;
-                }
-                </style>
-                <div class="big-font">
-                Equations
-                </div>
-                """, unsafe_allow_html=True)
-            st.markdown(r"""
-            $\text{MAE} = \frac{1}{n}\sum_{i=1}^{n}|y_i - \hat{y}_i|$
-            $\text{rMAE} = \frac{\text{MAE}}{MAE_{\text{Persistence Model}}}$
-            """)

     return df
+github_token = '<REDACTED: hard-coded GitHub personal access token>'
 if github_token:
+    forecast_dict = load_forecast(github_token)
+    historical_forecast=load_GitHub(github_token, 'Historical_forecast.csv')
+    Data_BE=load_GitHub(github_token, 'BE_Elia_Entsoe_UTC.csv')
+    Data_FR=load_GitHub(github_token, 'FR_Entsoe_UTC.csv')
+    Data_NL=load_GitHub(github_token, 'NL_Entsoe_UTC.csv')
+    Data_DE=load_GitHub(github_token, 'DE_Entsoe_UTC.csv')
     Data_BE=convert_European_time(Data_BE, 'Europe/Brussels')
     Data_FR=convert_European_time(Data_FR, 'Europe/Paris')
 else:
     print("Please enter your GitHub Personal Access Token to proceed.")
 # Main layout of the app
 col1, col2 = st.columns([5, 2])  # Adjust the ratio to better fit your layout needs
 """, unsafe_allow_html=True)
 countries = {
     'Netherlands': 'NL',
     'Germany': 'DE',
     st.write('The table below presents the data quality metrics for various energy-related datasets, focusing on the percentage of missing values and the occurrence of extreme or nonsensical values for the selected country.')
     data_quality=data.iloc[:-28]
     # Report % of missing values
     missing_values = data_quality[forecast_columns].isna().mean() * 100
     missing_values = missing_values.round(2)
     'Load_entsoe','Load_forecast_entsoe','Wind_onshore_entsoe','Wind_onshore_forecast_entsoe','Wind_offshore_entsoe','Wind_offshore_forecast_entsoe','Solar_entsoe','Solar_forecast_entsoe']
     num_per_var=2
+    forecast_columns_line=forecast_columns
     for i in range(0, len(forecast_columns_line), num_per_var):
         actual_col = forecast_columns_line[i]
         forecast_col = forecast_columns_line[i + 1]
         if forecast_col in data.columns:
             fig = go.Figure()
             fig.add_trace(go.Scatter(x=last_week.index, y=last_week[actual_col], mode='lines', name='Actual'))
             fig.add_trace(go.Scatter(x=last_week.index, y=last_week[forecast_col], mode='lines', name='Forecast ENTSO-E'))
             fig.update_layout(title=f'Forecasts vs Actual for {actual_col}', xaxis_title='Date', yaxis_title='Value [MW]')
             st.plotly_chart(fig)
         )
         return fig
     # Scatter plots for error distribution
     output_text = f"The below metrics are calculated from the selected date range from {start_date.strftime('%Y-%m-%d')} to {end_date.strftime('%Y-%m-%d')}. This interval can be adjusted from the sidebar."
     st.write(output_text)
+    data = data.loc[start_date:end_date]
+    accuracy_metrics = pd.DataFrame(columns=['MAE', 'rMAE'], index=['Load', 'Solar', 'Wind Onshore', 'Wind Offshore'])
+    for i in range(0, len(forecast_columns), 2):
+        actual_col = forecast_columns[i]
+        forecast_col = forecast_columns[i + 1]
+        if forecast_col in data.columns:
+            obs = data[actual_col]
+            pred = data[forecast_col]
+            error = pred - obs
+            mae = round(np.mean(np.abs(error)),2)
+            if 'Load' in actual_col:
+                persistence = obs.shift(168)  # Weekly persistence
             else:
+                persistence = obs.shift(24)  # Daily persistence
+            # Using the whole year's data for rMAE calculations
+            rmae = round(mae / np.mean(np.abs(obs - persistence)),2)
+            row_label = 'Load' if 'Load' in actual_col else 'Solar' if 'Solar' in actual_col else 'Wind Offshore' if 'Wind_offshore' in actual_col else 'Wind Onshore'
+            accuracy_metrics.loc[row_label] = [mae, rmae]
+    accuracy_metrics.dropna(how='all', inplace=True)# Sort by rMAE (second column)
+    accuracy_metrics.sort_values(by=accuracy_metrics.columns[1], ascending=True, inplace=True)
+    accuracy_metrics = accuracy_metrics.round(4)
+    col1, col2 = st.columns([3, 2])
+    with col1:
+        st.dataframe(accuracy_metrics)
+    with col2:
+        st.markdown("""
+            <style>
+            .big-font {
+                font-size: 20px;
+                font-weight: 500;
+            }
+            </style>
+            <div class="big-font">
+            Equations
+            </div>
+            """, unsafe_allow_html=True)
+        st.markdown(r"""
+        $\text{MAE} = \frac{1}{n}\sum_{i=1}^{n}|y_i - \hat{y}_i|$
+        $\text{rMAE} = \frac{\text{MAE}}{MAE_{\text{Persistence Model}}}$
+        """)