mmmapms committed on
Commit
b26e24a
1 Parent(s): e9b4b1e

Upload app_margarida_V3.py

Browse files
Files changed (1) hide show
  1. app_margarida_V3.py +675 -0
app_margarida_V3.py ADDED
@@ -0,0 +1,675 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import requests
2
+ import pandas as pd
3
+ from io import StringIO
4
+ import streamlit as st
5
+ import os
6
+ import plotly.express as px
7
+ import plotly.graph_objects as go
8
+ import plotly.colors as pc
9
+ import numpy as np
10
+ from sklearn.metrics import mean_squared_error
11
+ from statsmodels.tsa.stattools import acf
12
+ from statsmodels.graphics.tsaplots import plot_acf
13
+ import matplotlib.pyplot as plt
14
+
15
+
16
+ ##GET ALL FILES FROM GITHUB
17
def load_GitHub(github_token, file_name):
    """Download one CSV file from the Transparency_Data repository.

    Args:
        github_token: Personal access token sent in the ``Authorization``
            header of the request.
        file_name: Name of the file, relative to the root of the ``main``
            branch.

    Returns:
        The parsed ``DataFrame``. When the CSV has a ``Date`` column it is
        converted to datetimes and used as the index. ``None`` is returned
        when the request fails or the response status is not 200.
    """
    url = f'https://raw.githubusercontent.com/margaridamascarenhas/Transparency_Data/main/{file_name}'
    headers = {'Authorization': f'token {github_token}'}

    try:
        # Without a timeout a stalled connection would block the app forever.
        response = requests.get(url, headers=headers, timeout=30)
    except requests.RequestException as exc:
        # Keep the "None on failure" contract for network errors too.
        print(f"Failed to download {file_name}. Error: {exc}")
        return None

    if response.status_code != 200:
        print(f"Failed to download {file_name}. Status code: {response.status_code}")
        return None

    df = pd.read_csv(StringIO(response.text))
    if 'Date' in df.columns:
        df['Date'] = pd.to_datetime(df['Date'])  # Convert 'Date' column to datetime
        df.set_index('Date', inplace=True)  # Set 'Date' column as the index
    return df
34
+
35
def load_forecast(github_token):
    """Fetch the 24 hourly prediction files and return those that loaded.

    Files are requested in order, ``Predictions_0h.csv`` through
    ``Predictions_23h.csv``. The result maps each file name to its
    DataFrame; files for which ``load_GitHub`` returned ``None`` are left out.
    """
    file_names = (f'Predictions_{hour}h.csv' for hour in range(24))
    downloads = ((name, load_GitHub(github_token, name)) for name in file_names)
    return {name: frame for name, frame in downloads if frame is not None}
43
+
44
def convert_European_time(data, time_zone):
    """Re-express the index of ``data`` as naive wall-clock time in ``time_zone``.

    The index is parsed as UTC, converted to ``time_zone`` and then stripped
    of its timezone information. ``data`` is modified in place and returned.
    """
    utc_index = pd.to_datetime(data.index, utc=True)
    local_index = utc_index.tz_convert(time_zone)
    data.index = local_index.tz_localize(None)
    return data
49
+
50
# SECURITY: a GitHub personal access token used to be hard-coded on this line.
# That token is exposed in the repository history and must be revoked. The
# token is now read from the GITHUB_TOKEN environment variable instead.
github_token = os.environ.get('GITHUB_TOKEN')

if not github_token:
    # Nothing below can run without the datasets, so stop the script here
    # instead of failing later with a NameError.
    st.error("Please set the GITHUB_TOKEN environment variable to your GitHub Personal Access Token to proceed.")
    st.stop()

forecast_dict = load_forecast(github_token)

historical_forecast = load_GitHub(github_token, 'Historical_forecast.csv')

Data_BE = load_GitHub(github_token, 'BE_Elia_Entsoe_UTC.csv')
Data_FR = load_GitHub(github_token, 'FR_Entsoe_UTC.csv')
Data_NL = load_GitHub(github_token, 'NL_Entsoe_UTC.csv')
Data_DE = load_GitHub(github_token, 'DE_Entsoe_UTC.csv')

# load_GitHub returns None on failure; report that clearly instead of
# crashing inside convert_European_time.
_failed_downloads = [
    name
    for name, frame in (
        ('Historical_forecast.csv', historical_forecast),
        ('BE_Elia_Entsoe_UTC.csv', Data_BE),
        ('FR_Entsoe_UTC.csv', Data_FR),
        ('NL_Entsoe_UTC.csv', Data_NL),
        ('DE_Entsoe_UTC.csv', Data_DE),
    )
    if frame is None
]
if _failed_downloads:
    st.error(f"Failed to download: {', '.join(_failed_downloads)}")
    st.stop()

Data_BE = convert_European_time(Data_BE, 'Europe/Brussels')
Data_FR = convert_European_time(Data_FR, 'Europe/Paris')
Data_NL = convert_European_time(Data_NL, 'Europe/Amsterdam')
Data_DE = convert_European_time(Data_DE, 'Europe/Berlin')
70
+
71
def conformal_predictions(data, target, my_forecast):
    """Add rolling 30-day residual-quantile intervals around a forecast.

    For every calendar day that has at least 30 days of history, the 0.8
    quantile of the residuals (forecast minus actual) of the preceding 30
    days is computed per hour of day. That quantile is subtracted from and
    added to the forecast to form ``Lower_Interval`` and ``Upper_Interval``.

    Args:
        data: DataFrame with a ``DatetimeIndex``; modified in place.
        target: Name of the column holding the observed values.
        my_forecast: Name of the column holding the forecast.

    Returns:
        ``data`` with the columns ``Residuals``, ``Hour``, ``Quantile_80``,
        ``Lower_Interval`` and ``Upper_Interval`` added. Rows without enough
        history keep NaN in the last three.
    """
    # The residual used to be computed against a module-level ``actual_col``
    # instead of the ``target`` argument, which only worked when the caller
    # happened to define that global.
    data['Residuals'] = data[my_forecast] - data[target]
    data['Hour'] = data.index.hour
    # NOTE(review): the quantile is taken over signed residuals; a symmetric
    # band usually uses absolute residuals -- confirm the intended method.

    # Create the output columns up front so callers can always read them and
    # values from an earlier call on the same frame never leak through.
    for column in ('Quantile_80', 'Lower_Interval', 'Upper_Interval'):
        data[column] = float('nan')

    days = data.index.normalize()
    history = pd.DateOffset(days=30)
    first_usable_day = data.index.min() + history

    for date in days.unique():
        if date < first_usable_day or date not in data.index:
            continue

        # Calibration window: the 30 days that end one hour before this day.
        calculation_window = data[date - history:date - pd.DateOffset(hours=1)]
        quantiles = calculation_window.groupby('Hour')['Residuals'].quantile(0.8)

        on_day = days == date
        for hour in data.loc[on_day, 'Hour'].unique():
            if hour not in quantiles.index:
                continue
            hour_quantile = quantiles[hour]
            idx = on_day & (data['Hour'] == hour)
            data.loc[idx, 'Quantile_80'] = hour_quantile
            data.loc[idx, 'Lower_Interval'] = data.loc[idx, my_forecast] - hour_quantile
            data.loc[idx, 'Upper_Interval'] = data.loc[idx, my_forecast] + hour_quantile
    return data
94
+
95
+
96
st.title("Transparency++")

# Display name shown in the sidebar -> country code used to pick the dataset.
countries = dict(Belgium='BE', Netherlands='NL', Germany='DE', France='FR')

st.sidebar.header('Filters')

selected_country = st.sidebar.selectbox('Select Country', list(countries))

st.write()
default_range = (pd.to_datetime("2024-01-01"), pd.to_datetime(pd.Timestamp('today')))
date_range = st.sidebar.date_input("Select Date Range for Metrics Calculation:",
                                   value=default_range)

# The widget yields fewer than two dates while the user is still picking.
if len(date_range) != 2:
    st.error("Please select a valid date range.")
    st.stop()
start_date, end_date = (pd.Timestamp(day) for day in date_range)

# Sidebar with radio buttons for different sections
section = st.sidebar.radio('Section', ['Data', 'Forecasts', 'Insights'])

country_code = countries[selected_country]

# Per country: the dataset, the weather columns offered in the Insights
# section, and the (display column, source column) copies to create.
country_setup = {
    'BE': (
        Data_BE,
        ['Temperature', 'Wind Speed Onshore', 'Wind Speed Offshore'],
        [('Temperature', 'temperature_2m_8'),
         ('Wind Speed Offshore', 'wind_speed_100m_4'),
         ('Wind Speed Onshore', 'wind_speed_100m_8')],
    ),
    'DE': (
        Data_DE,
        ['Temperature', 'Wind Speed'],
        [('Temperature', 'temperature_2m'), ('Wind Speed', 'wind_speed_100m')],
    ),
    'NL': (
        Data_NL,
        ['Temperature', 'Wind Speed'],
        [('Temperature', 'temperature_2m'), ('Wind Speed', 'wind_speed_100m')],
    ),
    'FR': (
        Data_FR,
        ['Temperature', 'Wind Speed'],
        [('Temperature', 'temperature_2m'), ('Wind Speed', 'wind_speed_100m')],
    ),
}

data, weather_columns, derived_columns = country_setup[country_code]
for display_name, source_name in derived_columns:
    data[display_name] = data[source_name]
153
def add_feature(df2, df_main):
    """Merge ``df2`` into ``df_main``, giving ``df_main`` priority.

    By default the result is ``df_main.combine_first(df2)``. When ``df2``
    starts exactly one hour after ``df_main`` ends, the rows of ``df2`` that
    lie after the end of ``df_main`` are appended to ``df_main`` instead.
    """
    merged = df_main.combine_first(df2)
    main_end = df_main.index.max()
    follows_directly = df2.index.min() == main_end + pd.Timedelta(hours=1)
    if not follows_directly:
        return merged
    later_rows = df2[df2.index > main_end]
    return pd.concat([df_main, later_rows], axis=0)
163
+ #data.index = data.index.tz_localize('UTC')
164
# Restrict the selected country's data to the date range from the sidebar.
data = data.loc[start_date:end_date]

# Alternating (actual, forecast) ENTSO-E column names.
forecast_columns = [
    'Load_entsoe', 'Load_forecast_entsoe',
    'Wind_onshore_entsoe', 'Wind_onshore_forecast_entsoe',
    'Wind_offshore_entsoe', 'Wind_offshore_forecast_entsoe',
    'Solar_entsoe', 'Solar_forecast_entsoe',
]

if section == 'Data':
    st.header("Data")
    st.write("""
    This section allows you to explore and upload your datasets.
    You can visualize raw data, clean it, and prepare it for analysis.
    """)

    st.header('Data Quality')

    output_text = f"The below percentages are calculated from the selected date range from {start_date.strftime('%Y-%m-%d')} to {end_date.strftime('%Y-%m-%d')}. This interval can be adjusted from the sidebar."
    st.write(output_text)

    # Share of missing values per column, in percent.
    missing_values = (data[forecast_columns].isna().mean() * 100).round(2)

    installed_capacities = {
        'FR': {'Solar': 17419, 'Wind Offshore': 1483, 'Wind Onshore': 22134},
        'DE': {'Solar': 73821, 'Wind Offshore': 8386, 'Wind Onshore': 59915},
        'BE': {'Solar': 8789, 'Wind Offshore': 2262, 'Wind Onshore': 3053},
        'NL': {'Solar': 22590, 'Wind Offshore': 3220, 'Wind Onshore': 6190},
    }

    if country_code not in installed_capacities:
        st.error(f"Installed capacities not defined for country code '{country_code}'.")
        st.stop()

    # Share of impossible values per column: negative, or (for generation
    # columns) above the installed capacity of the selected country.
    capacities = installed_capacities[country_code]
    capacity_key_by_source = {
        'Solar': 'Solar',
        'Wind_onshore': 'Wind Onshore',
        'Wind_offshore': 'Wind Offshore',
    }
    extreme_values = {}
    for col in forecast_columns:
        series = data[col]
        implausible = series < 0
        source = col.replace('_forecast_entsoe', '').replace('_entsoe', '')
        if source in capacity_key_by_source:
            # Load columns have no capacity bound, so only generation
            # columns get the upper check.
            implausible = implausible | (series > capacities[capacity_key_by_source[source]])
        extreme_values[col] = implausible.mean() * 100

    extreme_values = pd.Series(extreme_values).round(2)

    # Combine all metrics into one DataFrame
    metrics_df = pd.DataFrame({
        'Missing Values (%)': missing_values,
        'Extreme/Nonsensical Values (%)': extreme_values,
    })

    st.markdown(
        """
        <style>
        .dataframe {font-size: 45px !important;}
        </style>
        """,
        unsafe_allow_html=True
    )

    st.dataframe(metrics_df)

    st.write('<b><u>Missing values (%)</u></b>: Percentage of missing values in the dataset', unsafe_allow_html=True)
    st.write('<b><u>Extreme/Nonsensical values (%)</u></b>: Values that are considered implausible such as negative or out-of-bound values i.e., (generation<0) or (generation>capacity)', unsafe_allow_html=True)
241
+
242
+ # Section 2: Forecasts
243
+ elif section == 'Forecasts':
244
+
245
+ st.header('Forecast Quality')
246
+
247
+ # Time series for last 1 week
248
+ st.subheader('Time Series: Last 1 Week')
249
+ last_week = Data_BE.loc[Data_BE.index >= (data.index[-1] - pd.Timedelta(days=7))]
250
+ st.write('The below plots show the time series of forecasts vs. observations provided by the ENTSO-E Transparency platform between the selected data range.')
251
+ forecast_columns_operational = [
252
+ 'Load_entsoe','Load_forecast_entsoe', 'Load_LightGBMModel.7D.TimeCov.Temp.Forecast_elia', 'Wind_onshore_entsoe','Wind_onshore_forecast_entsoe','Wind_onshore_LightGBMModel.1D.TimeCov.Temp.Forecast_elia','Wind_offshore_entsoe','Wind_offshore_forecast_entsoe','Wind_offshore_LightGBMModel.1D.TimeCov.Temp.Forecast_elia','Solar_entsoe','Solar_forecast_entsoe', 'Solar_LightGBMModel.1D.TimeCov.Temp.Forecast_elia']
253
+ forecast_columns = [
254
+ 'Load_entsoe','Load_forecast_entsoe','Wind_onshore_entsoe','Wind_onshore_forecast_entsoe','Wind_offshore_entsoe','Wind_offshore_forecast_entsoe','Solar_entsoe','Solar_forecast_entsoe']
255
+
256
+ operation_forecast_load=forecast_dict['Predictions_10h.csv'].filter(like='Load_', axis=1)
257
+ operation_forecast_res=forecast_dict['Predictions_17h.csv'].filter(regex='^(?!Load_)')
258
+ operation_forecast_load.columns = [col.replace('_entsoe.', '_').replace('Naive.7D', 'WeeklyNaiveSeasonal') for col in operation_forecast_load.columns]
259
+ operation_forecast_res.columns = [col.replace('_entsoe.', '_').replace('Naive.1D', 'DailyNaiveSeasonal') for col in operation_forecast_res.columns]
260
+ Historical_and_Load=add_feature(operation_forecast_load, historical_forecast)
261
+ Historical_and_operational=add_feature(operation_forecast_res, Historical_and_Load)
262
+ #print(Historical_and_operational.filter(like='Forecast_elia', axis=1))
263
+ best_forecast = Historical_and_operational.filter(like='Forecast_elia', axis=1)
264
+ df_combined = Historical_and_operational.join(Data_BE, how='inner')
265
+ last_week_best_forecast = best_forecast.loc[best_forecast.index >= (best_forecast.index[-24] - pd.Timedelta(days=8))]
266
+
267
+
268
+ for i in range(0, len(forecast_columns_operational), 3):
269
+ actual_col = forecast_columns_operational[i]
270
+ forecast_col = forecast_columns_operational[i + 1]
271
+ my_forecast = forecast_columns_operational[i + 2]
272
+
273
+
274
+ if forecast_col in data.columns:
275
+ fig = go.Figure()
276
+ fig.add_trace(go.Scatter(x=last_week.index, y=last_week[actual_col], mode='lines', name='Actual'))
277
+ fig.add_trace(go.Scatter(x=last_week.index, y=last_week[forecast_col], mode='lines', name='Forecast ENTSO-E'))
278
+
279
+ if country_code=='BE':
280
+ conformal=conformal_predictions(df_combined, actual_col, my_forecast)
281
+ last_week_conformal = conformal.loc[conformal.index >= (conformal.index[-24] - pd.Timedelta(days=8))]
282
+ if actual_col =='Load_entsoe':
283
+ last_week_conformal = conformal.loc[conformal.index >= (conformal.index[-24] - pd.Timedelta(days=5))]
284
+ fig.add_trace(go.Scatter(x=last_week_best_forecast.index, y=last_week_best_forecast[my_forecast], mode='lines', name='Forecast EDS'))
285
+
286
+ fig.add_trace(go.Scatter(
287
+ x=last_week_conformal.index,
288
+ y=last_week_conformal['Lower_Interval'],
289
+ mode='lines',
290
+ line=dict(width=0),
291
+ showlegend=False
292
+ ))
293
+
294
+ # Add the upper interval trace and fill to the lower interval
295
+ fig.add_trace(go.Scatter(
296
+ x=last_week_conformal.index,
297
+ y=last_week_conformal['Upper_Interval'],
298
+ mode='lines',
299
+ line=dict(width=0),
300
+ fill='tonexty', # Fill between this trace and the previous one
301
+ fillcolor='rgba(68, 68, 68, 0.3)',
302
+ name='P10/P90 prediction intervals'
303
+ ))
304
+
305
+
306
+ fig.update_layout(title=f'Forecasts vs Actual for {actual_col}', xaxis_title='Date', yaxis_title='Value [MW]')
307
+
308
+ st.plotly_chart(fig)
309
+
310
+
311
+ def plot_category(df_dict, category_prefix, title):
312
+ fig = go.Figure()
313
+
314
+ # Define base colors for each model
315
+ model_colors = {
316
+ 'LightGBMModel.TimeCov.Temp.Forecast_elia': '#1f77b4', # Blue
317
+ 'LightGBMModel.TimeCov.Temp': '#2ca02c', # Green
318
+ 'Naive': '#ff7f0e' # Orange
319
+ }
320
+
321
+ # To keep track of which model has been added to the legend
322
+ legend_added = {'LightGBMModel.TimeCov.Temp.Forecast_elia': False, 'LightGBMModel.TimeCov.Temp': False, 'Naive': False}
323
+
324
+ for file_name, df in df_dict.items():
325
+ # Extract the hour from the filename, assuming the format is "Predictions_Xh.csv"
326
+ hour = int(file_name.split('_')[1].replace('h.csv', ''))
327
+
328
+ filtered_columns = [col for col in df.columns if col.startswith(category_prefix)]
329
+ for column in filtered_columns:
330
+ # Identify the model type with more precise logic
331
+ if 'LightGBMModel' in column:
332
+ if 'Forecast_elia' in column:
333
+ model_key = 'LightGBMModel.TimeCov.Temp.Forecast_elia'
334
+ elif 'TimeCov' in column:
335
+ model_key = 'LightGBMModel.TimeCov.Temp'
336
+ elif 'Naive' in column:
337
+ model_key = 'Naive'
338
+ else:
339
+ continue # Skip if it doesn't match any model type
340
+
341
+ # Extract the relevant part of the model name
342
+ parts = column.split('.')
343
+ model_name_parts = parts[1:] # Skip the variable prefix
344
+ model_name = '.'.join(model_name_parts) # Rejoin the parts to form the model name
345
+
346
+ # Get the base color for the model
347
+ base_color = model_colors[model_key]
348
+
349
+ # Calculate the color shade based on the hour
350
+ color_scale = pc.hex_to_rgb(base_color)
351
+ scale_factor = 0.3 + (hour / 40) # Adjust scale to ensure the gradient is visible
352
+ adjusted_color = tuple(int(c * scale_factor) for c in color_scale)
353
+ # Convert to RGBA with transparency for plot lines
354
+ line_color = f'rgba({adjusted_color[0]}, {adjusted_color[1]}, {adjusted_color[2]}, 0.1)' # Transparent color for lines
355
+
356
+ # Combine the hour and the model name for the legend, but only add the legend entry once
357
+ show_legend = not legend_added[model_key]
358
+
359
+ fig.add_trace(go.Scatter(
360
+ x=df.index, # Assuming 'Date' is the index, use 'df.index' for x-axis
361
+ y=df[column],
362
+ mode='lines',
363
+ name=model_name if show_legend else None, # Use the model name for the legend, but only once
364
+ line=dict(color=base_color if show_legend else line_color), # Use opaque color for legend, transparent for lines
365
+ showlegend=show_legend, # Show legend only once per model
366
+ legendgroup=model_key # Grouping for consistent legend color
367
+ ))
368
+
369
+ # Mark that this model has been added to the legend
370
+ if show_legend:
371
+ legend_added[model_key] = True
372
+
373
+ # Add real values as a separate trace, if provided
374
+ filtered_Data_BE_df = Data_BE.loc[df.index]
375
+
376
+ if filtered_Data_BE_df[f'{category_prefix}_entsoe'].notna().any():
377
+ fig.add_trace(go.Scatter(
378
+ x=filtered_Data_BE_df.index,
379
+ y=filtered_Data_BE_df[f'{category_prefix}_entsoe'],
380
+ mode='lines',
381
+ name=f'Actual {category_prefix}',
382
+ line=dict(color='black', width=2), # Black line for real values
383
+ showlegend=True # Always show this in the legend
384
+ ))
385
+
386
+ # Update layout to position the legend at the top, side by side
387
+ fig.update_layout(
388
+ title=dict(
389
+ text=title,
390
+ x=0, # Center the title horizontally
391
+ y=1.00, # Slightly lower the title to create more space
392
+ xanchor='left',
393
+ yanchor='top'
394
+ ),
395
+ xaxis_title='Date',
396
+ yaxis_title='Value',
397
+ legend=dict(
398
+ orientation="h", # Horizontal legend
399
+ yanchor="bottom", # Align to the bottom of the legend box
400
+ y=1, # Increase y position to avoid overlap with the title
401
+ xanchor="center", # Center the legend horizontally
402
+ x=0.5 # Position at the center of the plot
403
+ )
404
+ )
405
+ return fig
406
+
407
+ if country_code == "BE":
408
+ st.header('EDS Forecasts by Hour')
409
+
410
+ solar_fig = plot_category(forecast_dict, 'Solar', 'Solar Predictions')
411
+ st.plotly_chart(solar_fig)
412
+
413
+ wind_offshore_fig = plot_category(forecast_dict, 'Wind_offshore', 'Wind Offshore Predictions')
414
+ st.plotly_chart(wind_offshore_fig)
415
+
416
+ wind_onshore_fig = plot_category(forecast_dict, 'Wind_onshore', 'Wind Onshore Predictions')
417
+ st.plotly_chart(wind_onshore_fig)
418
+
419
+ load_fig = plot_category(forecast_dict, 'Load', 'Load Predictions')
420
+ st.plotly_chart(load_fig)
421
+
422
+ # Scatter plots for error distribution
423
+ st.subheader('Error Distribution')
424
+ st.write('The below scatter plots show the error distribution of all three fields: Solar, Wind and Load between the selected date range')
425
+ for i in range(0, len(forecast_columns), 2):
426
+ actual_col = forecast_columns[i]
427
+ forecast_col = forecast_columns[i + 1]
428
+ if forecast_col in data.columns:
429
+ obs = last_week[actual_col]
430
+ pred = last_week[forecast_col]
431
+ error = pred - obs
432
+
433
+ fig = px.scatter(x=obs, y=pred, labels={'x': 'Observed [MW]', 'y': 'Predicted by ENTSO-E [MW]'})
434
+ fig.update_layout(title=f'Error Distribution for {forecast_col}')
435
+ st.plotly_chart(fig)
436
+
437
+
438
+
439
+ st.subheader('Accuracy Metrics (Sorted by rMAE):')
440
+
441
+ if country_code == "BE":
442
+
443
+ # Combine the two DataFrames on their index
444
+ df_combined = Historical_and_operational.join(Data_BE, how='inner')
445
+ # List of model columns from historical_forecast
446
+ model_columns = historical_forecast.columns
447
+
448
+ # Initialize dictionaries to store MAE and RMSE results for each variable
449
+ results_wind_onshore = {}
450
+ results_wind_offshore = {}
451
+ results_load = {}
452
+ results_solar = {}
453
+
454
+ # Mapping of variables to their corresponding naive models
455
+ naive_models = {
456
+ 'Wind_onshore': 'Wind_onshore_DailyNaiveSeasonal',
457
+ 'Wind_offshore': 'Wind_offshore_DailyNaiveSeasonal',
458
+ 'Load': 'Load_WeeklyNaiveSeasonal',
459
+ 'Solar': 'Solar_DailyNaiveSeasonal'
460
+ }
461
+
462
+ # Step 1: Calculate MAE, RMSE, and rMAE for each model
463
+ for col in model_columns:
464
+ # Extract the variable name by taking everything before the first underscore
465
+ base_variable = col.split('_')[0]
466
+
467
+ # Handle cases where variable names might be combined with multiple parts (e.g., "Load_LightGBMModel...")
468
+ if base_variable in ['Wind', 'Load', 'Solar']:
469
+ if 'onshore' in col:
470
+ variable_name = 'Wind_onshore'
471
+ results_dict = results_wind_onshore
472
+ elif 'offshore' in col:
473
+ variable_name = 'Wind_offshore'
474
+ results_dict = results_wind_offshore
475
+ else:
476
+ variable_name = base_variable
477
+ results_dict = results_load if base_variable == 'Load' else results_solar
478
+ else:
479
+ variable_name = base_variable
480
+
481
+ # Construct the corresponding `variable_entsoe` column name
482
+ entsoe_column = f'{variable_name}_entsoe'
483
+ naive_model_col = naive_models.get(variable_name, None)
484
+
485
+ # Drop NaNs for the specific pair of columns before calculating MAE and RMSE
486
+ if entsoe_column in df_combined.columns and naive_model_col in df_combined.columns:
487
+ valid_data = df_combined[[col, entsoe_column]].dropna()
488
+ valid_naive_data = df_combined[[entsoe_column, naive_model_col]].dropna()
489
+
490
+ # Calculate MAE and RMSE for the model against the `variable_entsoe`
491
+ mae = np.mean(abs(valid_data[col] - valid_data[entsoe_column]))
492
+ rmse = np.sqrt(mean_squared_error(valid_data[col], valid_data[entsoe_column]))
493
+
494
+ # Calculate MAE for the Naive model
495
+ mae_naive = np.mean(abs(valid_naive_data[entsoe_column] - valid_naive_data[naive_model_col]))
496
+
497
+ # Calculate rMAE for the model
498
+ rMAE = mae / mae_naive if mae_naive != 0 else np.inf
499
+
500
+ # Store the results in the corresponding dictionary
501
+ results_dict[f'{col}'] = {'MAE': mae, 'RMSE': rmse, 'rMAE': rMAE}
502
+
503
+ # Step 2: Calculate MAE, RMSE, and rMAE for ENTSO-E forecasts specifically
504
+ for variable_name in naive_models.keys():
505
+ entsoe_column = f'{variable_name}_entsoe'
506
+ forecast_entsoe_column = f'{variable_name}_forecast_entsoe'
507
+ naive_model_col = naive_models[variable_name]
508
+
509
+ # Ensure that the ENTSO-E forecast is included in the results
510
+ if forecast_entsoe_column in df_combined.columns:
511
+ valid_data = df_combined[[forecast_entsoe_column, entsoe_column]].dropna()
512
+ valid_naive_data = df_combined[[entsoe_column, naive_model_col]].dropna()
513
+
514
+ # Calculate MAE and RMSE for the ENTSO-E forecast against the actuals
515
+ mae_entsoe = np.mean(abs(valid_data[forecast_entsoe_column] - valid_data[entsoe_column]))
516
+ rmse_entsoe = np.sqrt(mean_squared_error(valid_data[forecast_entsoe_column], valid_data[entsoe_column]))
517
+
518
+ # Calculate rMAE for the ENTSO-E forecast
519
+ mae_naive = np.mean(abs(valid_naive_data[entsoe_column] - valid_naive_data[naive_model_col]))
520
+ rMAE_entsoe = mae_entsoe / mae_naive if mae_naive != 0 else np.inf
521
+
522
+ # Add the ENTSO-E results to the corresponding dictionary
523
+ if variable_name == 'Wind_onshore':
524
+ results_wind_onshore[forecast_entsoe_column] = {'MAE': mae_entsoe, 'RMSE': rmse_entsoe, 'rMAE': rMAE_entsoe}
525
+ elif variable_name == 'Wind_offshore':
526
+ results_wind_offshore[forecast_entsoe_column] = {'MAE': mae_entsoe, 'RMSE': rmse_entsoe, 'rMAE': rMAE_entsoe}
527
+ elif variable_name == 'Load':
528
+ results_load[forecast_entsoe_column] = {'MAE': mae_entsoe, 'RMSE': rmse_entsoe, 'rMAE': rMAE_entsoe}
529
+ elif variable_name == 'Solar':
530
+ results_solar[forecast_entsoe_column] = {'MAE': mae_entsoe, 'RMSE': rmse_entsoe, 'rMAE': rMAE_entsoe}
531
+
532
+ # Convert the dictionaries to DataFrames and sort by rMAE
533
+ df_wind_onshore = pd.DataFrame.from_dict(results_wind_onshore, orient='index').sort_values(by='rMAE')
534
+ df_wind_offshore = pd.DataFrame.from_dict(results_wind_offshore, orient='index').sort_values(by='rMAE')
535
+ df_load = pd.DataFrame.from_dict(results_load, orient='index').sort_values(by='rMAE')
536
+ df_solar = pd.DataFrame.from_dict(results_solar, orient='index').sort_values(by='rMAE')
537
+
538
+
539
+ st.write("##### Wind Onshore:")
540
+ st.dataframe(df_wind_onshore)
541
+
542
+ st.write("##### Wind Offshore:")
543
+ st.dataframe(df_wind_offshore)
544
+
545
+ st.write("##### Load:")
546
+ st.dataframe(df_load)
547
+
548
+ st.write("##### Solar:")
549
+ st.dataframe(df_solar)
550
+
551
+
552
+
553
+ else:
554
+ accuracy_metrics = pd.DataFrame(columns=['MAE', 'rMAE'], index=['Load', 'Solar', 'Wind Onshore', 'Wind Offshore'])
555
+
556
+ for i in range(0, len(forecast_columns), 2):
557
+ actual_col = forecast_columns[i]
558
+ forecast_col = forecast_columns[i + 1]
559
+ if forecast_col in data.columns:
560
+ obs = data[actual_col]
561
+ pred = data[forecast_col]
562
+ error = pred - obs
563
+
564
+ mae = round(np.mean(np.abs(error)),2)
565
+ if 'Load' in actual_col:
566
+ persistence = obs.shift(168) # Weekly persistence
567
+ else:
568
+ persistence = obs.shift(24) # Daily persistence
569
+
570
+ # Using the whole year's data for rMAE calculations
571
+ rmae = round(mae / np.mean(np.abs(obs - persistence)),2)
572
+
573
+ row_label = 'Load' if 'Load' in actual_col else 'Solar' if 'Solar' in actual_col else 'Wind Offshore' if 'Wind_offshore' in actual_col else 'Wind Onshore'
574
+ accuracy_metrics.loc[row_label] = [mae, rmae]
575
+
576
+ accuracy_metrics.dropna(how='all', inplace=True)# Sort by rMAE (second column)
577
+ accuracy_metrics.sort_values(by=accuracy_metrics.columns[1], ascending=True, inplace=True)
578
+ accuracy_metrics = accuracy_metrics.round(4)
579
+
580
+ col1, col2 = st.columns([3, 2])
581
+
582
+ with col1:
583
+ st.dataframe(accuracy_metrics)
584
+
585
+ with col2:
586
+ st.markdown("""
587
+ <style>
588
+ .big-font {
589
+ font-size: 20px;
590
+ font-weight: 500;
591
+ }
592
+ </style>
593
+ <div class="big-font">
594
+ Equations
595
+ </div>
596
+ """, unsafe_allow_html=True)
597
+
598
+ st.markdown(r"""
599
+ $\text{MAE} = \frac{1}{n}\sum_{i=1}^{n}|y_i - \hat{y}_i|$
600
+
601
+
602
+ $\text{rMAE} = \frac{\text{MAE}}{MAE_{\text{Persistence Model}}}$
603
+
604
+
605
+ """)
606
+
607
+
608
+
609
+ st.subheader('ACF plots of Errors')
610
+ st.write('The below plots show the ACF (Auto-Correlation Function) for the errors of all three fields: Solar, Wind and Load.')
611
+
612
+ for i in range(0, len(forecast_columns), 2):
613
+ actual_col = forecast_columns[i]
614
+ forecast_col = forecast_columns[i + 1]
615
+ if forecast_col in data.columns:
616
+ obs = data[actual_col]
617
+ pred = data[forecast_col]
618
+ error = pred - obs
619
+
620
+ st.write(f"**ACF of Errors for {actual_col}**")
621
+ fig, ax = plt.subplots(figsize=(10, 5))
622
+ plot_acf(error.dropna(), ax=ax)
623
+ st.pyplot(fig)
624
+
625
+ acf_values = acf(error.dropna(), nlags=240)
626
+
627
+ # Section 3: Insights
628
+ elif section == 'Insights':
629
+ st.header("Insights")
630
+ st.write("""
631
+ This section provides insights derived from the data and forecasts.
632
+ You can visualize trends, anomalies, and other important findings.
633
+ """)
634
+
635
+ # Scatter plots for correlation between wind, solar, and load
636
+ st.subheader('Correlation between Wind, Solar, and Load')
637
+ st.write('The below scatter plots for correlation between all three fields: Solar, Wind and Load.')
638
+
639
+ combinations = [('Solar_entsoe', 'Load_entsoe'), ('Wind_onshore_entsoe', 'Load_entsoe'), ('Wind_offshore_entsoe', 'Load_entsoe'), ('Solar_entsoe', 'Wind_onshore_entsoe'), ('Solar_entsoe', 'Wind_offshore_entsoe')]
640
+
641
+ for x_col, y_col in combinations:
642
+ if x_col in data.columns and y_col in data.columns:
643
+ # For solar combinations, filter out zero values
644
+ if 'Solar_entsoe' in x_col:
645
+ filtered_data = data[data['Solar_entsoe'] > 0]
646
+ x_values = filtered_data[x_col]
647
+ y_values = filtered_data[y_col]
648
+ else:
649
+ x_values = data[x_col]
650
+ y_values = data[y_col]
651
+
652
+ corr_coef = x_values.corr(y_values)
653
+ fig = px.scatter(
654
+ x=x_values,
655
+ y=y_values,
656
+ labels={'x': f'{x_col} [MW]', 'y': f'{y_col} [MW]'},
657
+ title=f'{x_col} vs {y_col} (Correlation: {corr_coef:.2f})', color_discrete_sequence=['grey'])
658
+ st.plotly_chart(fig)
659
+
660
+
661
+ st.subheader('Weather vs. Generation/Demand')
662
+ st.write('The below scatter plots show the relation between weather parameters (i.e., Temperature, Wind Speed) and generation/demand.')
663
+
664
+ for weather_col in weather_columns:
665
+ for actual_col in ['Load_entsoe', 'Solar_entsoe', 'Wind_onshore_entsoe', 'Wind_offshore_entsoe']:
666
+ if weather_col in data.columns and actual_col in data.columns:
667
+ clean_label = actual_col.replace('_entsoe', '')
668
+ if weather_col == 'Temperature':
669
+ fig = px.scatter(x=data[weather_col], y=data[actual_col], labels={'x': f'{weather_col} (°C)', 'y': f'{clean_label} Generation [MW]'}, color_discrete_sequence=['orange'])
670
+ else:
671
+ fig = px.scatter(x=data[weather_col], y=data[actual_col], labels={'x': f'{weather_col} (km/h)', 'y': clean_label})
672
+ fig.update_layout(title=f'{weather_col} vs {actual_col}')
673
+ st.plotly_chart(fig)
674
+
675
+