mmmapms committed on
Commit
6872779
1 Parent(s): 7bf8667

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +674 -674
app.py CHANGED
@@ -1,675 +1,675 @@
1
- import requests
2
- import pandas as pd
3
- from io import StringIO
4
- import streamlit as st
5
- import os
6
- import plotly.express as px
7
- import plotly.graph_objects as go
8
- import plotly.colors as pc
9
- import numpy as np
10
- from sklearn.metrics import mean_squared_error
11
- from statsmodels.tsa.stattools import acf
12
- from statsmodels.graphics.tsaplots import plot_acf
13
- import matplotlib.pyplot as plt
14
-
15
-
16
- ##GET ALL FILES FROM GITHUB
17
- def load_GitHub(github_token, file_name):
18
- url = f'https://raw.githubusercontent.com/margaridamascarenhas/Transparency_Data/main/{file_name}'
19
- headers = {'Authorization': f'token {github_token}'}
20
-
21
- response = requests.get(url, headers=headers)
22
-
23
- if response.status_code == 200:
24
- csv_content = StringIO(response.text)
25
- df = pd.read_csv(csv_content)
26
- if 'Date' in df.columns:
27
- df['Date'] = pd.to_datetime(df['Date']) # Convert 'Date' column to datetime
28
- df.set_index('Date', inplace=True) # Set 'Date' column as the index
29
- #df.to_csv(file_name)
30
- return df
31
- else:
32
- print(f"Failed to download {file_name}. Status code: {response.status_code}")
33
- return None
34
-
35
- def load_forecast(github_token):
36
- predictions_dict = {}
37
- for hour in range(24):
38
- file_name = f'Predictions_{hour}h.csv'
39
- df = load_GitHub(github_token, file_name)
40
- if df is not None:
41
- predictions_dict[file_name] = df
42
- return predictions_dict
43
-
44
- def convert_European_time(data, time_zone):
45
- data.index = pd.to_datetime(data.index, utc=True)
46
- data.index = data.index.tz_convert(time_zone)
47
- data.index = data.index.tz_localize(None)
48
- return data
49
-
50
- github_token = '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'
51
-
52
- if github_token:
53
- forecast_dict = load_forecast(github_token)
54
-
55
- historical_forecast=load_GitHub(github_token, 'Historical_forecast.csv')
56
-
57
- Data_BE=load_GitHub(github_token, 'BE_Elia_Entsoe_UTC.csv')
58
- Data_FR=load_GitHub(github_token, 'FR_Entsoe_UTC.csv')
59
- Data_NL=load_GitHub(github_token, 'NL_Entsoe_UTC.csv')
60
- Data_DE=load_GitHub(github_token, 'DE_Entsoe_UTC.csv')
61
-
62
- Data_BE=convert_European_time(Data_BE, 'Europe/Brussels')
63
- Data_FR=convert_European_time(Data_FR, 'Europe/Paris')
64
- Data_NL=convert_European_time(Data_NL, 'Europe/Amsterdam')
65
- Data_DE=convert_European_time(Data_DE, 'Europe/Berlin')
66
-
67
-
68
- else:
69
- print("Please enter your GitHub Personal Access Token to proceed.")
70
-
71
- def conformal_predictions(data, target, my_forecast):
72
- data['Residuals'] = data[my_forecast] - data[actual_col]
73
- data['Hour'] = data.index.hour
74
-
75
- min_date = data.index.min()
76
- for date in data.index.normalize().unique():
77
- if date >= min_date + pd.DateOffset(days=30):
78
- start_date = date - pd.DateOffset(days=30)
79
- end_date = date
80
- calculation_window = data[start_date:end_date-pd.DateOffset(hours=1)]
81
- quantiles = calculation_window.groupby('Hour')['Residuals'].quantile(0.8)
82
- # Use .loc to safely access and modify data
83
- if date in data.index:
84
- current_day_data = data.loc[date.strftime('%Y-%m-%d')]
85
- for hour in current_day_data['Hour'].unique():
86
- if hour in quantiles.index:
87
- hour_quantile = quantiles[hour]
88
- idx = (data.index.normalize() == date) & (data.Hour == hour)
89
- data.loc[idx, 'Quantile_80'] = hour_quantile
90
- data.loc[idx, 'Lower_Interval'] = data.loc[idx, my_forecast] - hour_quantile
91
- data.loc[idx, 'Upper_Interval'] = data.loc[idx, my_forecast] + hour_quantile
92
- #data.reset_index(inplace=True)
93
- return data
94
-
95
-
96
- st.title("Transparency++")
97
-
98
- countries = {
99
- 'Belgium': 'BE',
100
- 'Netherlands': 'NL',
101
- 'Germany': 'DE',
102
- 'France': 'FR',
103
- }
104
-
105
-
106
- st.sidebar.header('Filters')
107
-
108
- selected_country = st.sidebar.selectbox('Select Country', list(countries.keys()))
109
-
110
-
111
- st.write()
112
- date_range = st.sidebar.date_input("Select Date Range for Metrics Calculation:",
113
- value=(pd.to_datetime("2024-01-01"), pd.to_datetime(pd.Timestamp('today'))))
114
-
115
- # Ensure the date range provides two dates
116
- if len(date_range) == 2:
117
- start_date = pd.Timestamp(date_range[0])
118
- end_date = pd.Timestamp(date_range[1])
119
- else:
120
- st.error("Please select a valid date range.")
121
- st.stop()
122
-
123
- # Sidebar with radio buttons for different sections
124
- section = st.sidebar.radio('Section', ['Data', 'Forecasts', 'Insights'])
125
-
126
-
127
- country_code = countries[selected_country]
128
- if country_code == 'BE':
129
- data = Data_BE
130
- weather_columns = ['Temperature', 'Wind Speed Onshore', 'Wind Speed Offshore']
131
- data['Temperature'] = data['temperature_2m_8']
132
- data['Wind Speed Offshore'] = data['wind_speed_100m_4']
133
- data['Wind Speed Onshore'] = data['wind_speed_100m_8']
134
-
135
- elif country_code == 'DE':
136
- data = Data_DE
137
- weather_columns = ['Temperature', 'Wind Speed']
138
- data['Temperature'] = data['temperature_2m']
139
- data['Wind Speed'] = data['wind_speed_100m']
140
-
141
- elif country_code == 'NL':
142
- data = Data_NL
143
- weather_columns = ['Temperature', 'Wind Speed']
144
- data['Temperature'] = data['temperature_2m']
145
- data['Wind Speed'] = data['wind_speed_100m']
146
-
147
- elif country_code == 'FR':
148
- data = Data_FR
149
- weather_columns = ['Temperature', 'Wind Speed']
150
- data['Temperature'] = data['temperature_2m']
151
- data['Wind Speed'] = data['wind_speed_100m']
152
-
153
- def add_feature(df2, df_main):
154
- #df_main.index = pd.to_datetime(df_main.index)
155
- #df2.index = pd.to_datetime(df2.index)
156
- df_combined = df_main.combine_first(df2)
157
- last_date_df1 = df_main.index.max()
158
- first_date_df2 = df2.index.min()
159
- if first_date_df2 == last_date_df1 + pd.Timedelta(hours=1):
160
- df_combined = pd.concat([df_main, df2[df2.index > last_date_df1]], axis=0)
161
- #df_combined.reset_index(inplace=True)
162
- return df_combined
163
- #data.index = data.index.tz_localize('UTC')
164
- data = data.loc[start_date:end_date]
165
-
166
- forecast_columns = [
167
- 'Load_entsoe','Load_forecast_entsoe','Wind_onshore_entsoe','Wind_onshore_forecast_entsoe','Wind_offshore_entsoe','Wind_offshore_forecast_entsoe','Solar_entsoe','Solar_forecast_entsoe']
168
-
169
- if section == 'Data':
170
- st.header("Data")
171
- st.write("""
172
- This section allows you to explore and upload your datasets.
173
- You can visualize raw data, clean it, and prepare it for analysis.
174
- """)
175
-
176
- st.header('Data Quality')
177
-
178
- output_text = f"The below percentages are calculated from the selected date range from {start_date.strftime('%Y-%m-%d')} to {end_date.strftime('%Y-%m-%d')}. This interval can be adjusted from the sidebar."
179
- st.write(output_text)
180
-
181
- # Report % of missing values
182
- missing_values = data[forecast_columns].isna().mean() * 100
183
- missing_values = missing_values.round(2)
184
-
185
- installed_capacities = {
186
- 'FR': { 'Solar': 17419, 'Wind Offshore': 1483, 'Wind Onshore': 22134},
187
- 'DE': { 'Solar': 73821, 'Wind Offshore': 8386, 'Wind Onshore': 59915},
188
- 'BE': { 'Solar': 8789, 'Wind Offshore': 2262, 'Wind Onshore': 3053},
189
- 'NL': { 'Solar': 22590, 'Wind Offshore': 3220, 'Wind Onshore': 6190},
190
- }
191
-
192
- if country_code not in installed_capacities:
193
- st.error(f"Installed capacities not defined for country code '{country_code}'.")
194
- st.stop()
195
-
196
-
197
- # Report % of extreme, impossible values for the selected country
198
- capacities = installed_capacities[country_code]
199
- extreme_values = {}
200
-
201
- for col in forecast_columns:
202
- if 'Solar_entsoe' in col:
203
- extreme_values[col] = ((data[col] < 0) | (data[col] > capacities['Solar'])).mean() * 100
204
- elif 'Solar_forecast_entsoe' in col:
205
- extreme_values[col] = ((data[col] < 0) | (data[col] > capacities['Solar'])).mean() * 100
206
- elif 'Wind_onshore_entsoe' in col:
207
- extreme_values[col] = ((data[col] < 0) | (data[col] > capacities['Wind Onshore'])).mean() * 100
208
- elif 'Wind_onshore_forecast_entsoe' in col:
209
- extreme_values[col] = ((data[col] < 0) | (data[col] > capacities['Wind Onshore'])).mean() * 100
210
- elif 'Wind_offshore_entsoe' in col:
211
- extreme_values[col] = ((data[col] < 0) | (data[col] > capacities['Wind Offshore'])).mean() * 100
212
- elif 'Wind_offshore_forecast_entsoe' in col:
213
- extreme_values[col] = ((data[col] < 0) | (data[col] > capacities['Wind Offshore'])).mean() * 100
214
- elif 'Load_entsoe' in col:
215
- extreme_values[col] = ((data[col] < 0)).mean() * 100
216
- elif 'Load_forecast_entsoe' in col:
217
- extreme_values[col] = ((data[col] < 0)).mean() * 100
218
-
219
-
220
- extreme_values = pd.Series(extreme_values).round(2)
221
-
222
- # Combine all metrics into one DataFrame
223
- metrics_df = pd.DataFrame({
224
- 'Missing Values (%)': missing_values,
225
- 'Extreme/Nonsensical Values (%)': extreme_values,
226
- })
227
-
228
- st.markdown(
229
- """
230
- <style>
231
- .dataframe {font-size: 45px !important;}
232
- </style>
233
- """,
234
- unsafe_allow_html=True
235
- )
236
-
237
- st.dataframe(metrics_df)
238
-
239
- st.write('<b><u>Missing values (%)</u></b>: Percentage of missing values in the dataset', unsafe_allow_html=True)
240
- st.write('<b><u>Extreme/Nonsensical values (%)</u></b>: Values that are considered implausible such as negative or out-of-bound values i.e., (generation<0) or (generation>capacity)', unsafe_allow_html=True)
241
-
242
- # Section 2: Forecasts
243
- elif section == 'Forecasts':
244
-
245
- st.header('Forecast Quality')
246
-
247
- # Time series for last 1 week
248
- st.subheader('Time Series: Last 1 Week')
249
- last_week = Data_BE.loc[Data_BE.index >= (data.index[-1] - pd.Timedelta(days=7))]
250
- st.write('The below plots show the time series of forecasts vs. observations provided by the ENTSO-E Transparency platform between the selected data range.')
251
- forecast_columns_operational = [
252
- 'Load_entsoe','Load_forecast_entsoe', 'Load_LightGBMModel.7D.TimeCov.Temp.Forecast_elia', 'Wind_onshore_entsoe','Wind_onshore_forecast_entsoe','Wind_onshore_LightGBMModel.1D.TimeCov.Temp.Forecast_elia','Wind_offshore_entsoe','Wind_offshore_forecast_entsoe','Wind_offshore_LightGBMModel.1D.TimeCov.Temp.Forecast_elia','Solar_entsoe','Solar_forecast_entsoe', 'Solar_LightGBMModel.1D.TimeCov.Temp.Forecast_elia']
253
- forecast_columns = [
254
- 'Load_entsoe','Load_forecast_entsoe','Wind_onshore_entsoe','Wind_onshore_forecast_entsoe','Wind_offshore_entsoe','Wind_offshore_forecast_entsoe','Solar_entsoe','Solar_forecast_entsoe']
255
-
256
- operation_forecast_load=forecast_dict['Predictions_10h.csv'].filter(like='Load_', axis=1)
257
- operation_forecast_res=forecast_dict['Predictions_17h.csv'].filter(regex='^(?!Load_)')
258
- operation_forecast_load.columns = [col.replace('_entsoe.', '_').replace('Naive.7D', 'WeeklyNaiveSeasonal') for col in operation_forecast_load.columns]
259
- operation_forecast_res.columns = [col.replace('_entsoe.', '_').replace('Naive.1D', 'DailyNaiveSeasonal') for col in operation_forecast_res.columns]
260
- Historical_and_Load=add_feature(operation_forecast_load, historical_forecast)
261
- Historical_and_operational=add_feature(operation_forecast_res, Historical_and_Load)
262
- #print(Historical_and_operational.filter(like='Forecast_elia', axis=1))
263
- best_forecast = Historical_and_operational.filter(like='Forecast_elia', axis=1)
264
- df_combined = Historical_and_operational.join(Data_BE, how='inner')
265
- last_week_best_forecast = best_forecast.loc[best_forecast.index >= (best_forecast.index[-24] - pd.Timedelta(days=8))]
266
-
267
-
268
- for i in range(0, len(forecast_columns_operational), 3):
269
- actual_col = forecast_columns_operational[i]
270
- forecast_col = forecast_columns_operational[i + 1]
271
- my_forecast = forecast_columns_operational[i + 2]
272
-
273
-
274
- if forecast_col in data.columns:
275
- fig = go.Figure()
276
- fig.add_trace(go.Scatter(x=last_week.index, y=last_week[actual_col], mode='lines', name='Actual'))
277
- fig.add_trace(go.Scatter(x=last_week.index, y=last_week[forecast_col], mode='lines', name='Forecast ENTSO-E'))
278
-
279
- if country_code=='BE':
280
- conformal=conformal_predictions(df_combined, actual_col, my_forecast)
281
- last_week_conformal = conformal.loc[conformal.index >= (conformal.index[-24] - pd.Timedelta(days=8))]
282
- if actual_col =='Load_entsoe':
283
- last_week_conformal = conformal.loc[conformal.index >= (conformal.index[-24] - pd.Timedelta(days=5))]
284
- fig.add_trace(go.Scatter(x=last_week_best_forecast.index, y=last_week_best_forecast[my_forecast], mode='lines', name='Forecast EDS'))
285
-
286
- fig.add_trace(go.Scatter(
287
- x=last_week_conformal.index,
288
- y=last_week_conformal['Lower_Interval'],
289
- mode='lines',
290
- line=dict(width=0),
291
- showlegend=False
292
- ))
293
-
294
- # Add the upper interval trace and fill to the lower interval
295
- fig.add_trace(go.Scatter(
296
- x=last_week_conformal.index,
297
- y=last_week_conformal['Upper_Interval'],
298
- mode='lines',
299
- line=dict(width=0),
300
- fill='tonexty', # Fill between this trace and the previous one
301
- fillcolor='rgba(68, 68, 68, 0.3)',
302
- name='P10/P90 prediction intervals'
303
- ))
304
-
305
-
306
- fig.update_layout(title=f'Forecasts vs Actual for {actual_col}', xaxis_title='Date', yaxis_title='Value [MW]')
307
-
308
- st.plotly_chart(fig)
309
-
310
-
311
- def plot_category(df_dict, category_prefix, title):
312
- fig = go.Figure()
313
-
314
- # Define base colors for each model
315
- model_colors = {
316
- 'LightGBMModel.TimeCov.Temp.Forecast_elia': '#1f77b4', # Blue
317
- 'LightGBMModel.TimeCov.Temp': '#2ca02c', # Green
318
- 'Naive': '#ff7f0e' # Orange
319
- }
320
-
321
- # To keep track of which model has been added to the legend
322
- legend_added = {'LightGBMModel.TimeCov.Temp.Forecast_elia': False, 'LightGBMModel.TimeCov.Temp': False, 'Naive': False}
323
-
324
- for file_name, df in df_dict.items():
325
- # Extract the hour from the filename, assuming the format is "Predictions_Xh.csv"
326
- hour = int(file_name.split('_')[1].replace('h.csv', ''))
327
-
328
- filtered_columns = [col for col in df.columns if col.startswith(category_prefix)]
329
- for column in filtered_columns:
330
- # Identify the model type with more precise logic
331
- if 'LightGBMModel' in column:
332
- if 'Forecast_elia' in column:
333
- model_key = 'LightGBMModel.TimeCov.Temp.Forecast_elia'
334
- elif 'TimeCov' in column:
335
- model_key = 'LightGBMModel.TimeCov.Temp'
336
- elif 'Naive' in column:
337
- model_key = 'Naive'
338
- else:
339
- continue # Skip if it doesn't match any model type
340
-
341
- # Extract the relevant part of the model name
342
- parts = column.split('.')
343
- model_name_parts = parts[1:] # Skip the variable prefix
344
- model_name = '.'.join(model_name_parts) # Rejoin the parts to form the model name
345
-
346
- # Get the base color for the model
347
- base_color = model_colors[model_key]
348
-
349
- # Calculate the color shade based on the hour
350
- color_scale = pc.hex_to_rgb(base_color)
351
- scale_factor = 0.3 + (hour / 40) # Adjust scale to ensure the gradient is visible
352
- adjusted_color = tuple(int(c * scale_factor) for c in color_scale)
353
- # Convert to RGBA with transparency for plot lines
354
- line_color = f'rgba({adjusted_color[0]}, {adjusted_color[1]}, {adjusted_color[2]}, 0.1)' # Transparent color for lines
355
-
356
- # Combine the hour and the model name for the legend, but only add the legend entry once
357
- show_legend = not legend_added[model_key]
358
-
359
- fig.add_trace(go.Scatter(
360
- x=df.index, # Assuming 'Date' is the index, use 'df.index' for x-axis
361
- y=df[column],
362
- mode='lines',
363
- name=model_name if show_legend else None, # Use the model name for the legend, but only once
364
- line=dict(color=base_color if show_legend else line_color), # Use opaque color for legend, transparent for lines
365
- showlegend=show_legend, # Show legend only once per model
366
- legendgroup=model_key # Grouping for consistent legend color
367
- ))
368
-
369
- # Mark that this model has been added to the legend
370
- if show_legend:
371
- legend_added[model_key] = True
372
-
373
- # Add real values as a separate trace, if provided
374
- filtered_Data_BE_df = Data_BE.loc[df.index]
375
-
376
- if filtered_Data_BE_df[f'{category_prefix}_entsoe'].notna().any():
377
- fig.add_trace(go.Scatter(
378
- x=filtered_Data_BE_df.index,
379
- y=filtered_Data_BE_df[f'{category_prefix}_entsoe'],
380
- mode='lines',
381
- name=f'Actual {category_prefix}',
382
- line=dict(color='black', width=2), # Black line for real values
383
- showlegend=True # Always show this in the legend
384
- ))
385
-
386
- # Update layout to position the legend at the top, side by side
387
- fig.update_layout(
388
- title=dict(
389
- text=title,
390
- x=0, # Center the title horizontally
391
- y=1.00, # Slightly lower the title to create more space
392
- xanchor='left',
393
- yanchor='top'
394
- ),
395
- xaxis_title='Date',
396
- yaxis_title='Value',
397
- legend=dict(
398
- orientation="h", # Horizontal legend
399
- yanchor="bottom", # Align to the bottom of the legend box
400
- y=1, # Increase y position to avoid overlap with the title
401
- xanchor="center", # Center the legend horizontally
402
- x=0.5 # Position at the center of the plot
403
- )
404
- )
405
- return fig
406
-
407
- if country_code == "BE":
408
- st.header('EDS Forecasts by Hour')
409
-
410
- solar_fig = plot_category(forecast_dict, 'Solar', 'Solar Predictions')
411
- st.plotly_chart(solar_fig)
412
-
413
- wind_offshore_fig = plot_category(forecast_dict, 'Wind_offshore', 'Wind Offshore Predictions')
414
- st.plotly_chart(wind_offshore_fig)
415
-
416
- wind_onshore_fig = plot_category(forecast_dict, 'Wind_onshore', 'Wind Onshore Predictions')
417
- st.plotly_chart(wind_onshore_fig)
418
-
419
- load_fig = plot_category(forecast_dict, 'Load', 'Load Predictions')
420
- st.plotly_chart(load_fig)
421
-
422
- # Scatter plots for error distribution
423
- st.subheader('Error Distribution')
424
- st.write('The below scatter plots show the error distribution of all three fields: Solar, Wind and Load between the selected date range')
425
- for i in range(0, len(forecast_columns), 2):
426
- actual_col = forecast_columns[i]
427
- forecast_col = forecast_columns[i + 1]
428
- if forecast_col in data.columns:
429
- obs = last_week[actual_col]
430
- pred = last_week[forecast_col]
431
- error = pred - obs
432
-
433
- fig = px.scatter(x=obs, y=pred, labels={'x': 'Observed [MW]', 'y': 'Predicted by ENTSO-E [MW]'})
434
- fig.update_layout(title=f'Error Distribution for {forecast_col}')
435
- st.plotly_chart(fig)
436
-
437
-
438
-
439
- st.subheader('Accuracy Metrics (Sorted by rMAE):')
440
-
441
- if country_code == "BE":
442
-
443
- # Combine the two DataFrames on their index
444
- df_combined = Historical_and_operational.join(Data_BE, how='inner')
445
- # List of model columns from historical_forecast
446
- model_columns = historical_forecast.columns
447
-
448
- # Initialize dictionaries to store MAE and RMSE results for each variable
449
- results_wind_onshore = {}
450
- results_wind_offshore = {}
451
- results_load = {}
452
- results_solar = {}
453
-
454
- # Mapping of variables to their corresponding naive models
455
- naive_models = {
456
- 'Wind_onshore': 'Wind_onshore_DailyNaiveSeasonal',
457
- 'Wind_offshore': 'Wind_offshore_DailyNaiveSeasonal',
458
- 'Load': 'Load_WeeklyNaiveSeasonal',
459
- 'Solar': 'Solar_DailyNaiveSeasonal'
460
- }
461
-
462
- # Step 1: Calculate MAE, RMSE, and rMAE for each model
463
- for col in model_columns:
464
- # Extract the variable name by taking everything before the first underscore
465
- base_variable = col.split('_')[0]
466
-
467
- # Handle cases where variable names might be combined with multiple parts (e.g., "Load_LightGBMModel...")
468
- if base_variable in ['Wind', 'Load', 'Solar']:
469
- if 'onshore' in col:
470
- variable_name = 'Wind_onshore'
471
- results_dict = results_wind_onshore
472
- elif 'offshore' in col:
473
- variable_name = 'Wind_offshore'
474
- results_dict = results_wind_offshore
475
- else:
476
- variable_name = base_variable
477
- results_dict = results_load if base_variable == 'Load' else results_solar
478
- else:
479
- variable_name = base_variable
480
-
481
- # Construct the corresponding `variable_entsoe` column name
482
- entsoe_column = f'{variable_name}_entsoe'
483
- naive_model_col = naive_models.get(variable_name, None)
484
-
485
- # Drop NaNs for the specific pair of columns before calculating MAE and RMSE
486
- if entsoe_column in df_combined.columns and naive_model_col in df_combined.columns:
487
- valid_data = df_combined[[col, entsoe_column]].dropna()
488
- valid_naive_data = df_combined[[entsoe_column, naive_model_col]].dropna()
489
-
490
- # Calculate MAE and RMSE for the model against the `variable_entsoe`
491
- mae = np.mean(abs(valid_data[col] - valid_data[entsoe_column]))
492
- rmse = np.sqrt(mean_squared_error(valid_data[col], valid_data[entsoe_column]))
493
-
494
- # Calculate MAE for the Naive model
495
- mae_naive = np.mean(abs(valid_naive_data[entsoe_column] - valid_naive_data[naive_model_col]))
496
-
497
- # Calculate rMAE for the model
498
- rMAE = mae / mae_naive if mae_naive != 0 else np.inf
499
-
500
- # Store the results in the corresponding dictionary
501
- results_dict[f'{col}'] = {'MAE': mae, 'RMSE': rmse, 'rMAE': rMAE}
502
-
503
- # Step 2: Calculate MAE, RMSE, and rMAE for ENTSO-E forecasts specifically
504
- for variable_name in naive_models.keys():
505
- entsoe_column = f'{variable_name}_entsoe'
506
- forecast_entsoe_column = f'{variable_name}_forecast_entsoe'
507
- naive_model_col = naive_models[variable_name]
508
-
509
- # Ensure that the ENTSO-E forecast is included in the results
510
- if forecast_entsoe_column in df_combined.columns:
511
- valid_data = df_combined[[forecast_entsoe_column, entsoe_column]].dropna()
512
- valid_naive_data = df_combined[[entsoe_column, naive_model_col]].dropna()
513
-
514
- # Calculate MAE and RMSE for the ENTSO-E forecast against the actuals
515
- mae_entsoe = np.mean(abs(valid_data[forecast_entsoe_column] - valid_data[entsoe_column]))
516
- rmse_entsoe = np.sqrt(mean_squared_error(valid_data[forecast_entsoe_column], valid_data[entsoe_column]))
517
-
518
- # Calculate rMAE for the ENTSO-E forecast
519
- mae_naive = np.mean(abs(valid_naive_data[entsoe_column] - valid_naive_data[naive_model_col]))
520
- rMAE_entsoe = mae_entsoe / mae_naive if mae_naive != 0 else np.inf
521
-
522
- # Add the ENTSO-E results to the corresponding dictionary
523
- if variable_name == 'Wind_onshore':
524
- results_wind_onshore[forecast_entsoe_column] = {'MAE': mae_entsoe, 'RMSE': rmse_entsoe, 'rMAE': rMAE_entsoe}
525
- elif variable_name == 'Wind_offshore':
526
- results_wind_offshore[forecast_entsoe_column] = {'MAE': mae_entsoe, 'RMSE': rmse_entsoe, 'rMAE': rMAE_entsoe}
527
- elif variable_name == 'Load':
528
- results_load[forecast_entsoe_column] = {'MAE': mae_entsoe, 'RMSE': rmse_entsoe, 'rMAE': rMAE_entsoe}
529
- elif variable_name == 'Solar':
530
- results_solar[forecast_entsoe_column] = {'MAE': mae_entsoe, 'RMSE': rmse_entsoe, 'rMAE': rMAE_entsoe}
531
-
532
- # Convert the dictionaries to DataFrames and sort by rMAE
533
- df_wind_onshore = pd.DataFrame.from_dict(results_wind_onshore, orient='index').sort_values(by='rMAE')
534
- df_wind_offshore = pd.DataFrame.from_dict(results_wind_offshore, orient='index').sort_values(by='rMAE')
535
- df_load = pd.DataFrame.from_dict(results_load, orient='index').sort_values(by='rMAE')
536
- df_solar = pd.DataFrame.from_dict(results_solar, orient='index').sort_values(by='rMAE')
537
-
538
-
539
- st.write("##### Wind Onshore:")
540
- st.dataframe(df_wind_onshore)
541
-
542
- st.write("##### Wind Offshore:")
543
- st.dataframe(df_wind_offshore)
544
-
545
- st.write("##### Load:")
546
- st.dataframe(df_load)
547
-
548
- st.write("##### Solar:")
549
- st.dataframe(df_solar)
550
-
551
-
552
-
553
- else:
554
- accuracy_metrics = pd.DataFrame(columns=['MAE', 'rMAE'], index=['Load', 'Solar', 'Wind Onshore', 'Wind Offshore'])
555
-
556
- for i in range(0, len(forecast_columns), 2):
557
- actual_col = forecast_columns[i]
558
- forecast_col = forecast_columns[i + 1]
559
- if forecast_col in data.columns:
560
- obs = data[actual_col]
561
- pred = data[forecast_col]
562
- error = pred - obs
563
-
564
- mae = round(np.mean(np.abs(error)),2)
565
- if 'Load' in actual_col:
566
- persistence = obs.shift(168) # Weekly persistence
567
- else:
568
- persistence = obs.shift(24) # Daily persistence
569
-
570
- # Using the whole year's data for rMAE calculations
571
- rmae = round(mae / np.mean(np.abs(obs - persistence)),2)
572
-
573
- row_label = 'Load' if 'Load' in actual_col else 'Solar' if 'Solar' in actual_col else 'Wind Offshore' if 'Wind_offshore' in actual_col else 'Wind Onshore'
574
- accuracy_metrics.loc[row_label] = [mae, rmae]
575
-
576
- accuracy_metrics.dropna(how='all', inplace=True)# Sort by rMAE (second column)
577
- accuracy_metrics.sort_values(by=accuracy_metrics.columns[1], ascending=True, inplace=True)
578
- accuracy_metrics = accuracy_metrics.round(4)
579
-
580
- col1, col2 = st.columns([3, 2])
581
-
582
- with col1:
583
- st.dataframe(accuracy_metrics)
584
-
585
- with col2:
586
- st.markdown("""
587
- <style>
588
- .big-font {
589
- font-size: 20px;
590
- font-weight: 500;
591
- }
592
- </style>
593
- <div class="big-font">
594
- Equations
595
- </div>
596
- """, unsafe_allow_html=True)
597
-
598
- st.markdown(r"""
599
- $\text{MAE} = \frac{1}{n}\sum_{i=1}^{n}|y_i - \hat{y}_i|$
600
-
601
-
602
- $\text{rMAE} = \frac{\text{MAE}}{MAE_{\text{Persistence Model}}}$
603
-
604
-
605
- """)
606
-
607
-
608
-
609
- st.subheader('ACF plots of Errors')
610
- st.write('The below plots show the ACF (Auto-Correlation Function) for the errors of all three fields: Solar, Wind and Load.')
611
-
612
- for i in range(0, len(forecast_columns), 2):
613
- actual_col = forecast_columns[i]
614
- forecast_col = forecast_columns[i + 1]
615
- if forecast_col in data.columns:
616
- obs = data[actual_col]
617
- pred = data[forecast_col]
618
- error = pred - obs
619
-
620
- st.write(f"**ACF of Errors for {actual_col}**")
621
- fig, ax = plt.subplots(figsize=(10, 5))
622
- plot_acf(error.dropna(), ax=ax)
623
- st.pyplot(fig)
624
-
625
- acf_values = acf(error.dropna(), nlags=240)
626
-
627
- # Section 3: Insights
628
- elif section == 'Insights':
629
- st.header("Insights")
630
- st.write("""
631
- This section provides insights derived from the data and forecasts.
632
- You can visualize trends, anomalies, and other important findings.
633
- """)
634
-
635
- # Scatter plots for correlation between wind, solar, and load
636
- st.subheader('Correlation between Wind, Solar, and Load')
637
- st.write('The below scatter plots for correlation between all three fields: Solar, Wind and Load.')
638
-
639
- combinations = [('Solar_entsoe', 'Load_entsoe'), ('Wind_onshore_entsoe', 'Load_entsoe'), ('Wind_offshore_entsoe', 'Load_entsoe'), ('Solar_entsoe', 'Wind_onshore_entsoe'), ('Solar_entsoe', 'Wind_offshore_entsoe')]
640
-
641
- for x_col, y_col in combinations:
642
- if x_col in data.columns and y_col in data.columns:
643
- # For solar combinations, filter out zero values
644
- if 'Solar_entsoe' in x_col:
645
- filtered_data = data[data['Solar_entsoe'] > 0]
646
- x_values = filtered_data[x_col]
647
- y_values = filtered_data[y_col]
648
- else:
649
- x_values = data[x_col]
650
- y_values = data[y_col]
651
-
652
- corr_coef = x_values.corr(y_values)
653
- fig = px.scatter(
654
- x=x_values,
655
- y=y_values,
656
- labels={'x': f'{x_col} [MW]', 'y': f'{y_col} [MW]'},
657
- title=f'{x_col} vs {y_col} (Correlation: {corr_coef:.2f})', color_discrete_sequence=['grey'])
658
- st.plotly_chart(fig)
659
-
660
-
661
- st.subheader('Weather vs. Generation/Demand')
662
- st.write('The below scatter plots show the relation between weather parameters (i.e., Temperature, Wind Speed) and generation/demand.')
663
-
664
- for weather_col in weather_columns:
665
- for actual_col in ['Load_entsoe', 'Solar_entsoe', 'Wind_onshore_entsoe', 'Wind_offshore_entsoe']:
666
- if weather_col in data.columns and actual_col in data.columns:
667
- clean_label = actual_col.replace('_entsoe', '')
668
- if weather_col == 'Temperature':
669
- fig = px.scatter(x=data[weather_col], y=data[actual_col], labels={'x': f'{weather_col} (°C)', 'y': f'{clean_label} Generation [MW]'}, color_discrete_sequence=['orange'])
670
- else:
671
- fig = px.scatter(x=data[weather_col], y=data[actual_col], labels={'x': f'{weather_col} (km/h)', 'y': clean_label})
672
- fig.update_layout(title=f'{weather_col} vs {actual_col}')
673
- st.plotly_chart(fig)
674
-
675
 
 
1
+ import requests
2
+ import pandas as pd
3
+ from io import StringIO
4
+ import streamlit as st
5
+ import os
6
+ import plotly.express as px
7
+ import plotly.graph_objects as go
8
+ import plotly.colors as pc
9
+ import numpy as np
10
+ from sklearn.metrics import mean_squared_error
11
+ from statsmodels.tsa.stattools import acf
12
+ from statsmodels.graphics.tsaplots import plot_acf
13
+ import matplotlib.pyplot as plt
14
+
15
+
16
+ ##GET ALL FILES FROM GITHUB
17
+ def load_GitHub(github_token, file_name):
18
+ url = f'https://raw.githubusercontent.com/margaridamascarenhas/Transparency_Data/main/{file_name}'
19
+ headers = {'Authorization': f'token {github_token}'}
20
+
21
+ response = requests.get(url, headers=headers)
22
+
23
+ if response.status_code == 200:
24
+ csv_content = StringIO(response.text)
25
+ df = pd.read_csv(csv_content)
26
+ if 'Date' in df.columns:
27
+ df['Date'] = pd.to_datetime(df['Date']) # Convert 'Date' column to datetime
28
+ df.set_index('Date', inplace=True) # Set 'Date' column as the index
29
+ #df.to_csv(file_name)
30
+ return df
31
+ else:
32
+ print(f"Failed to download {file_name}. Status code: {response.status_code}")
33
+ return None
34
+
35
+ def load_forecast(github_token):
36
+ predictions_dict = {}
37
+ for hour in range(24):
38
+ file_name = f'Predictions_{hour}h.csv'
39
+ df = load_GitHub(github_token, file_name)
40
+ if df is not None:
41
+ predictions_dict[file_name] = df
42
+ return predictions_dict
43
+
44
+ def convert_European_time(data, time_zone):
45
+ data.index = pd.to_datetime(data.index, utc=True)
46
+ data.index = data.index.tz_convert(time_zone)
47
+ data.index = data.index.tz_localize(None)
48
+ return data
49
+
50
+ github_token = st.secrets["GitHub_Token_KUL_Margarida"]
51
+
52
+ if github_token:
53
+ forecast_dict = load_forecast(github_token)
54
+
55
+ historical_forecast=load_GitHub(github_token, 'Historical_forecast.csv')
56
+
57
+ Data_BE=load_GitHub(github_token, 'BE_Elia_Entsoe_UTC.csv')
58
+ Data_FR=load_GitHub(github_token, 'FR_Entsoe_UTC.csv')
59
+ Data_NL=load_GitHub(github_token, 'NL_Entsoe_UTC.csv')
60
+ Data_DE=load_GitHub(github_token, 'DE_Entsoe_UTC.csv')
61
+
62
+ Data_BE=convert_European_time(Data_BE, 'Europe/Brussels')
63
+ Data_FR=convert_European_time(Data_FR, 'Europe/Paris')
64
+ Data_NL=convert_European_time(Data_NL, 'Europe/Amsterdam')
65
+ Data_DE=convert_European_time(Data_DE, 'Europe/Berlin')
66
+
67
+
68
+ else:
69
+ print("Please enter your GitHub Personal Access Token to proceed.")
70
+
71
def conformal_predictions(data, target, my_forecast):
    """Attach rolling, hour-of-day prediction intervals around a forecast column.

    For every calendar day that lies at least 30 days after the first timestamp
    (and whose midnight timestamp is present in the index), the signed residuals
    ``forecast - target`` of the preceding 30 days are grouped by hour of day and
    their 0.8 quantile is taken. That value is written to ``Quantile_80`` and is
    subtracted from / added to the forecast to fill ``Lower_Interval`` and
    ``Upper_Interval`` for the matching rows of that day.

    ``data`` is modified in place (columns ``Residuals``, ``Hour``,
    ``Quantile_80``, ``Lower_Interval`` and ``Upper_Interval`` are added or
    overwritten) and also returned.

    Args:
        data: DataFrame with a DatetimeIndex containing both columns below.
        target: Name of the column holding the observed values.
        my_forecast: Name of the column holding the forecast values.

    Returns:
        The same ``data`` object with the extra columns.
    """
    # Use the ``target`` argument; the residuals must not depend on whatever a
    # module-level loop variable happens to hold when this function is called.
    data['Residuals'] = data[my_forecast] - data[target]
    data['Hour'] = data.index.hour

    # Loop-invariant views of the index, computed once.
    row_day = data.index.normalize()
    row_hour = data.index.hour
    first_eligible_day = data.index.min() + pd.DateOffset(days=30)

    for date in row_day.unique():
        if date < first_eligible_day or date not in data.index:
            continue

        # Calibration window: the 30 days up to the last hour before ``date``.
        window_start = date - pd.DateOffset(days=30)
        window_end = date - pd.DateOffset(hours=1)
        calculation_window = data.loc[window_start:window_end]
        quantiles = calculation_window.groupby('Hour')['Residuals'].quantile(0.8)

        day_mask = row_day == date
        for hour in row_hour[day_mask].unique():
            if hour not in quantiles.index:
                continue
            hour_quantile = quantiles.loc[hour]
            idx = day_mask & (row_hour == hour)
            data.loc[idx, 'Quantile_80'] = hour_quantile
            data.loc[idx, 'Lower_Interval'] = data.loc[idx, my_forecast] - hour_quantile
            data.loc[idx, 'Upper_Interval'] = data.loc[idx, my_forecast] + hour_quantile
    return data
94
+
95
+
96
st.title("Transparency++")

# Sidebar display name -> two-letter country code used throughout the script.
countries = {'Belgium': 'BE', 'Netherlands': 'NL', 'Germany': 'DE', 'France': 'FR'}

st.sidebar.header('Filters')

selected_country = st.sidebar.selectbox('Select Country', list(countries.keys()))

st.write()
default_range = (pd.to_datetime("2024-01-01"), pd.to_datetime(pd.Timestamp('today')))
date_range = st.sidebar.date_input("Select Date Range for Metrics Calculation:",
                                   value=default_range)

# The widget may hold fewer than two dates; require both before continuing.
if len(date_range) != 2:
    st.error("Please select a valid date range.")
    st.stop()
start_date = pd.Timestamp(date_range[0])
end_date = pd.Timestamp(date_range[1])

# Sidebar radio buttons choosing which page of the app to render.
section = st.sidebar.radio('Section', ['Data', 'Forecasts', 'Insights'])

country_code = countries[selected_country]

# Per country: the frame to use, the weather columns shown in the Insights
# section, and which raw weather column feeds each of those display columns
# (listed in the order the display columns are created).
country_setup = {
    'BE': (
        Data_BE,
        ['Temperature', 'Wind Speed Onshore', 'Wind Speed Offshore'],
        {
            'Temperature': 'temperature_2m_8',
            'Wind Speed Offshore': 'wind_speed_100m_4',
            'Wind Speed Onshore': 'wind_speed_100m_8',
        },
    ),
    'DE': (
        Data_DE,
        ['Temperature', 'Wind Speed'],
        {'Temperature': 'temperature_2m', 'Wind Speed': 'wind_speed_100m'},
    ),
    'NL': (
        Data_NL,
        ['Temperature', 'Wind Speed'],
        {'Temperature': 'temperature_2m', 'Wind Speed': 'wind_speed_100m'},
    ),
    'FR': (
        Data_FR,
        ['Temperature', 'Wind Speed'],
        {'Temperature': 'temperature_2m', 'Wind Speed': 'wind_speed_100m'},
    ),
}

if country_code in country_setup:
    data, weather_columns, weather_sources = country_setup[country_code]
    for display_name, raw_name in weather_sources.items():
        data[display_name] = data[raw_name]
152
+
153
def add_feature(df2, df_main):
    """Merge ``df2`` into ``df_main``, with ``df_main`` taking precedence.

    By default the result is ``df_main.combine_first(df2)``. When ``df2`` begins
    exactly one hour after ``df_main`` ends, the two frames are instead stacked
    with ``pd.concat``, keeping only the ``df2`` rows later than the end of
    ``df_main``.

    Args:
        df2: Frame supplying additional rows/columns.
        df_main: Frame whose values win wherever both frames have data.

    Returns:
        The combined DataFrame; neither input is modified.
    """
    merged = df_main.combine_first(df2)
    main_end = df_main.index.max()
    starts_right_after = df2.index.min() == main_end + pd.Timedelta(hours=1)
    if not starts_right_after:
        return merged
    continuation = df2[df2.index > main_end]
    return pd.concat([df_main, continuation], axis=0)
163
# Restrict the selected country's data to the sidebar date range.
# ``end_date`` is built from a calendar date, i.e. it is midnight at the START
# of the last selected day. Slicing up to that label would drop the rest of
# that day, so use a half-open window that ends at the following midnight.
range_end = end_date + pd.Timedelta(days=1)
data = data.loc[(data.index >= start_date) & (data.index < range_end)]

# Observation / ENTSO-E forecast column pairs, in (actual, forecast) order.
forecast_columns = [
    'Load_entsoe', 'Load_forecast_entsoe',
    'Wind_onshore_entsoe', 'Wind_onshore_forecast_entsoe',
    'Wind_offshore_entsoe', 'Wind_offshore_forecast_entsoe',
    'Solar_entsoe', 'Solar_forecast_entsoe',
]
168
+
169
# Section 1: Data — share of missing and implausible values per column.
if section == 'Data':
    st.header("Data")
    st.write("""
    This section allows you to explore and upload your datasets.
    You can visualize raw data, clean it, and prepare it for analysis.
    """)

    st.header('Data Quality')

    output_text = f"The below percentages are calculated from the selected date range from {start_date.strftime('%Y-%m-%d')} to {end_date.strftime('%Y-%m-%d')}. This interval can be adjusted from the sidebar."
    st.write(output_text)

    # Only report on columns the selected country's frame actually has, in line
    # with the column-presence checks used by the other sections.
    available_columns = [col for col in forecast_columns if col in data.columns]

    # Report % of missing values
    missing_values = (data[available_columns].isna().mean() * 100).round(2)

    installed_capacities = {
        'FR': {'Solar': 17419, 'Wind Offshore': 1483, 'Wind Onshore': 22134},
        'DE': {'Solar': 73821, 'Wind Offshore': 8386, 'Wind Onshore': 59915},
        'BE': {'Solar': 8789, 'Wind Offshore': 2262, 'Wind Onshore': 3053},
        'NL': {'Solar': 22590, 'Wind Offshore': 3220, 'Wind Onshore': 6190},
    }

    if country_code not in installed_capacities:
        st.error(f"Installed capacities not defined for country code '{country_code}'.")
        st.stop()

    # Report % of extreme, impossible values for the selected country
    capacities = installed_capacities[country_code]

    # Column-name prefix -> key in ``capacities``. Load has no entry, so load
    # columns are only checked for negative values.
    capacity_key_by_source = {
        'Solar': 'Solar',
        'Wind_onshore': 'Wind Onshore',
        'Wind_offshore': 'Wind Offshore',
    }

    extreme_values = {}
    for col in available_columns:
        series = data[col]
        implausible = series < 0
        source = col.replace('_forecast_entsoe', '').replace('_entsoe', '')
        if source in capacity_key_by_source:
            implausible = implausible | (series > capacities[capacity_key_by_source[source]])
        extreme_values[col] = implausible.mean() * 100

    extreme_values = pd.Series(extreme_values, dtype=float).round(2)

    # Combine all metrics into one DataFrame
    metrics_df = pd.DataFrame({
        'Missing Values (%)': missing_values,
        'Extreme/Nonsensical Values (%)': extreme_values,
    })

    st.markdown(
        """
        <style>
        .dataframe {font-size: 45px !important;}
        </style>
        """,
        unsafe_allow_html=True
    )

    st.dataframe(metrics_df)

    st.write('<b><u>Missing values (%)</u></b>: Percentage of missing values in the dataset', unsafe_allow_html=True)
    st.write('<b><u>Extreme/Nonsensical values (%)</u></b>: Values that are considered implausible such as negative or out-of-bound values i.e., (generation<0) or (generation>capacity)', unsafe_allow_html=True)
242
+ # Section 2: Forecasts
243
+ elif section == 'Forecasts':
244
+
245
+ st.header('Forecast Quality')
246
+
247
+ # Time series for last 1 week
248
+ st.subheader('Time Series: Last 1 Week')
249
+ last_week = Data_BE.loc[Data_BE.index >= (data.index[-1] - pd.Timedelta(days=7))]
250
+ st.write('The below plots show the time series of forecasts vs. observations provided by the ENTSO-E Transparency platform between the selected data range.')
251
+ forecast_columns_operational = [
252
+ 'Load_entsoe','Load_forecast_entsoe', 'Load_LightGBMModel.7D.TimeCov.Temp.Forecast_elia', 'Wind_onshore_entsoe','Wind_onshore_forecast_entsoe','Wind_onshore_LightGBMModel.1D.TimeCov.Temp.Forecast_elia','Wind_offshore_entsoe','Wind_offshore_forecast_entsoe','Wind_offshore_LightGBMModel.1D.TimeCov.Temp.Forecast_elia','Solar_entsoe','Solar_forecast_entsoe', 'Solar_LightGBMModel.1D.TimeCov.Temp.Forecast_elia']
253
+ forecast_columns = [
254
+ 'Load_entsoe','Load_forecast_entsoe','Wind_onshore_entsoe','Wind_onshore_forecast_entsoe','Wind_offshore_entsoe','Wind_offshore_forecast_entsoe','Solar_entsoe','Solar_forecast_entsoe']
255
+
256
+ operation_forecast_load=forecast_dict['Predictions_10h.csv'].filter(like='Load_', axis=1)
257
+ operation_forecast_res=forecast_dict['Predictions_17h.csv'].filter(regex='^(?!Load_)')
258
+ operation_forecast_load.columns = [col.replace('_entsoe.', '_').replace('Naive.7D', 'WeeklyNaiveSeasonal') for col in operation_forecast_load.columns]
259
+ operation_forecast_res.columns = [col.replace('_entsoe.', '_').replace('Naive.1D', 'DailyNaiveSeasonal') for col in operation_forecast_res.columns]
260
+ Historical_and_Load=add_feature(operation_forecast_load, historical_forecast)
261
+ Historical_and_operational=add_feature(operation_forecast_res, Historical_and_Load)
262
+ #print(Historical_and_operational.filter(like='Forecast_elia', axis=1))
263
+ best_forecast = Historical_and_operational.filter(like='Forecast_elia', axis=1)
264
+ df_combined = Historical_and_operational.join(Data_BE, how='inner')
265
+ last_week_best_forecast = best_forecast.loc[best_forecast.index >= (best_forecast.index[-24] - pd.Timedelta(days=8))]
266
+
267
+
268
+ for i in range(0, len(forecast_columns_operational), 3):
269
+ actual_col = forecast_columns_operational[i]
270
+ forecast_col = forecast_columns_operational[i + 1]
271
+ my_forecast = forecast_columns_operational[i + 2]
272
+
273
+
274
+ if forecast_col in data.columns:
275
+ fig = go.Figure()
276
+ fig.add_trace(go.Scatter(x=last_week.index, y=last_week[actual_col], mode='lines', name='Actual'))
277
+ fig.add_trace(go.Scatter(x=last_week.index, y=last_week[forecast_col], mode='lines', name='Forecast ENTSO-E'))
278
+
279
+ if country_code=='BE':
280
+ conformal=conformal_predictions(df_combined, actual_col, my_forecast)
281
+ last_week_conformal = conformal.loc[conformal.index >= (conformal.index[-24] - pd.Timedelta(days=8))]
282
+ if actual_col =='Load_entsoe':
283
+ last_week_conformal = conformal.loc[conformal.index >= (conformal.index[-24] - pd.Timedelta(days=5))]
284
+ fig.add_trace(go.Scatter(x=last_week_best_forecast.index, y=last_week_best_forecast[my_forecast], mode='lines', name='Forecast EDS'))
285
+
286
+ fig.add_trace(go.Scatter(
287
+ x=last_week_conformal.index,
288
+ y=last_week_conformal['Lower_Interval'],
289
+ mode='lines',
290
+ line=dict(width=0),
291
+ showlegend=False
292
+ ))
293
+
294
+ # Add the upper interval trace and fill to the lower interval
295
+ fig.add_trace(go.Scatter(
296
+ x=last_week_conformal.index,
297
+ y=last_week_conformal['Upper_Interval'],
298
+ mode='lines',
299
+ line=dict(width=0),
300
+ fill='tonexty', # Fill between this trace and the previous one
301
+ fillcolor='rgba(68, 68, 68, 0.3)',
302
+ name='P10/P90 prediction intervals'
303
+ ))
304
+
305
+
306
+ fig.update_layout(title=f'Forecasts vs Actual for {actual_col}', xaxis_title='Date', yaxis_title='Value [MW]')
307
+
308
+ st.plotly_chart(fig)
309
+
310
+
311
def plot_category(df_dict, category_prefix, title):
    """Build a Plotly figure overlaying every hourly forecast run for one variable.

    Each frame in ``df_dict`` contributes one line per column whose name starts
    with ``category_prefix`` and that can be attributed to one of three model
    families (LightGBM with ``Forecast_elia``, LightGBM with ``TimeCov``, or
    ``Naive``). The first line of each family is drawn in the family's base
    colour and carries the legend entry; later lines are drawn in a faint,
    hour-dependent shade. Observed values are read from the module-level
    ``Data_BE`` frame for the timestamps of the last frame in ``df_dict``.

    Args:
        df_dict: Mapping of file name (``Predictions_<hour>h.csv``) to DataFrame.
        category_prefix: Column-name prefix, e.g. ``'Solar'`` or ``'Wind_onshore'``.
        title: Figure title.

    Returns:
        The assembled ``plotly.graph_objects.Figure``.
    """
    fig = go.Figure()

    # Define base colors for each model
    model_colors = {
        'LightGBMModel.TimeCov.Temp.Forecast_elia': '#1f77b4',  # Blue
        'LightGBMModel.TimeCov.Temp': '#2ca02c',  # Green
        'Naive': '#ff7f0e'  # Orange
    }

    # To keep track of which model has been added to the legend
    legend_added = dict.fromkeys(model_colors, False)

    last_frame = None
    for file_name, df in df_dict.items():
        last_frame = df
        # Extract the hour from the filename, assuming the format is "Predictions_Xh.csv"
        hour = int(file_name.split('_')[1].replace('h.csv', ''))

        for column in df.columns:
            if not column.startswith(category_prefix):
                continue

            # Identify the model family; columns matching none are skipped.
            if 'LightGBMModel' in column:
                if 'Forecast_elia' in column:
                    model_key = 'LightGBMModel.TimeCov.Temp.Forecast_elia'
                elif 'TimeCov' in column:
                    model_key = 'LightGBMModel.TimeCov.Temp'
                else:
                    # Without this branch ``model_key`` would keep the value of
                    # the previous column (or be unbound on the first one).
                    continue
            elif 'Naive' in column:
                model_key = 'Naive'
            else:
                continue

            # Legend label: everything after the first '.' of the column name.
            model_name = '.'.join(column.split('.')[1:])

            base_color = model_colors[model_key]

            # Shade the base colour by hour so that later runs look different.
            color_scale = pc.hex_to_rgb(base_color)
            scale_factor = 0.3 + (hour / 40)
            adjusted_color = tuple(int(c * scale_factor) for c in color_scale)
            # Nearly transparent colour for the lines that carry no legend entry.
            line_color = f'rgba({adjusted_color[0]}, {adjusted_color[1]}, {adjusted_color[2]}, 0.1)'

            # Only the first trace of each model family gets a legend entry.
            show_legend = not legend_added[model_key]

            fig.add_trace(go.Scatter(
                x=df.index,
                y=df[column],
                mode='lines',
                name=model_name if show_legend else None,
                line=dict(color=base_color if show_legend else line_color),
                showlegend=show_legend,
                legendgroup=model_key
            ))

            if show_legend:
                legend_added[model_key] = True

    # Add observed values for the timestamps of the last prediction frame.
    # NOTE(review): the original nesting of this step is not recoverable from
    # the flattened source; it is placed after the loop here — confirm.
    # Guarded so an empty ``df_dict`` yields an empty figure, not a NameError.
    if last_frame is not None:
        filtered_Data_BE_df = Data_BE.loc[last_frame.index]

        if filtered_Data_BE_df[f'{category_prefix}_entsoe'].notna().any():
            fig.add_trace(go.Scatter(
                x=filtered_Data_BE_df.index,
                y=filtered_Data_BE_df[f'{category_prefix}_entsoe'],
                mode='lines',
                name=f'Actual {category_prefix}',
                line=dict(color='black', width=2),  # Black line for real values
                showlegend=True  # Always show this in the legend
            ))

    # Title anchored top-left; horizontal legend centred along the top edge.
    fig.update_layout(
        title=dict(
            text=title,
            x=0,  # Left edge of the figure
            y=1.00,
            xanchor='left',
            yanchor='top'
        ),
        xaxis_title='Date',
        yaxis_title='Value',
        legend=dict(
            orientation="h",  # Horizontal legend
            yanchor="bottom",  # Anchor the bottom of the legend box at ``y``
            y=1,
            xanchor="center",  # Center the legend horizontally
            x=0.5
        )
    )
    return fig
406
+
407
# Forecasts section: per-hour EDS forecast plots (Belgium only) and
# observed-vs-predicted scatter plots.
if section == 'Forecasts':
    if country_code == "BE":
        st.header('EDS Forecasts by Hour')

        hourly_plots = (
            ('Solar', 'Solar Predictions'),
            ('Wind_offshore', 'Wind Offshore Predictions'),
            ('Wind_onshore', 'Wind Onshore Predictions'),
            ('Load', 'Load Predictions'),
        )
        for prefix, plot_title in hourly_plots:
            st.plotly_chart(plot_category(forecast_dict, prefix, plot_title))

    # Scatter plots for error distribution
    st.subheader('Error Distribution')
    st.write('The below scatter plots show the error distribution of all three fields: Solar, Wind and Load between the selected date range')
    # ``forecast_columns`` alternates actual / forecast column names.
    for actual_col, forecast_col in zip(forecast_columns[0::2], forecast_columns[1::2]):
        if forecast_col not in data.columns:
            continue
        fig = px.scatter(
            x=last_week[actual_col],
            y=last_week[forecast_col],
            labels={'x': 'Observed [MW]', 'y': 'Predicted by ENTSO-E [MW]'},
        )
        fig.update_layout(title=f'Error Distribution for {forecast_col}')
        st.plotly_chart(fig)
436
+
437
+
438
+
439
def _target_variable(column):
    """Map a model column name to the variable it predicts, or None if unknown."""
    base_variable = column.split('_')[0]
    if base_variable not in ('Wind', 'Load', 'Solar'):
        return None
    if 'onshore' in column:
        return 'Wind_onshore'
    if 'offshore' in column:
        return 'Wind_offshore'
    return base_variable


def _forecast_scores(frame, predicted_col, observed_col, naive_col):
    """Return MAE, RMSE and rMAE (MAE relative to the naive model) as a dict."""
    # Drop NaNs pair-wise so each score uses every row where both values exist.
    paired = frame[[predicted_col, observed_col]].dropna()
    naive_paired = frame[[observed_col, naive_col]].dropna()

    mae = np.mean(abs(paired[predicted_col] - paired[observed_col]))
    rmse = np.sqrt(mean_squared_error(paired[predicted_col], paired[observed_col]))
    mae_naive = np.mean(abs(naive_paired[observed_col] - naive_paired[naive_col]))
    rmae = mae / mae_naive if mae_naive != 0 else np.inf
    return {'MAE': mae, 'RMSE': rmse, 'rMAE': rmae}


def _metrics_table(results):
    """Turn ``{model: scores}`` into a DataFrame sorted by rMAE (empty-safe)."""
    table = pd.DataFrame(
        list(results.values()),
        index=list(results.keys()),
        columns=['MAE', 'RMSE', 'rMAE'],
    )
    return table.sort_values(by='rMAE')


# Forecasts section: accuracy tables.
if section == 'Forecasts':
    st.subheader('Accuracy Metrics (Sorted by rMAE):')

    if country_code == "BE":

        # Combine the two DataFrames on their index
        df_combined = Historical_and_operational.join(Data_BE, how='inner')
        # List of model columns from historical_forecast
        model_columns = historical_forecast.columns

        # Mapping of variables to their corresponding naive models
        naive_models = {
            'Wind_onshore': 'Wind_onshore_DailyNaiveSeasonal',
            'Wind_offshore': 'Wind_offshore_DailyNaiveSeasonal',
            'Load': 'Load_WeeklyNaiveSeasonal',
            'Solar': 'Solar_DailyNaiveSeasonal'
        }

        # One result dictionary per variable, looked up by name so a column can
        # never be filed under the dictionary left over from a previous column.
        results_by_variable = {variable: {} for variable in naive_models}

        # Step 1: Calculate MAE, RMSE, and rMAE for each model
        for col in model_columns:
            variable_name = _target_variable(col)
            if variable_name not in naive_models:
                continue

            entsoe_column = f'{variable_name}_entsoe'
            naive_model_col = naive_models[variable_name]
            if entsoe_column in df_combined.columns and naive_model_col in df_combined.columns:
                results_by_variable[variable_name][f'{col}'] = _forecast_scores(
                    df_combined, col, entsoe_column, naive_model_col)

        # Step 2: Calculate MAE, RMSE, and rMAE for ENTSO-E forecasts specifically
        for variable_name, naive_model_col in naive_models.items():
            entsoe_column = f'{variable_name}_entsoe'
            forecast_entsoe_column = f'{variable_name}_forecast_entsoe'

            needed = (forecast_entsoe_column, entsoe_column, naive_model_col)
            if all(name in df_combined.columns for name in needed):
                results_by_variable[variable_name][forecast_entsoe_column] = _forecast_scores(
                    df_combined, forecast_entsoe_column, entsoe_column, naive_model_col)

        # Convert the dictionaries to DataFrames and sort by rMAE
        df_wind_onshore = _metrics_table(results_by_variable['Wind_onshore'])
        df_wind_offshore = _metrics_table(results_by_variable['Wind_offshore'])
        df_load = _metrics_table(results_by_variable['Load'])
        df_solar = _metrics_table(results_by_variable['Solar'])

        st.write("##### Wind Onshore:")
        st.dataframe(df_wind_onshore)

        st.write("##### Wind Offshore:")
        st.dataframe(df_wind_offshore)

        st.write("##### Load:")
        st.dataframe(df_load)

        st.write("##### Solar:")
        st.dataframe(df_solar)

    else:
        accuracy_metrics = pd.DataFrame(columns=['MAE', 'rMAE'], index=['Load', 'Solar', 'Wind Onshore', 'Wind Offshore'])

        row_labels = {
            'Load_entsoe': 'Load',
            'Solar_entsoe': 'Solar',
            'Wind_offshore_entsoe': 'Wind Offshore',
            'Wind_onshore_entsoe': 'Wind Onshore',
        }

        for actual_col, forecast_col in zip(forecast_columns[0::2], forecast_columns[1::2]):
            if forecast_col not in data.columns:
                continue
            obs = data[actual_col]
            pred = data[forecast_col]
            error = pred - obs

            mae = round(np.mean(np.abs(error)), 2)
            # Persistence baseline: the observation 168 rows earlier for load,
            # 24 rows earlier otherwise (weekly / daily if rows are hourly —
            # TODO confirm the data resolution).
            persistence = obs.shift(168 if 'Load' in actual_col else 24)

            # rMAE over the selected date range
            rmae = round(mae / np.mean(np.abs(obs - persistence)), 2)

            accuracy_metrics.loc[row_labels[actual_col]] = [mae, rmae]

        accuracy_metrics.dropna(how='all', inplace=True)
        # Sort by rMAE (second column)
        accuracy_metrics.sort_values(by=accuracy_metrics.columns[1], ascending=True, inplace=True)
        accuracy_metrics = accuracy_metrics.round(4)

        col1, col2 = st.columns([3, 2])

        with col1:
            st.dataframe(accuracy_metrics)

        with col2:
            st.markdown("""
            <style>
            .big-font {
            font-size: 20px;
            font-weight: 500;
            }
            </style>
            <div class="big-font">
            Equations
            </div>
            """, unsafe_allow_html=True)

            st.markdown(r"""
            $\text{MAE} = \frac{1}{n}\sum_{i=1}^{n}|y_i - \hat{y}_i|$


            $\text{rMAE} = \frac{\text{MAE}}{MAE_{\text{Persistence Model}}}$


            """)
606
+
607
+
608
+
609
# Forecasts section: autocorrelation of the ENTSO-E forecast errors.
if section == 'Forecasts':
    st.subheader('ACF plots of Errors')
    st.write('The below plots show the ACF (Auto-Correlation Function) for the errors of all three fields: Solar, Wind and Load.')

    # ``forecast_columns`` alternates actual / forecast column names.
    for actual_col, forecast_col in zip(forecast_columns[0::2], forecast_columns[1::2]):
        if forecast_col not in data.columns:
            continue

        error = (data[forecast_col] - data[actual_col]).dropna()
        if error.empty:
            # Nothing to correlate for this variable in the selected range.
            continue

        st.write(f"**ACF of Errors for {actual_col}**")
        fig, ax = plt.subplots(figsize=(10, 5))
        plot_acf(error, ax=ax)
        st.pyplot(fig)
        # Release the figure; otherwise every rerun of the script leaves
        # another open figure registered with pyplot.
        plt.close(fig)
626
+
627
+ # Section 3: Insights
628
+ elif section == 'Insights':
629
+ st.header("Insights")
630
+ st.write("""
631
+ This section provides insights derived from the data and forecasts.
632
+ You can visualize trends, anomalies, and other important findings.
633
+ """)
634
+
635
+ # Scatter plots for correlation between wind, solar, and load
636
+ st.subheader('Correlation between Wind, Solar, and Load')
637
+ st.write('The below scatter plots for correlation between all three fields: Solar, Wind and Load.')
638
+
639
+ combinations = [('Solar_entsoe', 'Load_entsoe'), ('Wind_onshore_entsoe', 'Load_entsoe'), ('Wind_offshore_entsoe', 'Load_entsoe'), ('Solar_entsoe', 'Wind_onshore_entsoe'), ('Solar_entsoe', 'Wind_offshore_entsoe')]
640
+
641
+ for x_col, y_col in combinations:
642
+ if x_col in data.columns and y_col in data.columns:
643
+ # For solar combinations, filter out zero values
644
+ if 'Solar_entsoe' in x_col:
645
+ filtered_data = data[data['Solar_entsoe'] > 0]
646
+ x_values = filtered_data[x_col]
647
+ y_values = filtered_data[y_col]
648
+ else:
649
+ x_values = data[x_col]
650
+ y_values = data[y_col]
651
+
652
+ corr_coef = x_values.corr(y_values)
653
+ fig = px.scatter(
654
+ x=x_values,
655
+ y=y_values,
656
+ labels={'x': f'{x_col} [MW]', 'y': f'{y_col} [MW]'},
657
+ title=f'{x_col} vs {y_col} (Correlation: {corr_coef:.2f})', color_discrete_sequence=['grey'])
658
+ st.plotly_chart(fig)
659
+
660
+
661
+ st.subheader('Weather vs. Generation/Demand')
662
+ st.write('The below scatter plots show the relation between weather parameters (i.e., Temperature, Wind Speed) and generation/demand.')
663
+
664
+ for weather_col in weather_columns:
665
+ for actual_col in ['Load_entsoe', 'Solar_entsoe', 'Wind_onshore_entsoe', 'Wind_offshore_entsoe']:
666
+ if weather_col in data.columns and actual_col in data.columns:
667
+ clean_label = actual_col.replace('_entsoe', '')
668
+ if weather_col == 'Temperature':
669
+ fig = px.scatter(x=data[weather_col], y=data[actual_col], labels={'x': f'{weather_col} (°C)', 'y': f'{clean_label} Generation [MW]'}, color_discrete_sequence=['orange'])
670
+ else:
671
+ fig = px.scatter(x=data[weather_col], y=data[actual_col], labels={'x': f'{weather_col} (km/h)', 'y': clean_label})
672
+ fig.update_layout(title=f'{weather_col} vs {actual_col}')
673
+ st.plotly_chart(fig)
674
+
675