mmmapms committed on
Commit
99fb501
·
verified ·
1 Parent(s): 17064fb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +189 -343
app.py CHANGED
@@ -129,6 +129,7 @@ upper_space.markdown("""
129
 
130
 
131
  countries = {
 
132
  'Netherlands': 'NL',
133
  'Germany': 'DE',
134
  'France': 'FR',
@@ -143,54 +144,55 @@ st.sidebar.caption("Choose the country for which you want to display data or for
143
 
144
  selected_country = st.sidebar.selectbox('Select Country', list(countries.keys()))
145
 
 
146
 
147
- st.sidebar.subheader("Select Date Range ")
148
- st.sidebar.caption("Define the time period over which the accuracy metrics will be calculated.")
149
 
150
- st.write()
151
- date_range = st.sidebar.date_input("Select Date Range for Metrics Calculation:",
 
 
 
 
152
  value=(pd.to_datetime("2024-01-01"), pd.to_datetime(pd.Timestamp('today'))))
 
 
 
 
 
 
153
 
154
- # Ensure the date range provides two dates
155
- if len(date_range) == 2:
156
- start_date = pd.Timestamp(date_range[0])
157
- end_date = pd.Timestamp(date_range[1])
158
  else:
159
- st.error("Please select a valid date range.")
160
- st.stop()
161
-
162
- st.sidebar.subheader("Section")
163
- st.sidebar.caption("Select the type of information you want to explore.")
164
 
165
-
166
- # Sidebar with radio buttons for different sections
167
- section = st.sidebar.radio('', ['Data', 'Forecasts', 'Insights'],index=1)
168
-
169
- country_code = countries[selected_country]
170
- if country_code == 'BE':
171
- data = Data_BE
172
- weather_columns = ['Temperature', 'Wind Speed Onshore', 'Wind Speed Offshore']
173
- data['Temperature'] = data['temperature_2m_8']
174
- data['Wind Speed Offshore'] = data['wind_speed_100m_4']
175
- data['Wind Speed Onshore'] = data['wind_speed_100m_8']
176
-
177
- elif country_code == 'DE':
178
- data = Data_DE
179
- weather_columns = ['Temperature', 'Wind Speed']
180
- data['Temperature'] = data['temperature_2m']
181
- data['Wind Speed'] = data['wind_speed_100m']
182
-
183
- elif country_code == 'NL':
184
- data = Data_NL
185
- weather_columns = ['Temperature', 'Wind Speed']
186
- data['Temperature'] = data['temperature_2m']
187
- data['Wind Speed'] = data['wind_speed_100m']
188
-
189
- elif country_code == 'FR':
190
- data = Data_FR
191
- weather_columns = ['Temperature', 'Wind Speed']
192
- data['Temperature'] = data['temperature_2m']
193
- data['Wind Speed'] = data['wind_speed_100m']
194
 
195
  def add_feature(df2, df_main):
196
  #df_main.index = pd.to_datetime(df_main.index)
@@ -208,12 +210,7 @@ def add_feature(df2, df_main):
208
  forecast_columns = [
209
  'Load_entsoe','Load_forecast_entsoe','Wind_onshore_entsoe','Wind_onshore_forecast_entsoe','Wind_offshore_entsoe','Wind_offshore_forecast_entsoe','Solar_entsoe','Solar_forecast_entsoe']
210
 
211
- if section == 'Data':
212
- st.header("Data")
213
- st.write("""
214
- This section allows you to explore and upload your datasets.
215
- You can visualize raw data, clean it, and prepare it for analysis.
216
- """)
217
 
218
  st.header('Data Quality')
219
 
@@ -282,17 +279,14 @@ if section == 'Data':
282
  st.write('<b><u>Extreme/Nonsensical values (%)</u></b>: Values that are considered implausible such as negative or out-of-bound values i.e., (generation<0) or (generation>capacity)', unsafe_allow_html=True)
283
 
284
  # Section 2: Forecasts
285
- elif section == 'Forecasts':
286
 
287
  st.header('Forecast Quality')
288
 
289
  # Time series for last 1 week
290
- st.subheader('Time Series: Last 1 Week')
291
  last_week = data.loc[data.index >= (data.index[-1] - pd.Timedelta(days=7))]
292
- st.write('The below plots show the time series of forecasts vs. observations provided by the ENTSO-E Transparency platform between the selected data range.')
293
 
294
- forecast_columns = [
295
- 'Load_entsoe','Load_forecast_entsoe','Wind_onshore_entsoe','Wind_onshore_forecast_entsoe','Wind_offshore_entsoe','Wind_offshore_forecast_entsoe','Solar_entsoe','Solar_forecast_entsoe']
296
  num_per_var=2
297
 
298
  forecast_columns_line=forecast_columns
@@ -308,259 +302,6 @@ elif section == 'Forecasts':
308
  fig.update_layout(title=f'Forecasts vs Actual for {actual_col}', xaxis_title='Date', yaxis_title='Value [MW]')
309
 
310
  st.plotly_chart(fig)
311
-
312
-
313
- def plot_category(df_dict, category_prefix, title):
314
- fig = go.Figure()
315
-
316
- # Define base colors for each model
317
- model_colors = {
318
- 'LightGBMModel.TimeCov.Temp.Forecast_elia': '#1f77b4', # Blue
319
- 'LightGBMModel.TimeCov.Temp': '#2ca02c', # Green
320
- 'Naive': '#ff7f0e' # Orange
321
- }
322
-
323
- # To keep track of which model has been added to the legend
324
- legend_added = {'LightGBMModel.TimeCov.Temp.Forecast_elia': False, 'LightGBMModel.TimeCov.Temp': False, 'Naive': False}
325
-
326
- for file_name, df in df_dict.items():
327
- # Extract the hour from the filename, assuming the format is "Predictions_Xh.csv"
328
- hour = int(file_name.split('_')[1].replace('h.csv', ''))
329
-
330
- filtered_columns = [col for col in df.columns if col.startswith(category_prefix)]
331
- for column in filtered_columns:
332
- # Identify the model type with more precise logic
333
- if 'LightGBMModel' in column:
334
- if 'Forecast_elia' in column:
335
- model_key = 'LightGBMModel.TimeCov.Temp.Forecast_elia'
336
- elif 'TimeCov' in column:
337
- model_key = 'LightGBMModel.TimeCov.Temp'
338
- elif 'Naive' in column:
339
- model_key = 'Naive'
340
- else:
341
- continue # Skip if it doesn't match any model type
342
-
343
- # Extract the relevant part of the model name
344
- parts = column.split('.')
345
- model_name_parts = parts[1:] # Skip the variable prefix
346
- model_name = '.'.join(model_name_parts) # Rejoin the parts to form the model name
347
-
348
- # Get the base color for the model
349
- base_color = model_colors[model_key]
350
-
351
- # Calculate the color shade based on the hour
352
- color_scale = pc.hex_to_rgb(base_color)
353
- scale_factor = 0.3 + (hour / 40) # Adjust scale to ensure the gradient is visible
354
- adjusted_color = tuple(int(c * scale_factor) for c in color_scale)
355
- # Convert to RGBA with transparency for plot lines
356
- line_color = f'rgba({adjusted_color[0]}, {adjusted_color[1]}, {adjusted_color[2]}, 0.1)' # Transparent color for lines
357
-
358
- # Combine the hour and the model name for the legend, but only add the legend entry once
359
- show_legend = not legend_added[model_key]
360
-
361
- fig.add_trace(go.Scatter(
362
- x=df.index, # Assuming 'Date' is the index, use 'df.index' for x-axis
363
- y=df[column],
364
- mode='lines',
365
- name=model_name if show_legend else None, # Use the model name for the legend, but only once
366
- line=dict(color=base_color if show_legend else line_color), # Use opaque color for legend, transparent for lines
367
- showlegend=show_legend, # Show legend only once per model
368
- legendgroup=model_key # Grouping for consistent legend color
369
- ))
370
-
371
- # Mark that this model has been added to the legend
372
- if show_legend:
373
- legend_added[model_key] = True
374
-
375
- # Add real values as a separate trace, if provided
376
- filtered_Data_BE_df = Data_BE.loc[df.index]
377
-
378
- if filtered_Data_BE_df[f'{category_prefix}_entsoe'].notna().any():
379
- fig.add_trace(go.Scatter(
380
- x=filtered_Data_BE_df.index,
381
- y=filtered_Data_BE_df[f'{category_prefix}_entsoe'],
382
- mode='lines',
383
- name=f'Actual {category_prefix}',
384
- line=dict(color='black', width=2), # Black line for real values
385
- showlegend=True # Always show this in the legend
386
- ))
387
-
388
- # Update layout to position the legend at the top, side by side
389
- fig.update_layout(
390
- title=dict(
391
- text=title,
392
- x=0, # Center the title horizontally
393
- y=1.00, # Slightly lower the title to create more space
394
- xanchor='left',
395
- yanchor='top'
396
- ),
397
- xaxis_title='Date',
398
- yaxis_title='Value',
399
- legend=dict(
400
- orientation="h", # Horizontal legend
401
- yanchor="bottom", # Align to the bottom of the legend box
402
- y=1, # Increase y position to avoid overlap with the title
403
- xanchor="center", # Center the legend horizontally
404
- x=0.5 # Position at the center of the plot
405
- )
406
- )
407
- return fig
408
-
409
-
410
- def calculate_mae(y_true, y_pred):
411
- return np.mean(np.abs(y_true - y_pred))
412
- def plot_mae_comparison(df_dict, category_prefix, title, real_values_df):
413
- hours = list(range(24))
414
- if category_prefix=='Load':
415
- model_colors = {
416
- 'LightGBMModel.7D.TimeCov.Temp.Forecast_elia': '#1F77B4', # Blue
417
- 'LightGBMModel.7D.TimeCov.Temp': '#2CA02C', # Green
418
- 'Naive': '#FF7F0E' # Orange
419
- }
420
- else:
421
- model_colors = {
422
- 'LightGBMModel.1D.TimeCov.Temp.Forecast_elia': '#1F77B4', # Blue
423
- 'LightGBMModel.1D.TimeCov.Temp': '#2CA02C', # Green
424
- 'Naive': '#FF7F0E' # Orange
425
- }
426
- fig = go.Figure()
427
- for model_key, base_color in model_colors.items():
428
- hours_with_data = []
429
- mae_ratios = []
430
- for hour in hours:
431
- file_name = f'Predictions_{hour}h.csv'
432
- df = df_dict.get(file_name, None)
433
- if df is None:
434
- continue
435
- if isinstance(df.index, pd.DatetimeIndex):
436
- first_day = df.index.min().normalize()
437
- last_day = df.index.max().normalize()
438
- df = df[df.index.normalize() != first_day]
439
- df = df[df.index.normalize() != last_day]
440
- # Adjusted filtering logic based on actual column names
441
- filtered_columns = [col for col in df.columns if col.startswith(f"{category_prefix}_entsoe") and model_key in col]
442
- if not filtered_columns:
443
- continue
444
- # Assuming only one column matches, otherwise refine the selection logic
445
- model_predictions = df[filtered_columns[0]]
446
- actual_values = real_values_df[f'{category_prefix}_entsoe']
447
- actual_values = actual_values.dropna()
448
- # Align both series by their common indices
449
- common_indices = model_predictions.index.intersection(actual_values.index)
450
- aligned_model_predictions = model_predictions.loc[common_indices]
451
- aligned_actual_values = actual_values.loc[common_indices]
452
- # Calculate MAE for the model
453
- model_mae = calculate_mae(aligned_actual_values, aligned_model_predictions)
454
- # Calculate MAE for the entsoe forecast
455
- entsoe_forecast = real_values_df[f'{category_prefix}_forecast_entsoe'].loc[common_indices]
456
- entsoe_mae = calculate_mae(aligned_actual_values, entsoe_forecast)
457
- # Calculate MAE ratio
458
- mae_ratio = model_mae / entsoe_mae
459
- mae_ratios.append(mae_ratio)
460
- hours_with_data.append(hour)
461
- # Plot the MAE ratio for this model as points
462
- if mae_ratios: # Only plot if there's data
463
- fig.add_trace(go.Scatter(
464
- x=hours_with_data, # The hours where we have data
465
- y=mae_ratios,
466
- mode='markers+lines', # Plot as points connected by lines
467
- name=model_key,
468
- line=dict(color=base_color),
469
- marker=dict(color=base_color, size=8) # Customize marker size
470
- ))
471
- # Update layout
472
- fig.update_layout(
473
- title=f'{category_prefix}: rMAE<span style="font-size:11px;">ENTSO-E</span> by hour of Forecasting.',
474
- xaxis_title='Hour of Forecast',
475
- yaxis_title='MAE Ratio (Model / entsoe)',
476
- legend=dict(
477
- orientation="h",
478
- yanchor="bottom",
479
- y=1.02,
480
- xanchor="center",
481
- x=0.5
482
- )
483
- )
484
- return fig
485
-
486
-
487
-
488
- def plot_mae_comparison_clock(df_dict, category_prefix, title, real_values_df):
489
- hours = list(range(24))
490
- if category_prefix=='Load':
491
- model_colors = {
492
- 'LightGBM_with_Forecast_elia': '#1F77B4', # Blue
493
- 'LightGBM': '#2CA02C', # Green
494
- 'Naive': '#FF7F0E' # Orange
495
- }
496
- else:
497
- model_colors = {
498
- 'LightGBM_with_Forecast_elia': '#1F77B4', # Blue
499
- 'LightGBM': '#2CA02C', # Green
500
- 'Naive': '#FF7F0E' # Orange
501
- }
502
-
503
- fig = go.Figure()
504
-
505
- for model_key, base_color in model_colors.items():
506
- hours_with_data = []
507
- mae_ratios = []
508
-
509
- for hour in hours:
510
- file_name = f'Predictions_{hour}h.csv'
511
- df = df_dict.get(file_name, None)
512
- if df is None:
513
- continue
514
-
515
- if isinstance(df.index, pd.DatetimeIndex):
516
- first_day = df.index.min().normalize()
517
- last_day = df.index.max().normalize()
518
- df = df[df.index.normalize() != first_day]
519
- df = df[df.index.normalize() != last_day]
520
-
521
- filtered_columns = [col for col in df.columns if col.startswith(f"{category_prefix}_entsoe") and model_key in col]
522
- if not filtered_columns:
523
- print(f"No matching columns for {model_key} at hour {hour}. Skipping...")
524
- continue
525
-
526
- model_predictions = df[filtered_columns[0]]
527
- actual_values = real_values_df[f'{category_prefix}_entsoe']
528
- actual_values = actual_values.dropna()
529
-
530
- common_indices = model_predictions.index.intersection(actual_values.index)
531
- aligned_model_predictions = model_predictions.loc[common_indices]
532
- aligned_actual_values = actual_values.loc[common_indices]
533
-
534
- model_mae = calculate_mae(aligned_actual_values, aligned_model_predictions)
535
- entsoe_forecast = real_values_df[f'{category_prefix}_forecast_entsoe'].loc[common_indices]
536
- entsoe_mae = calculate_mae(aligned_actual_values, entsoe_forecast)
537
-
538
- mae_ratio = model_mae / entsoe_mae
539
- mae_ratios.append(mae_ratio)
540
- hours_with_data.append(hour)
541
-
542
- if mae_ratios:
543
- fig.add_trace(go.Scatterpolar(
544
- r=mae_ratios + [mae_ratios[0]], # Ensure closure of the polar plot
545
- theta=[h * 15 for h in hours_with_data] + [0], # Ensure closure at 0 degrees
546
- mode='lines+markers',
547
- name=model_key,
548
- line=dict(color=base_color),
549
- marker=dict(color=base_color, size=8)
550
- ))
551
- else:
552
- print(f"No data to plot for {model_key}.") # Debugging print
553
-
554
- fig.update_layout(
555
- polar=dict(
556
- radialaxis=dict(visible=True, range=[0, max(max(mae_ratios), 1.0) * 1.1] if mae_ratios else [0, 1.0]),
557
- angularaxis=dict(tickmode='array', tickvals=[h * 15 for h in hours], ticktext=hours)
558
- ),
559
- title=f'{category_prefix}: rMAE<span style="font-size:11px;">ENTSO-E</span> by Hour of Forecasting',
560
- showlegend=True
561
- )
562
-
563
- return fig
564
 
565
 
566
  # Scatter plots for error distribution
@@ -637,10 +378,8 @@ elif section == 'Forecasts':
637
 
638
  $\text{rMAE} = \frac{\text{MAE}}{MAE_{\text{Persistence Model}}}$
639
 
640
-
641
  """)
642
 
643
-
644
 
645
  st.subheader('ACF plots of Errors')
646
  st.write('The below plots show the ACF (Auto-Correlation Function) for the errors of all three data fields obtained from ENTSO-E: Solar, Wind and Load.')
@@ -669,44 +408,151 @@ elif section == 'Insights':
669
  """)
670
 
671
  # Scatter plots for correlation between wind, solar, and load
672
- st.subheader('Correlation between Wind, Solar, and Load')
673
- st.write('The below scatter plots are made for checking whether there exists a correlation between all three data fields obtained from ENTSO-E: Solar, Wind and Load.')
674
-
675
- combinations = [('Solar_entsoe', 'Load_entsoe'), ('Wind_onshore_entsoe', 'Load_entsoe'), ('Wind_offshore_entsoe', 'Load_entsoe'), ('Solar_entsoe', 'Wind_onshore_entsoe'), ('Solar_entsoe', 'Wind_offshore_entsoe')]
676
-
677
- for x_col, y_col in combinations:
678
- if x_col in data.columns and y_col in data.columns:
679
- # For solar combinations, filter out zero values
680
- if 'Solar_entsoe' in x_col:
681
- filtered_data = data[data['Solar_entsoe'] > 0]
682
- x_values = filtered_data[x_col]
683
- y_values = filtered_data[y_col]
684
- else:
685
- x_values = data[x_col]
686
- y_values = data[y_col]
687
-
688
- corr_coef = x_values.corr(y_values)
689
- fig = px.scatter(
690
- x=x_values,
691
- y=y_values,
692
- labels={'x': f'{x_col} [MW]', 'y': f'{y_col} [MW]'},
693
- title=f'{x_col} vs {y_col} (Correlation: {corr_coef:.2f})', color_discrete_sequence=['grey'])
694
- st.plotly_chart(fig)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
695
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
696
 
697
- st.subheader('Weather vs. Generation/Demand')
698
- st.write('The below scatter plots show the relation between weather parameters (i.e., Temperature, Wind Speed) and the generation/demand data from ENTSO-E.')
 
 
 
 
 
699
 
700
- for weather_col in weather_columns:
701
- for actual_col in ['Load_entsoe', 'Solar_entsoe', 'Wind_onshore_entsoe', 'Wind_offshore_entsoe']:
702
- if weather_col in data.columns and actual_col in data.columns:
703
- clean_label = actual_col.replace('_entsoe', '')
704
 
705
- if weather_col == 'Temperature':
706
- fig = px.scatter(x=data[weather_col], y=data[actual_col], labels={'x': f'{weather_col} (°C)', 'y': f'{clean_label} Generation [MW]'}, color_discrete_sequence=['orange'])
707
- else:
708
- fig = px.scatter(x=data[weather_col], y=data[actual_col], labels={'x': f'{weather_col} (km/h)', 'y': clean_label})
709
- fig.update_layout(title=f'{weather_col} vs {actual_col}')
710
- st.plotly_chart(fig)
711
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
712
 
 
129
 
130
 
131
  countries = {
132
+ 'Overall': 'Overall',
133
  'Netherlands': 'NL',
134
  'Germany': 'DE',
135
  'France': 'FR',
 
144
 
145
  selected_country = st.sidebar.selectbox('Select Country', list(countries.keys()))
146
 
147
# Sidebar controls and per-country dataset selection.
#
# "Overall" is the cross-country summary view: it has no section selector, no
# date range and no single-country frame, so `section` and `data` are None.
if selected_country == 'Overall':
    section = None
    data = None
else:
    st.sidebar.subheader("Section")
    st.sidebar.caption("Select the type of information you want to explore.")
    # Streamlit discourages empty widget labels. The subheader above already
    # titles this group, so give the radio a real label and collapse it.
    # index=1 preselects 'Forecasts Quality'.
    section = st.sidebar.radio(
        'Section',
        ['Data Quality', 'Forecasts Quality', 'Insights'],
        index=1,
        label_visibility='collapsed',
    )
    date_range = st.sidebar.date_input(
        "Select Date Range for Metrics Calculation:",
        value=(pd.to_datetime("2024-01-01"), pd.to_datetime(pd.Timestamp('today'))),
    )
    # Ensure the date range provides two dates; otherwise halt this run
    # (st.stop() does not return).
    if len(date_range) != 2:
        st.error("Please select a valid date range.")
        st.stop()
    start_date = pd.Timestamp(date_range[0])
    end_date = pd.Timestamp(date_range[1])

    country_code = countries[selected_country]

    # DE, NL and FR expose a single temperature / wind-speed column pair.
    single_site_columns = ['Temperature', 'Wind Speed']
    single_site_aliases = {
        'Temperature': 'temperature_2m',
        'Wind Speed': 'wind_speed_100m',
    }
    # country code -> (frame, display columns, {display column: raw column}).
    # The alias dicts keep the original assignment order.
    country_sources = {
        'BE': (
            Data_BE,
            ['Temperature', 'Wind Speed Onshore', 'Wind Speed Offshore'],
            {
                'Temperature': 'temperature_2m_8',
                'Wind Speed Offshore': 'wind_speed_100m_4',
                'Wind Speed Onshore': 'wind_speed_100m_8',
            },
        ),
        'DE': (Data_DE, single_site_columns, single_site_aliases),
        'NL': (Data_NL, single_site_columns, single_site_aliases),
        'FR': (Data_FR, single_site_columns, single_site_aliases),
    }
    # Codes without an entry leave `data` / `weather_columns` untouched,
    # exactly as the former if/elif chain did.
    if country_code in country_sources:
        data, display_columns, weather_aliases = country_sources[country_code]
        weather_columns = list(display_columns)
        # NOTE: this adds the display-named columns to the shared
        # module-level frame in place.
        for display_name, raw_name in weather_aliases.items():
            data[display_name] = data[raw_name]
196
 
197
  def add_feature(df2, df_main):
198
  #df_main.index = pd.to_datetime(df_main.index)
 
210
  forecast_columns = [
211
  'Load_entsoe','Load_forecast_entsoe','Wind_onshore_entsoe','Wind_onshore_forecast_entsoe','Wind_offshore_entsoe','Wind_offshore_forecast_entsoe','Solar_entsoe','Solar_forecast_entsoe']
212
 
213
+ if section == 'Data Quality':
 
 
 
 
 
214
 
215
  st.header('Data Quality')
216
 
 
279
  st.write('<b><u>Extreme/Nonsensical values (%)</u></b>: Values that are considered implausible such as negative or out-of-bound values i.e., (generation<0) or (generation>capacity)', unsafe_allow_html=True)
280
 
281
  # Section 2: Forecasts
282
+ elif section == 'Forecasts Quality':
283
 
284
  st.header('Forecast Quality')
285
 
286
  # Time series for last 1 week
 
287
  last_week = data.loc[data.index >= (data.index[-1] - pd.Timedelta(days=7))]
288
+ st.write('The below plots show the time series of forecasts vs. observations provided by the ENTSO-E Transparency platform from the past week.')
289
 
 
 
290
  num_per_var=2
291
 
292
  forecast_columns_line=forecast_columns
 
302
  fig.update_layout(title=f'Forecasts vs Actual for {actual_col}', xaxis_title='Date', yaxis_title='Value [MW]')
303
 
304
  st.plotly_chart(fig)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
305
 
306
 
307
  # Scatter plots for error distribution
 
378
 
379
  $\text{rMAE} = \frac{\text{MAE}}{MAE_{\text{Persistence Model}}}$
380
 
 
381
  """)
382
 
 
383
 
384
  st.subheader('ACF plots of Errors')
385
  st.write('The below plots show the ACF (Auto-Correlation Function) for the errors of all three data fields obtained from ENTSO-E: Solar, Wind and Load.')
 
408
  """)
409
 
410
  # Scatter plots for correlation between wind, solar, and load
411
+ st.subheader('Correlation between Wind, Solar, Load and Weather Features')
412
+ st.write('The below scatter plots are made for checking whether there exists a correlation between the data fields obtained: Solar, Wind, Load and Weather Features.')
413
+
414
+ selected_columns=['Load_entsoe', 'Solar_entsoe', 'Wind_offshore_entsoe', 'Wind_onshore_entsoe'] + weather_columns
415
+ selected_df=data[selected_columns]
416
+ selected_df.columns = [col.replace('_entsoe', '').replace('_', ' ') for col in selected_df.columns]
417
+ selected_df = selected_df.dropna()
418
+ print(selected_df)
419
+ sns.set_theme(style="ticks")
420
+ pairplot_fig = sns.pairplot(selected_df)
421
+
422
+ # Display the pairplot in Streamlit
423
+ st.pyplot(pairplot_fig)
424
+
425
+ elif selected_country == 'Overall':
426
+ st.subheader("Net Load Error Map")
427
+ st.write("""
428
+ The net load error map highlights the error in the forecasted versus actual net load for each country.
429
+ Hover over each country to see details on the latest net load error and the timestamp of the last recorded data.
430
+ """)
431
+
432
def plot_net_load_error_map(data_dict):
    """Render a choropleth of each country's latest net-load forecast error.

    Net load is load minus onshore wind, offshore wind and solar generation.
    The error shown is actual net load minus forecast net load, taken at the
    last row in which every column of ``forecast_columns`` is present.

    Args:
        data_dict: Mapping of country name to that country's DataFrame. Names
            are matched against the ``name`` property of the downloaded
            world-countries GeoJSON.

    Returns:
        None. The map is drawn with ``st_folium``. If the GeoJSON cannot be
        downloaded, or no country has a complete row, a Streamlit message is
        shown instead of the map.
    """

    def calculate_net_load_error(df):
        """Return ``(error, 'YYYY-MM-DD HH:MM')`` for the last complete row, or None."""
        complete_rows = df[forecast_columns].dropna()
        if complete_rows.empty:
            # Nothing to report; the caller skips this country instead of
            # failing on .iloc[-1].
            return None
        latest = complete_rows.iloc[-1]
        net_load = (
            latest['Load_entsoe']
            - latest['Wind_onshore_entsoe']
            - latest['Wind_offshore_entsoe']
            - latest['Solar_entsoe']
        )
        net_load_forecast = (
            latest['Load_forecast_entsoe']
            - latest['Wind_onshore_forecast_entsoe']
            - latest['Wind_offshore_forecast_entsoe']
            - latest['Solar_forecast_entsoe']
        )
        timestamp = complete_rows.index[-1].strftime("%Y-%m-%d %H:%M")
        return float(net_load - net_load_forecast), timestamp

    # Latest error and timestamp per country; countries without data are dropped.
    net_load_errors = {}
    for country_name, country_df in data_dict.items():
        latest_error = calculate_net_load_error(country_df)
        if latest_error is not None:
            net_load_errors[country_name] = latest_error

    if not net_load_errors:
        st.warning("No complete observations are available to compute the net load error.")
        return

    # Tabular form consumed by the choropleth layer.
    df_net_load_error = pd.DataFrame(
        [(name, error, date) for name, (error, date) in net_load_errors.items()],
        columns=['country', 'net_load_error', 'date'],
    )

    # Country borders. A timeout and a status check keep a slow or failing
    # download from hanging the app or surfacing as an obscure JSON error.
    geojson_url = "https://raw.githubusercontent.com/python-visualization/folium/master/examples/data/world-countries.json"
    try:
        response = requests.get(geojson_url, timeout=10)
        response.raise_for_status()
        geo_data = response.json()
    except (requests.RequestException, ValueError) as exc:
        st.error(f"Could not load the country borders for the map: {exc}")
        return

    # Keep only countries that have a value and attach the tooltip fields,
    # so every remaining feature carries every field the tooltip reads.
    selected_features = []
    for feature in geo_data["features"]:
        feature_name = feature["properties"]["name"]
        if feature_name not in net_load_errors:
            continue
        error, date = net_load_errors[feature_name]
        feature["properties"]["net_load_error"] = error
        feature["properties"]["date"] = date
        selected_features.append(feature)
    filtered_geojson = {"type": "FeatureCollection", "features": selected_features}

    # Map centered on Central Europe.
    m = folium.Map(location=[51, 10], zoom_start=5, tiles="cartodb positron")

    # Colour each country by its net load error.
    folium.Choropleth(
        geo_data=filtered_geojson,
        name="choropleth",
        data=df_net_load_error,
        columns=["country", "net_load_error"],
        key_on="feature.properties.name",
        fill_color="RdYlBu",
        fill_opacity=0.7,
        line_opacity=0.5,
        line_color="black",
        legend_name="Net Load Error"
    ).add_to(m)

    # Invisible overlay whose only job is to supply the hover tooltip.
    folium.GeoJson(
        filtered_geojson,
        style_function=lambda x: {'fillOpacity': 0, 'color': 'black', 'weight': 0},
        tooltip=folium.GeoJsonTooltip(
            fields=["name", "net_load_error", "date"],
            aliases=["Country:", "Net Load Error:", "Date:"],
            localize=True
        )
    ).add_to(m)

    # Display the Folium map in Streamlit.
    st_folium(m, width=700, height=600)
502
 
503
+ # Data dictionary with full country names
504
+ data_dict = {
505
+ 'Belgium': Data_BE,
506
+ 'France': Data_FR,
507
+ 'Germany': Data_DE,
508
+ 'Netherlands': Data_NL
509
+ }
510
 
511
+ # Call the function to plot the map
512
+ plot_net_load_error_map(data_dict)
 
 
513
 
514
+ st.subheader("rMAE of Forecasts published on ENTSO-E TP")
515
+ st.write("""
516
+ The radar chart below compares the forecast accuracy across Load, Onshore Wind, Offshore Wind, and Solar for each country.
517
+ """)
 
 
518
 
519
def calculate_mae(actual, forecast):
    """Return the mean absolute error between observed and forecast values."""
    absolute_errors = np.abs(actual - forecast)
    return np.mean(absolute_errors)
521
+
522
+ # Function to calculate persistence MAE
523
def calculate_persistence_mae(data, shift_hours):
    """Return the MAE of a persistence forecast that repeats an earlier value.

    ``data.shift(shift_hours)`` moves values by that many rows, not by clock
    time. NOTE(review): this matches "hours" only if the series is hourly and
    gap-free; confirm against the caller, which drops rows before calling.
    """
    lagged = data.shift(shift_hours)
    deviation = np.abs(data - lagged)
    return np.mean(deviation)
525
+
526
+ # Function to calculate rMAE for each country
527
def calculate_rmae_for_country(df):
    """Return the relative MAE of the published forecast for each variable.

    For every variable, the MAE of ``<variable>_forecast_entsoe`` against
    ``<variable>_entsoe`` is divided by the MAE of a persistence forecast of
    the observations. The persistence lag is 168 rows for load and 24 rows
    for wind and solar.

    Returns:
        dict keyed by 'Load', 'Wind_onshore', 'Wind_offshore' and 'Solar'.
    """
    # variable -> persistence lag passed to calculate_persistence_mae
    persistence_lag = {
        'Load': 168,
        'Wind_onshore': 24,
        'Wind_offshore': 24,
        'Solar': 24,
    }
    rmae = {}
    for variable, lag in persistence_lag.items():
        observed = df[f'{variable}_entsoe']
        published = df[f'{variable}_forecast_entsoe']
        forecast_mae = calculate_mae(observed, published)
        rmae[variable] = forecast_mae / calculate_persistence_mae(observed, lag)
    return rmae
534
+
535
+ # Function to create rMAE DataFrame
536
def create_rmae_dataframe(data_dict):
    """Build a table with one row of rMAE scores per country.

    Each country's frame is reduced to the rows in which every column of
    ``forecast_columns`` is present before the scores are computed.

    Args:
        data_dict: Mapping of country name to that country's DataFrame.

    Returns:
        DataFrame with columns Country, Load, Wind_onshore, Wind_offshore
        and Solar.
    """
    metric_names = ('Load', 'Wind_onshore', 'Wind_offshore', 'Solar')
    table = {'Country': []}
    table.update({metric: [] for metric in metric_names})

    for country_name, frame in data_dict.items():
        complete_rows = frame[forecast_columns].dropna()
        scores = calculate_rmae_for_country(complete_rows)
        table['Country'].append(country_name)
        for metric, value in scores.items():
            table[metric].append(value)

    return pd.DataFrame(table)
545
+
546
+ # Function to plot radar chart
547
def plot_rmae_radar_chart(rmae_df):
    """Draw one filled radar trace per country from its four rMAE scores.

    Reads the columns ``Country``, ``Load``, ``Wind_onshore``,
    ``Wind_offshore`` and ``Solar`` of ``rmae_df``. The figure is rendered
    with ``st.plotly_chart`` and nothing is returned.
    """
    axes = ['Load', 'Wind_onshore', 'Wind_offshore', 'Solar']
    radar = go.Figure()
    for _, country_row in rmae_df.iterrows():
        trace = go.Scatterpolar(
            r=[country_row[axis] for axis in axes],
            theta=axes,
            fill='toself',
            name=country_row['Country'],
        )
        radar.add_trace(trace)
    # The radial axis is fixed to [0, 2] rather than auto-scaled.
    radar.update_layout(
        polar=dict(radialaxis=dict(visible=True, range=[0, 2])),
        showlegend=True,
        title="rMAE Radar Chart by Country",
    )
    st.plotly_chart(radar)
554
+
555
+ # Main execution to create and display radar plot
556
+ rmae_df = create_rmae_dataframe(data_dict)
557
+ plot_rmae_radar_chart(rmae_df)
558