Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -129,6 +129,7 @@ upper_space.markdown("""
|
|
129 |
|
130 |
|
131 |
countries = {
|
|
|
132 |
'Netherlands': 'NL',
|
133 |
'Germany': 'DE',
|
134 |
'France': 'FR',
|
@@ -143,54 +144,55 @@ st.sidebar.caption("Choose the country for which you want to display data or for
|
|
143 |
|
144 |
selected_country = st.sidebar.selectbox('Select Country', list(countries.keys()))
|
145 |
|
|
|
146 |
|
147 |
-
st.sidebar.subheader("Select Date Range ")
|
148 |
-
st.sidebar.caption("Define the time period over which the accuracy metrics will be calculated.")
|
149 |
|
150 |
-
|
151 |
-
|
|
|
|
|
|
|
|
|
152 |
value=(pd.to_datetime("2024-01-01"), pd.to_datetime(pd.Timestamp('today'))))
|
|
|
|
|
|
|
|
|
|
|
|
|
153 |
|
154 |
-
# Ensure the date range provides two dates
|
155 |
-
if len(date_range) == 2:
|
156 |
-
start_date = pd.Timestamp(date_range[0])
|
157 |
-
end_date = pd.Timestamp(date_range[1])
|
158 |
else:
|
159 |
-
|
160 |
-
st.stop()
|
161 |
-
|
162 |
-
st.sidebar.subheader("Section")
|
163 |
-
st.sidebar.caption("Select the type of information you want to explore.")
|
164 |
|
165 |
-
|
166 |
-
#
|
167 |
-
section =
|
168 |
-
|
169 |
-
country_code = countries[selected_country]
|
170 |
-
if country_code == 'BE':
|
171 |
-
|
172 |
-
|
173 |
-
|
174 |
-
|
175 |
-
|
176 |
-
|
177 |
-
elif country_code == 'DE':
|
178 |
-
|
179 |
-
|
180 |
-
|
181 |
-
|
182 |
-
|
183 |
-
elif country_code == 'NL':
|
184 |
-
|
185 |
-
|
186 |
-
|
187 |
-
|
188 |
-
|
189 |
-
elif country_code == 'FR':
|
190 |
-
|
191 |
-
|
192 |
-
|
193 |
-
|
194 |
|
195 |
def add_feature(df2, df_main):
|
196 |
#df_main.index = pd.to_datetime(df_main.index)
|
@@ -208,12 +210,7 @@ def add_feature(df2, df_main):
|
|
208 |
forecast_columns = [
|
209 |
'Load_entsoe','Load_forecast_entsoe','Wind_onshore_entsoe','Wind_onshore_forecast_entsoe','Wind_offshore_entsoe','Wind_offshore_forecast_entsoe','Solar_entsoe','Solar_forecast_entsoe']
|
210 |
|
211 |
-
if section == 'Data':
|
212 |
-
st.header("Data")
|
213 |
-
st.write("""
|
214 |
-
This section allows you to explore and upload your datasets.
|
215 |
-
You can visualize raw data, clean it, and prepare it for analysis.
|
216 |
-
""")
|
217 |
|
218 |
st.header('Data Quality')
|
219 |
|
@@ -282,17 +279,14 @@ if section == 'Data':
|
|
282 |
st.write('<b><u>Extreme/Nonsensical values (%)</u></b>: Values that are considered implausible such as negative or out-of-bound values i.e., (generation<0) or (generation>capacity)', unsafe_allow_html=True)
|
283 |
|
284 |
# Section 2: Forecasts
|
285 |
-
elif section == 'Forecasts':
|
286 |
|
287 |
st.header('Forecast Quality')
|
288 |
|
289 |
# Time series for last 1 week
|
290 |
-
st.subheader('Time Series: Last 1 Week')
|
291 |
last_week = data.loc[data.index >= (data.index[-1] - pd.Timedelta(days=7))]
|
292 |
-
st.write('The below plots show the time series of forecasts vs. observations provided by the ENTSO-E Transparency platform
|
293 |
|
294 |
-
forecast_columns = [
|
295 |
-
'Load_entsoe','Load_forecast_entsoe','Wind_onshore_entsoe','Wind_onshore_forecast_entsoe','Wind_offshore_entsoe','Wind_offshore_forecast_entsoe','Solar_entsoe','Solar_forecast_entsoe']
|
296 |
num_per_var=2
|
297 |
|
298 |
forecast_columns_line=forecast_columns
|
@@ -308,259 +302,6 @@ elif section == 'Forecasts':
|
|
308 |
fig.update_layout(title=f'Forecasts vs Actual for {actual_col}', xaxis_title='Date', yaxis_title='Value [MW]')
|
309 |
|
310 |
st.plotly_chart(fig)
|
311 |
-
|
312 |
-
|
313 |
-
def plot_category(df_dict, category_prefix, title, real_values_df=None):
    """Plot all model forecasts for one variable together with the observations.

    Args:
        df_dict: Mapping of file name -> prediction DataFrame. File names are
            parsed as ``"Predictions_<hour>h.csv"`` to obtain the forecast hour.
        category_prefix: Column prefix selecting the variable; the observed
            series is read from the column ``f"{category_prefix}_entsoe"``.
        title: Figure title.
        real_values_df: DataFrame holding the observed series. Defaults to the
            module-level ``Data_BE`` frame, which is what this function always
            used before the parameter existed.

    Returns:
        A ``plotly.graph_objects.Figure``.
    """
    if real_values_df is None:
        real_values_df = Data_BE

    fig = go.Figure()

    # Base colour for each model family.
    model_colors = {
        'LightGBMModel.TimeCov.Temp.Forecast_elia': '#1f77b4',  # Blue
        'LightGBMModel.TimeCov.Temp': '#2ca02c',  # Green
        'Naive': '#ff7f0e',  # Orange
    }

    # Tracks which model families already own a legend entry.
    legend_added = dict.fromkeys(model_colors, False)

    def classify(column):
        """Return the model family for a column name, or None if unrecognised."""
        if 'LightGBMModel' in column:
            if 'Forecast_elia' in column:
                return 'LightGBMModel.TimeCov.Temp.Forecast_elia'
            if 'TimeCov' in column:
                return 'LightGBMModel.TimeCov.Temp'
            # A LightGBM column without either marker used to fall through
            # with a stale (or unbound) model key; skip it instead.
            return None
        if 'Naive' in column:
            return 'Naive'
        return None

    last_index = None
    for file_name, df in df_dict.items():
        # Extract the hour from the filename, assuming "Predictions_Xh.csv".
        hour = int(file_name.split('_')[1].replace('h.csv', ''))
        last_index = df.index

        for column in df.columns:
            if not column.startswith(category_prefix):
                continue
            model_key = classify(column)
            if model_key is None:
                continue

            # Legend label: everything after the variable prefix.
            model_name = '.'.join(column.split('.')[1:])

            base_color = model_colors[model_key]

            # Scale the base colour by the forecast hour so that the
            # individual runs are distinguishable.
            scale_factor = 0.3 + (hour / 40)
            red, green, blue = (int(c * scale_factor) for c in pc.hex_to_rgb(base_color))
            line_color = f'rgba({red}, {green}, {blue}, 0.1)'  # Transparent colour for lines

            # Only the first trace of each model family appears in the legend.
            show_legend = not legend_added[model_key]

            fig.add_trace(go.Scatter(
                x=df.index,
                y=df[column],
                mode='lines',
                name=model_name if show_legend else None,
                # Opaque colour for the legend entry, transparent for the rest.
                line=dict(color=base_color if show_legend else line_color),
                showlegend=show_legend,
                legendgroup=model_key,
            ))
            legend_added[model_key] = True

    # Observed values, restricted to the time span of the last prediction
    # frame. Intersecting first avoids a KeyError for timestamps that are
    # missing from the observations; an empty df_dict simply adds no trace.
    if last_index is not None:
        actual_col = f'{category_prefix}_entsoe'
        observed = real_values_df.loc[real_values_df.index.intersection(last_index)]
        if observed[actual_col].notna().any():
            fig.add_trace(go.Scatter(
                x=observed.index,
                y=observed[actual_col],
                mode='lines',
                name=f'Actual {category_prefix}',
                line=dict(color='black', width=2),  # Black line for real values
                showlegend=True,  # Always show this in the legend
            ))

    fig.update_layout(
        title=dict(
            text=title,
            x=0,  # Left-align the title
            y=1.00,
            xanchor='left',
            yanchor='top',
        ),
        xaxis_title='Date',
        yaxis_title='Value',
        legend=dict(
            orientation="h",  # Horizontal legend
            yanchor="bottom",
            y=1,  # Sit on top of the plot area
            xanchor="center",
            x=0.5,
        ),
    )
    return fig
|
408 |
-
|
409 |
-
|
410 |
-
def calculate_mae(y_true, y_pred):
    """Return the mean absolute error between ``y_true`` and ``y_pred``.

    Both arguments are subtracted element-wise, so they may be NumPy arrays
    or pandas Series.
    """
    return np.mean(np.abs(y_true - y_pred))
|
412 |
-
def plot_mae_comparison(df_dict, category_prefix, title, real_values_df):
    """Plot, per forecast hour, each model's MAE relative to the ENTSO-E forecast.

    Args:
        df_dict: Mapping ``"Predictions_<hour>h.csv"`` -> prediction DataFrame.
            Hours 0..23 are looked up; missing files are skipped.
        category_prefix: Variable prefix. Observations are read from
            ``f"{category_prefix}_entsoe"`` and the reference forecast from
            ``f"{category_prefix}_forecast_entsoe"``.
        title: Unused; kept for call compatibility (the figure title is
            derived from ``category_prefix``).
        real_values_df: DataFrame with the observed and ENTSO-E forecast columns.

    Returns:
        A ``plotly.graph_objects.Figure`` with one line per model.
    """
    # 'Load' models are keyed with a 7D tag, all other variables with 1D.
    horizon = '7D' if category_prefix == 'Load' else '1D'
    model_colors = {
        f'LightGBMModel.{horizon}.TimeCov.Temp.Forecast_elia': '#1F77B4',  # Blue
        f'LightGBMModel.{horizon}.TimeCov.Temp': '#2CA02C',  # Green
        'Naive': '#FF7F0E',  # Orange
    }
    actual_col = f'{category_prefix}_entsoe'

    fig = go.Figure()
    for model_key, base_color in model_colors.items():
        hours_with_data = []
        mae_ratios = []
        for hour in range(24):
            df = df_dict.get(f'Predictions_{hour}h.csv')
            if df is None:
                continue

            # Drop the first and last calendar day of the frame.
            if isinstance(df.index, pd.DatetimeIndex):
                first_day = df.index.min().normalize()
                last_day = df.index.max().normalize()
                days = df.index.normalize()
                df = df[(days != first_day) & (days != last_day)]

            candidates = [col for col in df.columns
                          if col.startswith(actual_col) and model_key in col]
            if not candidates:
                continue
            # The plain LightGBM key is a substring of the "...Forecast_elia"
            # column name, so prefer a column that ends with the key and only
            # fall back to the first substring match.
            exact = [col for col in candidates if col.endswith(model_key)]
            model_predictions = df[(exact or candidates)[0]]

            actual_values = real_values_df[actual_col].dropna()

            # Compare only timestamps present in both series.
            common_indices = model_predictions.index.intersection(actual_values.index)
            if common_indices.empty:
                continue
            aligned_actual_values = actual_values.loc[common_indices]

            model_mae = calculate_mae(aligned_actual_values, model_predictions.loc[common_indices])
            entsoe_forecast = real_values_df[f'{category_prefix}_forecast_entsoe'].loc[common_indices]
            entsoe_mae = calculate_mae(aligned_actual_values, entsoe_forecast)

            # A zero or undefined baseline would give an inf/NaN ratio.
            if pd.isna(entsoe_mae) or entsoe_mae == 0:
                continue

            mae_ratios.append(model_mae / entsoe_mae)
            hours_with_data.append(hour)

        if mae_ratios:  # Only plot if there's data
            fig.add_trace(go.Scatter(
                x=hours_with_data,
                y=mae_ratios,
                mode='markers+lines',
                name=model_key,
                line=dict(color=base_color),
                marker=dict(color=base_color, size=8),
            ))

    fig.update_layout(
        title=f'{category_prefix}: rMAE<span style="font-size:11px;">ENTSO-E</span> by hour of Forecasting.',
        xaxis_title='Hour of Forecast',
        yaxis_title='MAE Ratio (Model / entsoe)',
        legend=dict(
            orientation="h",
            yanchor="bottom",
            y=1.02,
            xanchor="center",
            x=0.5,
        ),
    )
    return fig
|
485 |
-
|
486 |
-
|
487 |
-
|
488 |
-
def plot_mae_comparison_clock(df_dict, category_prefix, title, real_values_df):
    """Polar ("clock") plot of each model's MAE relative to the ENTSO-E forecast.

    Each forecast hour ``h`` is drawn at angle ``h * 15`` degrees.

    Args:
        df_dict: Mapping ``"Predictions_<hour>h.csv"`` -> prediction DataFrame.
            Hours 0..23 are looked up; missing files are skipped.
        category_prefix: Variable prefix. Observations are read from
            ``f"{category_prefix}_entsoe"`` and the reference forecast from
            ``f"{category_prefix}_forecast_entsoe"``.
        title: Unused; kept for call compatibility (the figure title is
            derived from ``category_prefix``).
        real_values_df: DataFrame with the observed and ENTSO-E forecast columns.

    Returns:
        A ``plotly.graph_objects.Figure`` with one closed polar trace per model.
    """
    hours = list(range(24))
    # The same keys and colours apply to every variable.
    model_colors = {
        'LightGBM_with_Forecast_elia': '#1F77B4',  # Blue
        'LightGBM': '#2CA02C',  # Green
        'Naive': '#FF7F0E',  # Orange
    }
    actual_col = f'{category_prefix}_entsoe'

    fig = go.Figure()
    largest_ratio = None  # Maximum over all models, used for the radial range

    for model_key, base_color in model_colors.items():
        hours_with_data = []
        mae_ratios = []

        for hour in hours:
            df = df_dict.get(f'Predictions_{hour}h.csv')
            if df is None:
                continue

            # Drop the first and last calendar day of the frame.
            if isinstance(df.index, pd.DatetimeIndex):
                first_day = df.index.min().normalize()
                last_day = df.index.max().normalize()
                days = df.index.normalize()
                df = df[(days != first_day) & (days != last_day)]

            candidates = [col for col in df.columns
                          if col.startswith(actual_col) and model_key in col]
            if not candidates:
                print(f"No matching columns for {model_key} at hour {hour}. Skipping...")
                continue
            # 'LightGBM' is a substring of 'LightGBM_with_Forecast_elia', so
            # prefer a column that ends with the key and only fall back to
            # the first substring match.
            exact = [col for col in candidates if col.endswith(model_key)]
            model_predictions = df[(exact or candidates)[0]]

            actual_values = real_values_df[actual_col].dropna()

            # Compare only timestamps present in both series.
            common_indices = model_predictions.index.intersection(actual_values.index)
            if common_indices.empty:
                continue
            aligned_actual_values = actual_values.loc[common_indices]

            model_mae = calculate_mae(aligned_actual_values, model_predictions.loc[common_indices])
            entsoe_forecast = real_values_df[f'{category_prefix}_forecast_entsoe'].loc[common_indices]
            entsoe_mae = calculate_mae(aligned_actual_values, entsoe_forecast)

            # A zero or undefined baseline would give an inf/NaN ratio.
            if pd.isna(entsoe_mae) or entsoe_mae == 0:
                continue

            mae_ratios.append(model_mae / entsoe_mae)
            hours_with_data.append(hour)

        if mae_ratios:
            thetas = [h * 15 for h in hours_with_data]
            fig.add_trace(go.Scatterpolar(
                # Repeat the first point so the outline closes on itself,
                # wherever the first available hour happens to be.
                r=mae_ratios + [mae_ratios[0]],
                theta=thetas + [thetas[0]],
                mode='lines+markers',
                name=model_key,
                line=dict(color=base_color),
                marker=dict(color=base_color, size=8),
            ))
            model_max = max(mae_ratios)
            if largest_ratio is None or model_max > largest_ratio:
                largest_ratio = model_max
        else:
            print(f"No data to plot for {model_key}.")  # Debugging print

    # Size the radial axis from every plotted model, not just the last one.
    if largest_ratio is None:
        radial_range = [0, 1.0]
    else:
        radial_range = [0, max(largest_ratio, 1.0) * 1.1]

    fig.update_layout(
        polar=dict(
            radialaxis=dict(visible=True, range=radial_range),
            angularaxis=dict(tickmode='array', tickvals=[h * 15 for h in hours], ticktext=hours),
        ),
        title=f'{category_prefix}: rMAE<span style="font-size:11px;">ENTSO-E</span> by Hour of Forecasting',
        showlegend=True,
    )

    return fig
|
564 |
|
565 |
|
566 |
# Scatter plots for error distribution
|
@@ -637,10 +378,8 @@ elif section == 'Forecasts':
|
|
637 |
|
638 |
$\text{rMAE} = \frac{\text{MAE}}{MAE_{\text{Persistence Model}}}$
|
639 |
|
640 |
-
|
641 |
""")
|
642 |
|
643 |
-
|
644 |
|
645 |
st.subheader('ACF plots of Errors')
|
646 |
st.write('The below plots show the ACF (Auto-Correlation Function) for the errors of all three data fields obtained from ENTSO-E: Solar, Wind and Load.')
|
@@ -669,44 +408,151 @@ elif section == 'Insights':
|
|
669 |
""")
|
670 |
|
671 |
# Scatter plots for correlation between wind, solar, and load
|
672 |
-
st.subheader('Correlation between Wind, Solar, and
|
673 |
-
st.write('The below scatter plots are made for checking whether there exists a correlation between
|
674 |
-
|
675 |
-
|
676 |
-
|
677 |
-
|
678 |
-
|
679 |
-
|
680 |
-
|
681 |
-
|
682 |
-
|
683 |
-
|
684 |
-
|
685 |
-
|
686 |
-
|
687 |
-
|
688 |
-
|
689 |
-
|
690 |
-
|
691 |
-
|
692 |
-
|
693 |
-
|
694 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
695 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
696 |
|
697 |
-
|
698 |
-
|
|
|
|
|
|
|
|
|
|
|
699 |
|
700 |
-
|
701 |
-
|
702 |
-
if weather_col in data.columns and actual_col in data.columns:
|
703 |
-
clean_label = actual_col.replace('_entsoe', '')
|
704 |
|
705 |
-
|
706 |
-
|
707 |
-
|
708 |
-
|
709 |
-
fig.update_layout(title=f'{weather_col} vs {actual_col}')
|
710 |
-
st.plotly_chart(fig)
|
711 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
712 |
|
|
|
129 |
|
130 |
|
131 |
countries = {
|
132 |
+
'Overall': 'Overall',
|
133 |
'Netherlands': 'NL',
|
134 |
'Germany': 'DE',
|
135 |
'France': 'FR',
|
|
|
144 |
|
145 |
selected_country = st.sidebar.selectbox('Select Country', list(countries.keys()))

if selected_country == 'Overall':
    # The cross-country overview has no per-country section or dataset.
    section = None
    data = None
else:
    # Sidebar with radio buttons for different sections
    st.sidebar.subheader("Section")
    st.sidebar.caption("Select the type of information you want to explore.")
    section = st.sidebar.radio('', ['Data Quality', 'Forecasts Quality', 'Insights'], index=1)

    date_range = st.sidebar.date_input(
        "Select Date Range for Metrics Calculation:",
        value=(pd.to_datetime("2024-01-01"), pd.to_datetime(pd.Timestamp('today'))),
    )
    # Ensure the date range provides two dates; stop the script otherwise.
    if len(date_range) != 2:
        st.error("Please select a valid date range.")
        st.stop()
    start_date = pd.Timestamp(date_range[0])
    end_date = pd.Timestamp(date_range[1])

    country_code = countries[selected_country]

    # Per country: (frame, weather column names, alias -> raw column).
    # The alias dicts are ordered the way the aliases are added to the frame.
    country_setup = {
        'BE': (
            Data_BE,
            ['Temperature', 'Wind Speed Onshore', 'Wind Speed Offshore'],
            {
                'Temperature': 'temperature_2m_8',
                'Wind Speed Offshore': 'wind_speed_100m_4',
                'Wind Speed Onshore': 'wind_speed_100m_8',
            },
        ),
        'DE': (
            Data_DE,
            ['Temperature', 'Wind Speed'],
            {'Temperature': 'temperature_2m', 'Wind Speed': 'wind_speed_100m'},
        ),
        'NL': (
            Data_NL,
            ['Temperature', 'Wind Speed'],
            {'Temperature': 'temperature_2m', 'Wind Speed': 'wind_speed_100m'},
        ),
        'FR': (
            Data_FR,
            ['Temperature', 'Wind Speed'],
            {'Temperature': 'temperature_2m', 'Wind Speed': 'wind_speed_100m'},
        ),
    }

    # Codes without an entry leave `data` and `weather_columns` unset.
    if country_code in country_setup:
        data, weather_columns, alias_sources = country_setup[country_code]
        # NOTE(review): the alias columns are written onto the shared Data_XX
        # frame itself (no copy) - confirm that this mutation is intended.
        for alias, raw_column in alias_sources.items():
            data[alias] = data[raw_column]
|
196 |
|
197 |
def add_feature(df2, df_main):
|
198 |
#df_main.index = pd.to_datetime(df_main.index)
|
|
|
210 |
forecast_columns = [
|
211 |
'Load_entsoe','Load_forecast_entsoe','Wind_onshore_entsoe','Wind_onshore_forecast_entsoe','Wind_offshore_entsoe','Wind_offshore_forecast_entsoe','Solar_entsoe','Solar_forecast_entsoe']
|
212 |
|
213 |
+
if section == 'Data Quality':
|
|
|
|
|
|
|
|
|
|
|
214 |
|
215 |
st.header('Data Quality')
|
216 |
|
|
|
279 |
st.write('<b><u>Extreme/Nonsensical values (%)</u></b>: Values that are considered implausible such as negative or out-of-bound values i.e., (generation<0) or (generation>capacity)', unsafe_allow_html=True)
|
280 |
|
281 |
# Section 2: Forecasts
|
282 |
+
elif section == 'Forecasts Quality':
|
283 |
|
284 |
st.header('Forecast Quality')
|
285 |
|
286 |
# Time series for last 1 week
|
|
|
287 |
last_week = data.loc[data.index >= (data.index[-1] - pd.Timedelta(days=7))]
|
288 |
+
st.write('The below plots show the time series of forecasts vs. observations provided by the ENTSO-E Transparency platform from the past week.')
|
289 |
|
|
|
|
|
290 |
num_per_var=2
|
291 |
|
292 |
forecast_columns_line=forecast_columns
|
|
|
302 |
fig.update_layout(title=f'Forecasts vs Actual for {actual_col}', xaxis_title='Date', yaxis_title='Value [MW]')
|
303 |
|
304 |
st.plotly_chart(fig)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
305 |
|
306 |
|
307 |
# Scatter plots for error distribution
|
|
|
378 |
|
379 |
$\text{rMAE} = \frac{\text{MAE}}{MAE_{\text{Persistence Model}}}$
|
380 |
|
|
|
381 |
""")
|
382 |
|
|
|
383 |
|
384 |
st.subheader('ACF plots of Errors')
|
385 |
st.write('The below plots show the ACF (Auto-Correlation Function) for the errors of all three data fields obtained from ENTSO-E: Solar, Wind and Load.')
|
|
|
408 |
""")
|
409 |
|
410 |
# Scatter plots for correlation between wind, solar, and load
|
411 |
+
st.subheader('Correlation between Wind, Solar, Load and Weather Features')
|
412 |
+
st.write('The below scatter plots are made for checking whether there exists a correlation between the data fields obtained: Solar, Wind, Load and Weather Features.')
|
413 |
+
|
414 |
+
selected_columns=['Load_entsoe', 'Solar_entsoe', 'Wind_offshore_entsoe', 'Wind_onshore_entsoe'] + weather_columns
|
415 |
+
selected_df=data[selected_columns]
|
416 |
+
selected_df.columns = [col.replace('_entsoe', '').replace('_', ' ') for col in selected_df.columns]
|
417 |
+
selected_df = selected_df.dropna()
|
418 |
+
print(selected_df)
|
419 |
+
sns.set_theme(style="ticks")
|
420 |
+
pairplot_fig = sns.pairplot(selected_df)
|
421 |
+
|
422 |
+
# Display the pairplot in Streamlit
|
423 |
+
st.pyplot(pairplot_fig)
|
424 |
+
|
425 |
+
elif selected_country == 'Overall':
|
426 |
+
st.subheader("Net Load Error Map")
|
427 |
+
st.write("""
|
428 |
+
The net load error map highlights the error in the forecasted versus actual net load for each country.
|
429 |
+
Hover over each country to see details on the latest net load error and the timestamp of the last recorded data.
|
430 |
+
""")
|
431 |
+
|
432 |
+
def plot_net_load_error_map(data_dict):
    """Render a Folium choropleth of the latest net load forecast error.

    Net load is load minus onshore wind, offshore wind and solar generation;
    the error is the observed net load minus the forecast net load at the
    most recent timestamp for which every column in the module-level
    ``forecast_columns`` is present.

    Args:
        data_dict: Mapping of full country name -> DataFrame. The names are
            matched against the ``name`` property of the GeoJSON features.

    Returns:
        None. The map is displayed with ``st_folium``.

    Raises:
        requests.RequestException: If the country outlines cannot be downloaded.
    """

    def calculate_net_load_error(df):
        """Return (error, timestamp string) for the latest complete row, or None."""
        filter_df = df[forecast_columns].dropna()
        if filter_df.empty:
            # No row has all columns populated; nothing to report.
            return None
        latest = filter_df.iloc[-1]
        net_load = (latest['Load_entsoe'] - latest['Wind_onshore_entsoe']
                    - latest['Wind_offshore_entsoe'] - latest['Solar_entsoe'])
        net_load_forecast = (latest['Load_forecast_entsoe'] - latest['Wind_onshore_forecast_entsoe']
                             - latest['Wind_offshore_forecast_entsoe'] - latest['Solar_forecast_entsoe'])
        # Plain float so the value serialises cleanly into the GeoJSON properties.
        error = float(net_load - net_load_forecast)
        date = filter_df.index[-1].strftime("%Y-%m-%d %H:%M")
        return error, date

    # Latest error and timestamp for every country that has usable data.
    net_load_errors = {}
    for country_name, country_df in data_dict.items():
        result = calculate_net_load_error(country_df)
        if result is not None:
            net_load_errors[country_name] = result

    if not net_load_errors:
        st.warning("No complete observations are available to compute the net load error.")
        return

    # Table form for the choropleth layer.
    df_net_load_error = pd.DataFrame({
        'country': list(net_load_errors.keys()),
        'net_load_error': [error for error, _ in net_load_errors.values()],
        'date': [date for _, date in net_load_errors.values()],
    })

    # Load the GeoJSON file. The timeout keeps an unresponsive host from
    # hanging the app; HTTP errors are raised instead of being parsed as JSON.
    geojson_url = "https://raw.githubusercontent.com/python-visualization/folium/master/examples/data/world-countries.json"
    response = requests.get(geojson_url, timeout=30)
    response.raise_for_status()
    geo_data = response.json()

    # Keep only countries that have a value, and attach error and date to
    # each feature, so that every remaining feature carries all the fields
    # the tooltip reads.
    features = []
    for feature in geo_data["features"]:
        country_name = feature["properties"]["name"]
        if country_name not in net_load_errors:
            continue
        error, date = net_load_errors[country_name]
        feature["properties"]["net_load_error"] = error
        feature["properties"]["date"] = date
        features.append(feature)
    filtered_geojson = {"type": "FeatureCollection", "features": features}

    # Initialize the Folium map centered on Central Europe
    m = folium.Map(location=[51, 10], zoom_start=5, tiles="cartodb positron")

    # Choropleth layer: colour each country by its net load error.
    folium.Choropleth(
        geo_data=filtered_geojson,
        name="choropleth",
        data=df_net_load_error,
        columns=["country", "net_load_error"],
        key_on="feature.properties.name",
        fill_color="RdYlBu",
        fill_opacity=0.7,
        line_opacity=0.5,
        line_color="black",
        legend_name="Net Load Error",
    ).add_to(m)

    # Invisible GeoJson layer that only contributes the hover tooltip.
    folium.GeoJson(
        filtered_geojson,
        style_function=lambda x: {'fillOpacity': 0, 'color': 'black', 'weight': 0},
        tooltip=folium.GeoJsonTooltip(
            fields=["name", "net_load_error", "date"],
            aliases=["Country:", "Net Load Error:", "Date:"],
            localize=True,
        ),
    ).add_to(m)

    # Display Folium map in Streamlit
    st_folium(m, width=700, height=600)
|
502 |
|
503 |
+
# Data dictionary with full country names
|
504 |
+
data_dict = {
|
505 |
+
'Belgium': Data_BE,
|
506 |
+
'France': Data_FR,
|
507 |
+
'Germany': Data_DE,
|
508 |
+
'Netherlands': Data_NL
|
509 |
+
}
|
510 |
|
511 |
+
# Call the function to plot the map
|
512 |
+
plot_net_load_error_map(data_dict)
|
|
|
|
|
513 |
|
514 |
+
st.subheader("rMAE of Forecasts published on ENTSO-E TP")
|
515 |
+
st.write("""
|
516 |
+
The radar chart below compares the forecast accuracy across Load, Onshore Wind, Offshore Wind, and Solar for each country.
|
517 |
+
""")
|
|
|
|
|
518 |
|
519 |
+
def calculate_mae(actual, forecast):
    """Return the mean absolute error between ``actual`` and ``forecast``.

    Both arguments are subtracted element-wise, so they may be NumPy arrays
    or pandas Series.
    """
    return np.mean(np.abs(actual - forecast))
|
521 |
+
|
522 |
+
# Function to calculate persistence MAE
|
523 |
+
def calculate_persistence_mae(data, shift_hours):
    """Return the MAE of a persistence forecast that repeats the value from ``shift_hours`` earlier.

    Args:
        data: pandas Series of observations.
        shift_hours: Lag of the persistence forecast, in hours.

    Returns:
        Mean absolute difference between each value and its lagged value;
        positions without a lagged value are ignored.
    """
    index = getattr(data, "index", None)
    if isinstance(index, pd.DatetimeIndex) and index.is_unique:
        # Shift by wall-clock time, not by row count: once rows are missing
        # (e.g. after dropna) a positional shift of N rows is no longer a
        # shift of N hours.
        lagged = data.shift(freq=pd.Timedelta(hours=shift_hours)).reindex(index)
    else:
        # Without a usable time index, treat one row as one hour.
        lagged = data.shift(shift_hours)
    return np.mean(np.abs(data - lagged))
|
525 |
+
|
526 |
+
# Function to calculate rMAE for each country
|
527 |
+
def calculate_rmae_for_country(df):
    """Return the relative MAE (forecast MAE / persistence MAE) per variable.

    Args:
        df: DataFrame containing ``<name>_entsoe`` and
            ``<name>_forecast_entsoe`` columns for Load, Wind_onshore,
            Wind_offshore and Solar.

    Returns:
        Dict with keys 'Load', 'Wind_onshore', 'Wind_offshore' and 'Solar'.
        A value is NaN when the persistence MAE is zero or undefined (for
        example a series that is constantly zero), instead of inf.
    """
    # (variable, persistence lag in hours): 168 for load, 24 for generation.
    variables = (
        ('Load', 168),
        ('Wind_onshore', 24),
        ('Wind_offshore', 24),
        ('Solar', 24),
    )
    rmae = {}
    for name, lag_hours in variables:
        actual = df[f'{name}_entsoe']
        forecast_mae = calculate_mae(actual, df[f'{name}_forecast_entsoe'])
        persistence_mae = calculate_persistence_mae(actual, lag_hours)
        # `> 0` is False for both 0 and NaN, so neither can reach the division.
        rmae[name] = forecast_mae / persistence_mae if persistence_mae > 0 else float('nan')
    return rmae
|
534 |
+
|
535 |
+
# Function to create rMAE DataFrame
|
536 |
+
def create_rmae_dataframe(data_dict):
    """Build a table with one row of rMAE values per country.

    Args:
        data_dict: Mapping of country name -> DataFrame. Each frame is
            restricted to the module-level ``forecast_columns`` and rows
            with any missing value are dropped before the rMAE is computed.

    Returns:
        DataFrame with columns 'Country', 'Load', 'Wind_onshore',
        'Wind_offshore' and 'Solar'.
    """
    columns = ['Country', 'Load', 'Wind_onshore', 'Wind_offshore', 'Solar']
    rows = []
    for country_name, df in data_dict.items():
        df_filtered = df[forecast_columns].dropna()
        # One dict per country keeps each value paired with its column name.
        rows.append({'Country': country_name, **calculate_rmae_for_country(df_filtered)})
    return pd.DataFrame(rows, columns=columns)
|
545 |
+
|
546 |
+
# Function to plot radar chart
|
547 |
+
def plot_rmae_radar_chart(rmae_df):
    """Display a radar chart with one filled trace per country.

    Args:
        rmae_df: DataFrame with a 'Country' column and one rMAE column each
            for 'Load', 'Wind_onshore', 'Wind_offshore' and 'Solar'.

    Returns:
        None. The figure is displayed with ``st.plotly_chart``.
    """
    angles = ['Load', 'Wind_onshore', 'Wind_offshore', 'Solar']

    fig = go.Figure()
    for _, row in rmae_df.iterrows():
        fig.add_trace(go.Scatterpolar(
            r=[row[angle] for angle in angles],
            theta=angles,
            fill='toself',
            name=row['Country'],
        ))

    # Keep the 0-2 radial range unless a value would be clipped by it.
    upper = 2
    values = rmae_df[angles].to_numpy(dtype=float)
    finite_values = values[np.isfinite(values)]
    if finite_values.size and finite_values.max() > upper:
        upper = float(finite_values.max()) * 1.1

    fig.update_layout(
        polar=dict(radialaxis=dict(visible=True, range=[0, upper])),
        showlegend=True,
        title="rMAE Radar Chart by Country",
    )
    st.plotly_chart(fig)
|
554 |
+
|
555 |
+
# Main execution to create and display radar plot
|
556 |
+
rmae_df = create_rmae_dataframe(data_dict)
|
557 |
+
plot_rmae_radar_chart(rmae_df)
|
558 |
|