Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -103,6 +103,14 @@ if github_token:
|
|
103 |
Data_PT=load_GitHub(github_token, 'PT_Entsoe_UTC.csv', hour, after_10_min)
|
104 |
Data_ES=load_GitHub(github_token, 'ES_Entsoe_UTC.csv', hour, after_10_min)
|
105 |
Data_AT=load_GitHub(github_token, 'AT_Entsoe_UTC.csv', hour, after_10_min)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
106 |
|
107 |
Data_BE=convert_European_time(Data_BE, 'Europe/Brussels')
|
108 |
Data_FR=convert_European_time(Data_FR, 'Europe/Paris')
|
@@ -111,6 +119,14 @@ if github_token:
|
|
111 |
Data_PT=convert_European_time(Data_PT, 'Europe/Lisbon')
|
112 |
Data_ES=convert_European_time(Data_ES, 'Europe/Madrid')
|
113 |
Data_AT=convert_European_time(Data_AT, 'Europe/Vienna')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
114 |
|
115 |
|
116 |
else:
|
@@ -131,7 +147,7 @@ with col2:
|
|
131 |
st.image("energyville_logo.png", width=100)
|
132 |
|
133 |
|
134 |
-
st.write("**Evaluate and analyze ENTSO-E Transparency Platform data quality, forecast accuracy, and energy trends for Portugal, Spain, Belgium, France, Germany, Austria,
|
135 |
|
136 |
upper_space.markdown("""
|
137 |
|
@@ -142,13 +158,60 @@ countries = {
|
|
142 |
'Overall': 'Overall',
|
143 |
'Austria': 'AT',
|
144 |
'Belgium': 'BE',
|
|
|
|
|
145 |
'France': 'FR',
|
146 |
-
'Germany': '
|
|
|
|
|
|
|
|
|
|
|
|
|
147 |
'Netherlands': 'NL',
|
148 |
'Portugal': 'PT',
|
149 |
'Spain': 'ES',
|
150 |
}
|
151 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
152 |
|
153 |
st.sidebar.header('Filters')
|
154 |
|
@@ -157,9 +220,6 @@ st.sidebar.caption("Choose the country for which you want to display data or for
|
|
157 |
|
158 |
selected_country = st.sidebar.selectbox('Select Country', list(countries.keys()))
|
159 |
|
160 |
-
# Ensure the date range provides two dates
|
161 |
-
|
162 |
-
|
163 |
# Sidebar with radio buttons for different sections
|
164 |
if selected_country != 'Overall':
|
165 |
st.sidebar.subheader("Section")
|
@@ -168,78 +228,26 @@ if selected_country != 'Overall':
|
|
168 |
else:
|
169 |
section = None # No section is shown when "Overall" is selected
|
170 |
|
171 |
-
|
172 |
-
forecast_columns_with_wind_offshore = [
|
173 |
-
'Load_entsoe','Load_forecast_entsoe','Wind_onshore_entsoe','Wind_onshore_forecast_entsoe','Wind_offshore_entsoe','Wind_offshore_forecast_entsoe','Solar_entsoe','Solar_forecast_entsoe']
|
174 |
-
|
175 |
-
forecast_columns_no_wind_offshore = [
|
176 |
-
'Load_entsoe','Load_forecast_entsoe','Wind_onshore_entsoe','Wind_onshore_forecast_entsoe','Solar_entsoe','Solar_forecast_entsoe']
|
177 |
-
|
178 |
if selected_country == 'Overall':
|
179 |
data = None # You can set data to None or a specific dataset based on your logic
|
180 |
section = None # No section selected when "Overall" is chosen
|
181 |
else:
|
182 |
country_code = countries[selected_country]
|
|
|
|
|
|
|
|
|
|
|
183 |
if country_code == 'BE':
|
184 |
-
forecast_columns=forecast_columns_with_wind_offshore
|
185 |
-
data = Data_BE
|
186 |
weather_columns = ['Temperature', 'Wind Speed Onshore', 'Wind Speed Offshore']
|
187 |
data['Temperature'] = data['temperature_2m_8']
|
188 |
-
data['Wind Speed Offshore'] = data['wind_speed_100m_4']
|
189 |
data['Wind Speed Onshore'] = data['wind_speed_100m_8']
|
190 |
-
|
191 |
-
|
192 |
-
forecast_columns=forecast_columns_with_wind_offshore
|
193 |
-
data = Data_DE
|
194 |
-
weather_columns = ['Temperature', 'Wind Speed']
|
195 |
-
data['Temperature'] = data['temperature_2m']
|
196 |
-
data['Wind Speed'] = data['wind_speed_100m']
|
197 |
-
|
198 |
-
elif country_code == 'NL':
|
199 |
-
forecast_columns=forecast_columns_with_wind_offshore
|
200 |
-
data = Data_NL
|
201 |
-
weather_columns = ['Temperature', 'Wind Speed']
|
202 |
-
data['Temperature'] = data['temperature_2m']
|
203 |
-
data['Wind Speed'] = data['wind_speed_100m']
|
204 |
-
|
205 |
-
elif country_code == 'FR':
|
206 |
-
forecast_columns=forecast_columns_with_wind_offshore
|
207 |
-
data = Data_FR
|
208 |
-
weather_columns = ['Temperature', 'Wind Speed']
|
209 |
-
data['Temperature'] = data['temperature_2m']
|
210 |
-
data['Wind Speed'] = data['wind_speed_100m']
|
211 |
-
|
212 |
-
elif country_code == 'PT':
|
213 |
-
forecast_columns=forecast_columns_with_wind_offshore
|
214 |
-
data = Data_PT
|
215 |
-
weather_columns = ['Temperature', 'Wind Speed']
|
216 |
-
data['Temperature'] = data['temperature_2m']
|
217 |
-
data['Wind Speed'] = data['wind_speed_100m']
|
218 |
-
elif country_code == 'AT':
|
219 |
-
forecast_columns=forecast_columns_no_wind_offshore
|
220 |
-
data = Data_AT
|
221 |
-
weather_columns = ['Temperature', 'Wind Speed']
|
222 |
-
data['Temperature'] = data['temperature_2m']
|
223 |
-
data['Wind Speed'] = data['wind_speed_100m']
|
224 |
-
elif country_code == 'ES':
|
225 |
-
forecast_columns=forecast_columns_no_wind_offshore
|
226 |
-
data = Data_ES
|
227 |
weather_columns = ['Temperature', 'Wind Speed']
|
228 |
data['Temperature'] = data['temperature_2m']
|
229 |
data['Wind Speed'] = data['wind_speed_100m']
|
230 |
|
231 |
-
def add_feature(df2, df_main):
|
232 |
-
#df_main.index = pd.to_datetime(df_main.index)
|
233 |
-
#df2.index = pd.to_datetime(df2.index)
|
234 |
-
df_combined = df_main.combine_first(df2)
|
235 |
-
last_date_df1 = df_main.index.max()
|
236 |
-
first_date_df2 = df2.index.min()
|
237 |
-
if first_date_df2 == last_date_df1 + pd.Timedelta(hours=1):
|
238 |
-
df_combined = pd.concat([df_main, df2[df2.index > last_date_df1]], axis=0)
|
239 |
-
#df_combined.reset_index(inplace=True)
|
240 |
-
return df_combined
|
241 |
-
#data.index = data.index.tz_localize('UTC')
|
242 |
-
|
243 |
|
244 |
if section == 'Data Quality':
|
245 |
|
@@ -247,7 +255,7 @@ if section == 'Data Quality':
|
|
247 |
|
248 |
st.write('The table below presents the data quality metrics focusing on the percentage of missing values and the occurrence of extreme or nonsensical values for the selected country.')
|
249 |
|
250 |
-
yesterday_midnight = pd.Timestamp(datetime.now().date() - pd.Timedelta(days=1)).replace(hour=23, minute=59, second=59)
|
251 |
|
252 |
# Filter data until the end of yesterday (midnight)
|
253 |
data_quality = data[data.index <= yesterday_midnight]
|
@@ -256,46 +264,33 @@ if section == 'Data Quality':
|
|
256 |
missing_values = data_quality[forecast_columns].isna().mean() * 100
|
257 |
missing_values = missing_values.round(2)
|
258 |
|
259 |
-
installed_capacities = {
|
260 |
-
'FR': { 'Solar': 17419, 'Wind Offshore': 1483, 'Wind Onshore': 22134},
|
261 |
-
'DE': { 'Solar': 73821, 'Wind Offshore': 8386, 'Wind Onshore': 59915},
|
262 |
-
'BE': { 'Solar': 8789, 'Wind Offshore': 2262, 'Wind Onshore': 3053},
|
263 |
-
'NL': { 'Solar': 22590, 'Wind Offshore': 3220, 'Wind Onshore': 6190},
|
264 |
-
'PT': { 'Solar': 1811, 'Wind Offshore': 25, 'Wind Onshore': 5333},
|
265 |
-
'ES': { 'Solar': 23867, 'Wind Onshore': 30159},
|
266 |
-
'AT': { 'Solar': 7294, 'Wind Onshore': 4021 }
|
267 |
-
}
|
268 |
-
|
269 |
if country_code not in installed_capacities:
|
270 |
-
st.
|
271 |
-
|
272 |
-
|
273 |
-
|
274 |
-
|
275 |
-
|
276 |
-
|
277 |
-
|
278 |
-
|
279 |
-
|
280 |
-
|
281 |
-
|
282 |
-
|
283 |
-
|
284 |
-
|
285 |
-
|
286 |
-
|
287 |
-
|
288 |
-
|
289 |
-
|
290 |
-
|
291 |
-
|
292 |
-
|
293 |
-
|
294 |
-
extreme_values[col] = ((data_quality[col] < 0)).mean() * 100
|
295 |
-
|
296 |
|
297 |
extreme_values = pd.Series(extreme_values).round(2)
|
298 |
-
|
299 |
# Combine all metrics into one DataFrame
|
300 |
metrics_df = pd.DataFrame({
|
301 |
'Missing Values (%)': missing_values,
|
@@ -316,7 +311,6 @@ if section == 'Data Quality':
|
|
316 |
st.write('<b><u>Missing values (%)</u></b>: Percentage of missing values in the dataset', unsafe_allow_html=True)
|
317 |
st.write('<b><u>Extreme/Nonsensical values (%)</u></b>: Values that are considered implausible such as negative or out-of-bound values i.e., (generation<0) or (generation>capacity)', unsafe_allow_html=True)
|
318 |
|
319 |
-
# Section 2: Forecasts
|
320 |
elif section == 'Forecasts Quality':
|
321 |
|
322 |
st.header('Forecast Quality')
|
@@ -326,20 +320,21 @@ elif section == 'Forecasts Quality':
|
|
326 |
st.write('The below plot shows the time series of forecasts vs. observations provided by the ENTSO-E Transparency platform from the past week.')
|
327 |
|
328 |
# Options for selecting the data to display
|
329 |
-
if country_code
|
330 |
-
|
331 |
variable_options = {
|
332 |
"Load": ("Load_entsoe", "Load_forecast_entsoe"),
|
333 |
"Solar": ("Solar_entsoe", "Solar_forecast_entsoe"),
|
334 |
"Wind Onshore": ("Wind_onshore_entsoe", "Wind_onshore_forecast_entsoe"),
|
335 |
"Wind Offshore": ("Wind_offshore_entsoe", "Wind_offshore_forecast_entsoe")
|
336 |
}
|
337 |
-
|
338 |
variable_options = {
|
339 |
"Load": ("Load_entsoe", "Load_forecast_entsoe"),
|
340 |
"Solar": ("Solar_entsoe", "Solar_forecast_entsoe"),
|
341 |
"Wind Onshore": ("Wind_onshore_entsoe", "Wind_onshore_forecast_entsoe"),
|
342 |
}
|
|
|
|
|
343 |
|
344 |
# Dropdown to select the variable
|
345 |
selected_variable = st.selectbox("Select Variable for Line PLot", list(variable_options.keys()))
|
@@ -375,12 +370,9 @@ elif section == 'Forecasts Quality':
|
|
375 |
fig.update_layout(title=f'Error Distribution for {selected_variable}')
|
376 |
|
377 |
st.plotly_chart(fig)
|
378 |
-
|
379 |
-
|
380 |
|
381 |
st.subheader('Accuracy Metrics (Sorted by rMAE):')
|
382 |
|
383 |
-
|
384 |
date_range = st.date_input(
|
385 |
"Select Date Range for Metrics Calculation:",
|
386 |
value=(pd.to_datetime("2024-01-01"), pd.to_datetime(pd.Timestamp('today')))
|
@@ -399,10 +391,13 @@ elif section == 'Forecasts Quality':
|
|
399 |
|
400 |
data = data.loc[start_date:end_date]
|
401 |
|
402 |
-
if country_code
|
403 |
accuracy_metrics = pd.DataFrame(columns=['MAE', 'rMAE'], index=['Load', 'Solar', 'Wind Onshore', 'Wind Offshore'])
|
404 |
-
|
405 |
accuracy_metrics = pd.DataFrame(columns=['MAE', 'rMAE'], index=['Load', 'Solar', 'Wind Onshore'])
|
|
|
|
|
|
|
406 |
|
407 |
for i in range(0, len(forecast_columns), 2):
|
408 |
actual_col = forecast_columns[i]
|
@@ -506,7 +501,6 @@ elif section == 'Forecasts Quality':
|
|
506 |
# Optionally calculate and store ACF values for further analysis if needed
|
507 |
acf_values = acf(error.dropna(), nlags=240)
|
508 |
|
509 |
-
# Section 3: Insights
|
510 |
elif section == 'Insights':
|
511 |
st.header("Insights")
|
512 |
|
@@ -523,10 +517,12 @@ elif section == 'Insights':
|
|
523 |
resampled_data = data_2024.resample('D').mean() # Resample to daily mean
|
524 |
|
525 |
# Select the necessary columns for the scatter plot
|
526 |
-
if country_code
|
527 |
selected_columns = ['Load_entsoe', 'Solar_entsoe', 'Wind_offshore_entsoe', 'Wind_onshore_entsoe'] + weather_columns
|
528 |
-
|
529 |
selected_columns = ['Load_entsoe', 'Solar_entsoe', 'Wind_onshore_entsoe'] + weather_columns
|
|
|
|
|
530 |
|
531 |
selected_df = resampled_data[selected_columns]
|
532 |
selected_df.columns = [col.replace('_entsoe', '').replace('_', ' ') for col in selected_df.columns]
|
@@ -543,93 +539,95 @@ elif section == 'Insights':
|
|
543 |
|
544 |
elif selected_country == 'Overall':
|
545 |
|
546 |
-
st.subheader("Net Load Error Map")
|
547 |
-
st.write("""
|
548 |
-
The net load error map highlights the error in the forecasted versus actual net load for each country.
|
549 |
-
Hover over each country to see details on the latest net load error and the timestamp (with the time zone of the corresponding country) of the last recorded data.
|
550 |
-
""")
|
551 |
-
|
552 |
def get_forecast_columns(country_code):
|
553 |
-
if country_code in
|
554 |
-
return
|
|
|
|
|
555 |
else:
|
556 |
-
|
557 |
-
|
558 |
-
def plot_net_load_error_map(data_dict):
|
559 |
-
# Define forecast columns used in calculation
|
560 |
|
561 |
-
|
562 |
-
|
563 |
-
|
564 |
|
565 |
-
|
566 |
-
|
567 |
-
|
568 |
|
569 |
-
|
570 |
-
|
571 |
-
|
572 |
|
573 |
-
|
574 |
-
|
575 |
-
|
576 |
|
577 |
-
|
578 |
-
|
579 |
-
|
580 |
|
581 |
-
|
582 |
|
|
|
583 |
# Calculate net load errors and dates for each country
|
584 |
-
net_load_errors = {
|
|
|
|
|
|
|
585 |
|
586 |
-
# Create DataFrame for Folium with additional date column
|
587 |
df_net_load_error = pd.DataFrame({
|
588 |
-
'
|
589 |
'net_load_error': [v[0] for v in net_load_errors.values()],
|
590 |
'date': [v[1] for v in net_load_errors.values()]
|
591 |
})
|
592 |
|
593 |
-
# Load the GeoJSON
|
594 |
-
|
595 |
-
geo_data =
|
596 |
|
597 |
-
#
|
598 |
-
|
599 |
-
filtered_geojson = {
|
600 |
-
"type": "FeatureCollection",
|
601 |
-
"features": [feature for feature in geo_data["features"] if feature["properties"]["name"] in selected_countries]
|
602 |
-
}
|
603 |
|
604 |
-
#
|
605 |
-
for
|
606 |
-
|
607 |
-
|
608 |
-
|
609 |
-
|
610 |
-
|
611 |
-
|
612 |
-
|
613 |
-
|
614 |
-
|
615 |
-
|
616 |
-
folium.Choropleth(
|
617 |
-
geo_data=filtered_geojson,
|
618 |
-
name="choropleth",
|
619 |
-
data=df_net_load_error,
|
620 |
-
columns=["country", "net_load_error"],
|
621 |
-
key_on="feature.properties.name",
|
622 |
-
fill_color= "RdYlBu", #"RdYlBu", # Use a more vibrant color palette
|
623 |
-
fill_opacity=0.7,
|
624 |
-
line_opacity=0.5,
|
625 |
-
line_color="black", # Neutral border color
|
626 |
-
legend_name="Net Load Error [MW]"
|
627 |
-
).add_to(m)
|
628 |
|
629 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
630 |
folium.GeoJson(
|
631 |
-
|
632 |
-
style_function=
|
633 |
tooltip=folium.GeoJsonTooltip(
|
634 |
fields=["name", "net_load_error", "date"],
|
635 |
aliases=["Country:", "Net Load Error [MW]:", "Date:"],
|
@@ -637,33 +635,18 @@ elif selected_country == 'Overall':
|
|
637 |
)
|
638 |
).add_to(m)
|
639 |
|
640 |
-
#
|
641 |
-
|
642 |
-
|
643 |
-
data_dict = {
|
644 |
-
'Belgium': Data_BE,
|
645 |
-
'France': Data_FR,
|
646 |
-
'Germany': Data_DE,
|
647 |
-
'Netherlands': Data_NL,
|
648 |
-
'Portugal': Data_PT,
|
649 |
-
'Austria': Data_AT,
|
650 |
-
'Spain': Data_ES,
|
651 |
-
}
|
652 |
|
653 |
-
|
654 |
-
|
655 |
-
st.subheader("rMAE of Forecasts published on ENTSO-E TP")
|
656 |
-
st.write("""The rMAE of Forecasts chart compares the forecast accuracy of the predictions published by ENTSO-E Transparency Platform for Portugal, Spain, Belgium, France, Germany, Austria, and the Netherlands. It shows the rMAE for onshore wind, offshore wind (if any), solar, and load demand, highlighting how well forecasts perform relative to a basic persistence model across these countries and energy sectors.""")
|
657 |
|
658 |
-
# Function to calculate MAE
|
659 |
def calculate_mae(actual, forecast):
    """Return the mean absolute error between observations and forecasts.

    Args:
        actual: Observed values (anything supporting ``-`` with *forecast*).
        forecast: Forecast values aligned with *actual*.

    Returns:
        ``np.mean`` of the element-wise absolute difference.
    """
    absolute_error = np.abs(actual - forecast)
    return np.mean(absolute_error)
|
661 |
|
662 |
-
# Function to calculate persistence MAE
|
663 |
def calculate_persistence_mae(data, shift_hours):
    """Return the mean absolute error of a persistence (lagged-copy) forecast.

    The series is compared with itself shifted by *shift_hours* positions.

    Args:
        data: Object exposing ``.shift`` (presumably a pandas Series with an
            hourly index -- confirm against the caller).
        shift_hours: Number of positions passed to ``data.shift``.

    Returns:
        ``np.mean`` of the absolute difference between *data* and its
        shifted copy.
    """
    lagged = data.shift(shift_hours)
    absolute_change = np.abs(data - lagged)
    return np.mean(absolute_change)
|
665 |
|
666 |
-
# Function to calculate rMAE for each country
|
667 |
def calculate_rmae_for_country(df):
|
668 |
rmae = {}
|
669 |
rmae['Load'] = calculate_mae(df['Load_entsoe'], df['Load_forecast_entsoe']) / calculate_persistence_mae(df['Load_entsoe'], 168)
|
@@ -679,7 +662,6 @@ elif selected_country == 'Overall':
|
|
679 |
|
680 |
return rmae
|
681 |
|
682 |
-
# Function to create rMAE DataFrame
|
683 |
def create_rmae_dataframe(data_dict):
|
684 |
|
685 |
rmae_values = {'Country': [], 'Load': [], 'Wind_onshore': [], 'Wind_offshore': [], 'Solar': []}
|
@@ -702,7 +684,6 @@ elif selected_country == 'Overall':
|
|
702 |
|
703 |
return pd.DataFrame(rmae_values)
|
704 |
|
705 |
-
# Function to plot radar chart
|
706 |
def plot_rmae_radar_chart(rmae_df):
|
707 |
fig = go.Figure()
|
708 |
|
@@ -728,9 +709,21 @@ elif selected_country == 'Overall':
|
|
728 |
)
|
729 |
st.plotly_chart(fig)
|
730 |
|
731 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
732 |
rmae_df = create_rmae_dataframe(data_dict)
|
733 |
plot_rmae_radar_chart(rmae_df)
|
734 |
|
735 |
|
|
|
736 |
|
|
|
103 |
Data_PT=load_GitHub(github_token, 'PT_Entsoe_UTC.csv', hour, after_10_min)
|
104 |
Data_ES=load_GitHub(github_token, 'ES_Entsoe_UTC.csv', hour, after_10_min)
|
105 |
Data_AT=load_GitHub(github_token, 'AT_Entsoe_UTC.csv', hour, after_10_min)
|
106 |
+
Data_IT_CALA=load_GitHub(github_token, 'IT_CALA_Entsoe_UTC.csv', hour, after_10_min)
|
107 |
+
Data_IT_CNOR=load_GitHub(github_token, 'IT_CNOR_Entsoe_UTC.csv', hour, after_10_min)
|
108 |
+
Data_IT_CSUD=load_GitHub(github_token, 'IT_CSUD_Entsoe_UTC.csv', hour, after_10_min)
|
109 |
+
Data_IT_NORD=load_GitHub(github_token, 'IT_NORD_Entsoe_UTC.csv', hour, after_10_min)
|
110 |
+
Data_IT_SICI=load_GitHub(github_token, 'IT_SICI_Entsoe_UTC.csv', hour, after_10_min)
|
111 |
+
Data_IT_SUD=load_GitHub(github_token, 'IT_SUD_Entsoe_UTC.csv', hour, after_10_min)
|
112 |
+
Data_DK_1=load_GitHub(github_token, 'DK_1_Entsoe_UTC.csv', hour, after_10_min)
|
113 |
+
Data_DK_2=load_GitHub(github_token, 'DK_2_Entsoe_UTC.csv', hour, after_10_min)
|
114 |
|
115 |
Data_BE=convert_European_time(Data_BE, 'Europe/Brussels')
|
116 |
Data_FR=convert_European_time(Data_FR, 'Europe/Paris')
|
|
|
119 |
Data_PT=convert_European_time(Data_PT, 'Europe/Lisbon')
|
120 |
Data_ES=convert_European_time(Data_ES, 'Europe/Madrid')
|
121 |
Data_AT=convert_European_time(Data_AT, 'Europe/Vienna')
|
122 |
+
Data_IT_CALA = convert_European_time(Data_IT_CALA, 'Europe/Rome')
|
123 |
+
Data_IT_CNOR = convert_European_time(Data_IT_CNOR, 'Europe/Rome')
|
124 |
+
Data_IT_CSUD = convert_European_time(Data_IT_CSUD, 'Europe/Rome')
|
125 |
+
Data_IT_NORD = convert_European_time(Data_IT_NORD, 'Europe/Rome')
|
126 |
+
Data_IT_SICI = convert_European_time(Data_IT_SICI, 'Europe/Rome')
|
127 |
+
Data_IT_SUD = convert_European_time(Data_IT_SUD, 'Europe/Rome')
|
128 |
+
Data_DK_1 = convert_European_time(Data_DK_1, 'Europe/Copenhagen')
|
129 |
+
Data_DK_2 = convert_European_time(Data_DK_2, 'Europe/Copenhagen')
|
130 |
|
131 |
|
132 |
else:
|
|
|
147 |
st.image("energyville_logo.png", width=100)
|
148 |
|
149 |
|
150 |
+
st.write("**Evaluate and analyze ENTSO-E Transparency Platform data quality, forecast accuracy, and energy trends for Portugal, Spain, Belgium, France, Germany-Luxembourg, Austria, the Netherlands, Italy and Denmark.**")
|
151 |
|
152 |
upper_space.markdown("""
|
153 |
|
|
|
158 |
'Overall': 'Overall',
|
159 |
'Austria': 'AT',
|
160 |
'Belgium': 'BE',
|
161 |
+
'Denmark 1': 'DK_1',
|
162 |
+
'Denmark 2': 'DK_2',
|
163 |
'France': 'FR',
|
164 |
+
'Germany-Luxembourg': 'DE_LU',
|
165 |
+
'Italy Calabria': 'IT_CALA',
|
166 |
+
'Italy Central North': 'IT_CNOR',
|
167 |
+
'Italy Central South': 'IT_CSUD',
|
168 |
+
'Italy North': 'IT_NORD',
|
169 |
+
'Italy Sicily': 'IT_SICI',
|
170 |
+
'Italy South': 'IT_SUD',
|
171 |
'Netherlands': 'NL',
|
172 |
'Portugal': 'PT',
|
173 |
'Spain': 'ES',
|
174 |
}
|
175 |
|
176 |
+
data_dict = {
|
177 |
+
'BE': Data_BE,
|
178 |
+
'FR': Data_FR,
|
179 |
+
'DE_LU': Data_DE,
|
180 |
+
'NL': Data_NL,
|
181 |
+
'PT': Data_PT,
|
182 |
+
'AT': Data_AT,
|
183 |
+
'ES': Data_ES,
|
184 |
+
'IT_CALA': Data_IT_CALA,
|
185 |
+
'IT_CNOR': Data_IT_CNOR,
|
186 |
+
'IT_CSUD': Data_IT_CSUD,
|
187 |
+
'IT_NORD': Data_IT_NORD,
|
188 |
+
'IT_SICI': Data_IT_SICI,
|
189 |
+
'IT_SUD': Data_IT_SUD,
|
190 |
+
'DK_1': Data_DK_1,
|
191 |
+
'DK_2': Data_DK_2,
|
192 |
+
}
|
193 |
+
|
194 |
+
countries_all_RES = ['BE', 'FR', 'NL', 'DE_LU', 'PT', 'DK_1', 'DK_2']
|
195 |
+
countries_no_offshore= ['AT', 'ES', 'IT_CALA', 'IT_CNOR', 'IT_CSUD', 'IT_NORD', 'IT_SICI', 'IT_SUD',]
|
196 |
+
|
197 |
+
installed_capacities = {
|
198 |
+
'FR': { 'Solar': 17419, 'Wind Offshore': 1483, 'Wind Onshore': 22134},
|
199 |
+
'DE_LU': { 'Solar': 73821, 'Wind Offshore': 8386, 'Wind Onshore': 59915},
|
200 |
+
'BE': { 'Solar': 8789, 'Wind Offshore': 2262, 'Wind Onshore': 3053},
|
201 |
+
'NL': { 'Solar': 22590, 'Wind Offshore': 3220, 'Wind Onshore': 6190},
|
202 |
+
'PT': { 'Solar': 1811, 'Wind Offshore': 25, 'Wind Onshore': 5333},
|
203 |
+
'ES': { 'Solar': 23867, 'Wind Onshore': 30159},
|
204 |
+
'AT': { 'Solar': 7294, 'Wind Onshore': 4021 },
|
205 |
+
'DK_1': { 'Solar': 2738, 'Wind Offshore': 1601, 'Wind Onshore': 4112},
|
206 |
+
'DK_2': { 'Solar': 992, 'Wind Offshore': 1045, 'Wind Onshore': 748},
|
207 |
+
}
|
208 |
+
|
209 |
+
forecast_columns_all_RES = [
|
210 |
+
'Load_entsoe','Load_forecast_entsoe','Wind_onshore_entsoe','Wind_onshore_forecast_entsoe','Wind_offshore_entsoe','Wind_offshore_forecast_entsoe','Solar_entsoe','Solar_forecast_entsoe']
|
211 |
+
|
212 |
+
forecast_columns_no_wind_offshore = [
|
213 |
+
'Load_entsoe','Load_forecast_entsoe','Wind_onshore_entsoe','Wind_onshore_forecast_entsoe','Solar_entsoe','Solar_forecast_entsoe']
|
214 |
+
|
215 |
|
216 |
st.sidebar.header('Filters')
|
217 |
|
|
|
220 |
|
221 |
selected_country = st.sidebar.selectbox('Select Country', list(countries.keys()))
|
222 |
|
|
|
|
|
|
|
223 |
# Sidebar with radio buttons for different sections
|
224 |
if selected_country != 'Overall':
|
225 |
st.sidebar.subheader("Section")
|
|
|
228 |
else:
|
229 |
section = None # No section is shown when "Overall" is selected
|
230 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
231 |
if selected_country == 'Overall':
|
232 |
data = None # You can set data to None or a specific dataset based on your logic
|
233 |
section = None # No section selected when "Overall" is chosen
|
234 |
else:
|
235 |
country_code = countries[selected_country]
|
236 |
+
data = data_dict.get(country_code)
|
237 |
+
if country_code in countries_all_RES:
|
238 |
+
forecast_columns = forecast_columns_all_RES
|
239 |
+
elif country_code in countries_no_offshore:
|
240 |
+
forecast_columns = forecast_columns_no_wind_offshore
|
241 |
if country_code == 'BE':
|
|
|
|
|
242 |
weather_columns = ['Temperature', 'Wind Speed Onshore', 'Wind Speed Offshore']
|
243 |
data['Temperature'] = data['temperature_2m_8']
|
|
|
244 |
data['Wind Speed Onshore'] = data['wind_speed_100m_8']
|
245 |
+
data['Wind Speed Offshore'] = data['wind_speed_100m_4']
|
246 |
+
else:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
247 |
weather_columns = ['Temperature', 'Wind Speed']
|
248 |
data['Temperature'] = data['temperature_2m']
|
249 |
data['Wind Speed'] = data['wind_speed_100m']
|
250 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
251 |
|
252 |
if section == 'Data Quality':
|
253 |
|
|
|
255 |
|
256 |
st.write('The table below presents the data quality metrics focusing on the percentage of missing values and the occurrence of extreme or nonsensical values for the selected country.')
|
257 |
|
258 |
+
yesterday_midnight = pd.Timestamp(datetime.datetime.now().date() - pd.Timedelta(days=1)).replace(hour=23, minute=59, second=59)
|
259 |
|
260 |
# Filter data until the end of yesterday (midnight)
|
261 |
data_quality = data[data.index <= yesterday_midnight]
|
|
|
264 |
missing_values = data_quality[forecast_columns].isna().mean() * 100
|
265 |
missing_values = missing_values.round(2)
|
266 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
267 |
if country_code not in installed_capacities:
|
268 |
+
st.markdown(f"⚠️ **Installed capacities not available on ENTSO-E Transparency Platform for country code '{country_code}'. Therefore, cannot calculate Extreme/Nonsensical values.**")
|
269 |
+
# If capacities are not available, assign NaN to extreme_values and skip extreme value checking
|
270 |
+
extreme_values = {col: np.nan for col in forecast_columns}
|
271 |
+
else:
|
272 |
+
capacities = installed_capacities[country_code]
|
273 |
+
extreme_values = {}
|
274 |
+
|
275 |
+
for col in forecast_columns:
|
276 |
+
if 'Solar_entsoe' in col:
|
277 |
+
extreme_values[col] = ((data_quality[col] < 0) | (data_quality[col] > capacities['Solar'])).mean() * 100
|
278 |
+
elif 'Solar_forecast_entsoe' in col:
|
279 |
+
extreme_values[col] = ((data_quality[col] < 0) | (data_quality[col] > capacities['Solar'])).mean() * 100
|
280 |
+
elif 'Wind_onshore_entsoe' in col:
|
281 |
+
extreme_values[col] = ((data_quality[col] < 0) | (data_quality[col] > capacities['Wind Onshore'])).mean() * 100
|
282 |
+
elif 'Wind_onshore_forecast_entsoe' in col:
|
283 |
+
extreme_values[col] = ((data_quality[col] < 0) | (data_quality[col] > capacities['Wind Onshore'])).mean() * 100
|
284 |
+
elif 'Wind_offshore_entsoe' in col:
|
285 |
+
extreme_values[col] = ((data_quality[col] < 0) | (data_quality[col] > capacities['Wind Offshore'])).mean() * 100
|
286 |
+
elif 'Wind_offshore_forecast_entsoe' in col:
|
287 |
+
extreme_values[col] = ((data_quality[col] < 0) | (data_quality[col] > capacities['Wind Offshore'])).mean() * 100
|
288 |
+
elif 'Load_entsoe' in col:
|
289 |
+
extreme_values[col] = ((data_quality[col] < 0)).mean() * 100
|
290 |
+
elif 'Load_forecast_entsoe' in col:
|
291 |
+
extreme_values[col] = ((data_quality[col] < 0)).mean() * 100
|
|
|
|
|
292 |
|
293 |
extreme_values = pd.Series(extreme_values).round(2)
|
|
|
294 |
# Combine all metrics into one DataFrame
|
295 |
metrics_df = pd.DataFrame({
|
296 |
'Missing Values (%)': missing_values,
|
|
|
311 |
st.write('<b><u>Missing values (%)</u></b>: Percentage of missing values in the dataset', unsafe_allow_html=True)
|
312 |
st.write('<b><u>Extreme/Nonsensical values (%)</u></b>: Values that are considered implausible such as negative or out-of-bound values i.e., (generation<0) or (generation>capacity)', unsafe_allow_html=True)
|
313 |
|
|
|
314 |
elif section == 'Forecasts Quality':
|
315 |
|
316 |
st.header('Forecast Quality')
|
|
|
320 |
st.write('The below plot shows the time series of forecasts vs. observations provided by the ENTSO-E Transparency platform from the past week.')
|
321 |
|
322 |
# Options for selecting the data to display
|
323 |
+
if country_code in countries_all_RES:
|
|
|
324 |
variable_options = {
|
325 |
"Load": ("Load_entsoe", "Load_forecast_entsoe"),
|
326 |
"Solar": ("Solar_entsoe", "Solar_forecast_entsoe"),
|
327 |
"Wind Onshore": ("Wind_onshore_entsoe", "Wind_onshore_forecast_entsoe"),
|
328 |
"Wind Offshore": ("Wind_offshore_entsoe", "Wind_offshore_forecast_entsoe")
|
329 |
}
|
330 |
+
elif country_code in countries_no_offshore:
|
331 |
variable_options = {
|
332 |
"Load": ("Load_entsoe", "Load_forecast_entsoe"),
|
333 |
"Solar": ("Solar_entsoe", "Solar_forecast_entsoe"),
|
334 |
"Wind Onshore": ("Wind_onshore_entsoe", "Wind_onshore_forecast_entsoe"),
|
335 |
}
|
336 |
+
else:
|
337 |
+
print('Country code doesnt correspond.')
|
338 |
|
339 |
# Dropdown to select the variable
|
340 |
selected_variable = st.selectbox("Select Variable for Line PLot", list(variable_options.keys()))
|
|
|
370 |
fig.update_layout(title=f'Error Distribution for {selected_variable}')
|
371 |
|
372 |
st.plotly_chart(fig)
|
|
|
|
|
373 |
|
374 |
st.subheader('Accuracy Metrics (Sorted by rMAE):')
|
375 |
|
|
|
376 |
date_range = st.date_input(
|
377 |
"Select Date Range for Metrics Calculation:",
|
378 |
value=(pd.to_datetime("2024-01-01"), pd.to_datetime(pd.Timestamp('today')))
|
|
|
391 |
|
392 |
data = data.loc[start_date:end_date]
|
393 |
|
394 |
+
if country_code in countries_all_RES:
|
395 |
accuracy_metrics = pd.DataFrame(columns=['MAE', 'rMAE'], index=['Load', 'Solar', 'Wind Onshore', 'Wind Offshore'])
|
396 |
+
elif country_code in countries_no_offshore:
|
397 |
accuracy_metrics = pd.DataFrame(columns=['MAE', 'rMAE'], index=['Load', 'Solar', 'Wind Onshore'])
|
398 |
+
else:
|
399 |
+
print('Country code doesnt correspond.')
|
400 |
+
|
401 |
|
402 |
for i in range(0, len(forecast_columns), 2):
|
403 |
actual_col = forecast_columns[i]
|
|
|
501 |
# Optionally calculate and store ACF values for further analysis if needed
|
502 |
acf_values = acf(error.dropna(), nlags=240)
|
503 |
|
|
|
504 |
elif section == 'Insights':
|
505 |
st.header("Insights")
|
506 |
|
|
|
517 |
resampled_data = data_2024.resample('D').mean() # Resample to daily mean
|
518 |
|
519 |
# Select the necessary columns for the scatter plot
|
520 |
+
if country_code in countries_all_RES:
|
521 |
selected_columns = ['Load_entsoe', 'Solar_entsoe', 'Wind_offshore_entsoe', 'Wind_onshore_entsoe'] + weather_columns
|
522 |
+
elif country_code in countries_no_offshore:
|
523 |
selected_columns = ['Load_entsoe', 'Solar_entsoe', 'Wind_onshore_entsoe'] + weather_columns
|
524 |
+
else:
|
525 |
+
print('Country code doesnt correspond.')
|
526 |
|
527 |
selected_df = resampled_data[selected_columns]
|
528 |
selected_df.columns = [col.replace('_entsoe', '').replace('_', ' ') for col in selected_df.columns]
|
|
|
539 |
|
540 |
elif selected_country == 'Overall':
|
541 |
|
|
|
|
|
|
|
|
|
|
|
|
|
542 |
def get_forecast_columns(country_code):
    """Return the actual/forecast column names available for a zone.

    Args:
        country_code: Zone code, looked up in the module-level
            ``countries_all_RES`` and ``countries_no_offshore`` lists.

    Returns:
        ``forecast_columns_all_RES`` when the code is listed in
        ``countries_all_RES``, or ``forecast_columns_no_wind_offshore`` when
        it is listed in ``countries_no_offshore``.

    Raises:
        ValueError: If the code appears in neither list.
    """
    if country_code in countries_all_RES:
        return forecast_columns_all_RES
    if country_code in countries_no_offshore:
        return forecast_columns_no_wind_offshore
    # This branch used to print a message and fall through, implicitly
    # returning None; a caller that indexes a DataFrame with the result then
    # fails far from the real cause. Fail here with the offending code.
    raise ValueError(
        f"Unknown country code {country_code!r}: not listed in "
        "countries_all_RES or countries_no_offshore."
    )
|
|
|
|
|
|
|
549 |
|
550 |
+
def calculate_net_load_error(df, country_code):
    """Compute the latest net-load forecast error for one zone.

    Net load is load minus onshore wind and solar generation, and minus
    offshore wind when the zone's column list contains it. Only rows in
    which every selected column is non-null are considered.

    Args:
        df: Frame holding the ``*_entsoe`` / ``*_forecast_entsoe`` columns
            (assumes a datetime-like index, since the last index label is
            formatted with ``strftime`` -- confirm against the loader).
        country_code: Zone code forwarded to ``get_forecast_columns``.

    Returns:
        ``(error, date)``: forecast net load minus actual net load on the
        last complete row, and that row's index label formatted as
        ``"%Y-%m-%d %H:%M"``. When no row is complete, ``(nan, None)``.
    """
    forecast_columns = get_forecast_columns(country_code)
    filter_df = df[forecast_columns].dropna()

    # Without this guard an all-incomplete frame raised IndexError on the
    # positional lookups below and aborted the map for every zone.
    if filter_df.empty:
        return float("nan"), None

    # Start from load and subtract the generation sources every zone has.
    net_load = (
        filter_df['Load_entsoe']
        - filter_df['Wind_onshore_entsoe']
        - filter_df['Solar_entsoe']
    )
    net_load_forecast = (
        filter_df['Load_forecast_entsoe']
        - filter_df['Wind_onshore_forecast_entsoe']
        - filter_df['Solar_forecast_entsoe']
    )

    # Offshore wind columns are only present for some zones.
    if 'Wind_offshore_entsoe' in filter_df.columns:
        net_load = net_load - filter_df['Wind_offshore_entsoe']
    if 'Wind_offshore_forecast_entsoe' in filter_df.columns:
        net_load_forecast = net_load_forecast - filter_df['Wind_offshore_forecast_entsoe']

    # Report the error on the most recent complete row.
    error = (net_load_forecast - net_load).iloc[-1]
    date = filter_df.index[-1].strftime("%Y-%m-%d %H:%M")

    return error, date
|
571 |
|
572 |
+
def plot_net_load_error_map(data_dict):
|
573 |
# Calculate net load errors and dates for each country
|
574 |
+
net_load_errors = {country_code: calculate_net_load_error(data, country_code) for country_code, data in data_dict.items()}
|
575 |
+
|
576 |
+
# Use country codes directly
|
577 |
+
selected_country_codes = list(data_dict.keys())
|
578 |
|
|
|
579 |
df_net_load_error = pd.DataFrame({
|
580 |
+
'zoneName': selected_country_codes,
|
581 |
'net_load_error': [v[0] for v in net_load_errors.values()],
|
582 |
'date': [v[1] for v in net_load_errors.values()]
|
583 |
})
|
584 |
|
585 |
+
# Load the GeoJSON data using the entsoe library
|
586 |
+
date = pd.Timestamp.now()
|
587 |
+
geo_data = load_zones(selected_country_codes, date)
|
588 |
|
589 |
+
# Reset index to include 'zoneName' as a column
|
590 |
+
geo_data = geo_data.reset_index()
|
|
|
|
|
|
|
|
|
591 |
|
592 |
+
# Map country codes to country names
|
593 |
+
countries_code_to_name = {v: k for k, v in countries.items()}
|
594 |
+
geo_data['name'] = geo_data['zoneName'].map(countries_code_to_name)
|
595 |
+
|
596 |
+
# Merge net_load_error and date into geo_data
|
597 |
+
geo_data = geo_data.merge(df_net_load_error, on='zoneName', how='left')
|
598 |
+
|
599 |
+
# Initialize the Folium map
|
600 |
+
m = folium.Map(location=[46.6034, 1.8883], zoom_start=4, tiles="cartodb positron")
|
601 |
+
|
602 |
+
# Calculate the maximum absolute net load error for normalization
|
603 |
+
max_value = df_net_load_error['net_load_error'].abs().max()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
604 |
|
605 |
+
# Create a colormap with lighter shades
|
606 |
+
colormap = branca.colormap.LinearColormap(
|
607 |
+
colors=['#0D92F4', 'white', '#C62E2E'], # Light blue to white to light coral
|
608 |
+
vmin=-max_value,
|
609 |
+
vmax=max_value,
|
610 |
+
caption='Net Load Error [MW]'
|
611 |
+
)
|
612 |
+
|
613 |
+
# Define the style function
|
614 |
+
def style_function(feature):
    """Return the Folium style dict for one GeoJSON feature.

    A feature whose ``net_load_error`` property is ``None`` gets a grey
    outline and no fill colour; any other feature is filled with the colour
    that ``colormap`` assigns to its value.
    """
    value = feature['properties']['net_load_error']
    if value is not None:
        return {
            'fillColor': colormap(value),
            'fillOpacity': 0.8,  # constant opacity for every coloured zone
            'color': 'black',
            'weight': 0.5,
        }
    return {'fillOpacity': 0.5, 'color': 'grey', 'weight': 0.5}
|
626 |
+
|
627 |
+
# Add the GeoJson layer with the custom style_function
|
628 |
folium.GeoJson(
|
629 |
+
geo_data,
|
630 |
+
style_function=style_function,
|
631 |
tooltip=folium.GeoJsonTooltip(
|
632 |
fields=["name", "net_load_error", "date"],
|
633 |
aliases=["Country:", "Net Load Error [MW]:", "Date:"],
|
|
|
635 |
)
|
636 |
).add_to(m)
|
637 |
|
638 |
+
# Add the colormap to the map
|
639 |
+
colormap.add_to(m)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
640 |
|
641 |
+
# Display the map
|
642 |
+
st_folium(m, width=700, height=600)
|
|
|
|
|
643 |
|
|
|
644 |
def calculate_mae(actual, forecast):
    """Return the mean absolute error between observations and forecasts.

    Args:
        actual: Observed values (anything supporting ``-`` with *forecast*).
        forecast: Forecast values aligned with *actual*.

    Returns:
        ``np.mean`` of the element-wise absolute difference.
    """
    absolute_error = np.abs(actual - forecast)
    return np.mean(absolute_error)
|
646 |
|
|
|
647 |
def calculate_persistence_mae(data, shift_hours):
    """Return the mean absolute error of a persistence (lagged-copy) forecast.

    The series is compared with itself shifted by *shift_hours* positions.

    Args:
        data: Object exposing ``.shift`` (presumably a pandas Series with an
            hourly index -- confirm against the caller).
        shift_hours: Number of positions passed to ``data.shift``.

    Returns:
        ``np.mean`` of the absolute difference between *data* and its
        shifted copy.
    """
    lagged = data.shift(shift_hours)
    absolute_change = np.abs(data - lagged)
    return np.mean(absolute_change)
|
649 |
|
|
|
650 |
def calculate_rmae_for_country(df):
|
651 |
rmae = {}
|
652 |
rmae['Load'] = calculate_mae(df['Load_entsoe'], df['Load_forecast_entsoe']) / calculate_persistence_mae(df['Load_entsoe'], 168)
|
|
|
662 |
|
663 |
return rmae
|
664 |
|
|
|
665 |
def create_rmae_dataframe(data_dict):
|
666 |
|
667 |
rmae_values = {'Country': [], 'Load': [], 'Wind_onshore': [], 'Wind_offshore': [], 'Solar': []}
|
|
|
684 |
|
685 |
return pd.DataFrame(rmae_values)
|
686 |
|
|
|
687 |
def plot_rmae_radar_chart(rmae_df):
|
688 |
fig = go.Figure()
|
689 |
|
|
|
709 |
)
|
710 |
st.plotly_chart(fig)
|
711 |
|
712 |
+
|
713 |
+
st.subheader("Net Load Error Map")
|
714 |
+
st.write("""
|
715 |
+
The net load error map highlights the error in the forecasted versus actual net load for each country.
|
716 |
+
Hover over each country to see details on the latest net load error and the timestamp (with the time zone of the corresponding country) of the last recorded data.
|
717 |
+
""")
|
718 |
+
|
719 |
+
plot_net_load_error_map(data_dict)
|
720 |
+
|
721 |
+
st.subheader("rMAE of Forecasts published on ENTSO-E TP")
|
722 |
+
st.write("""The rMAE of Forecasts chart compares the forecast accuracy of the predictions published by ENTSO-E Transparency Platform for Portugal, Spain, Belgium, France, Germany-Luxembourg, Austria, the Netherlands, Italy and Denmark. It shows the rMAE for onshore wind, offshore wind (if any), solar, and load demand, highlighting how well forecasts perform relative to a basic persistence model across these countries and energy sectors.""")
|
723 |
+
|
724 |
rmae_df = create_rmae_dataframe(data_dict)
|
725 |
plot_rmae_radar_chart(rmae_df)
|
726 |
|
727 |
|
728 |
+
|
729 |
|