mmmapms commited on
Commit
3a7655a
1 Parent(s): 9df8181

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +196 -203
app.py CHANGED
@@ -103,6 +103,14 @@ if github_token:
103
  Data_PT=load_GitHub(github_token, 'PT_Entsoe_UTC.csv', hour, after_10_min)
104
  Data_ES=load_GitHub(github_token, 'ES_Entsoe_UTC.csv', hour, after_10_min)
105
  Data_AT=load_GitHub(github_token, 'AT_Entsoe_UTC.csv', hour, after_10_min)
 
 
 
 
 
 
 
 
106
 
107
  Data_BE=convert_European_time(Data_BE, 'Europe/Brussels')
108
  Data_FR=convert_European_time(Data_FR, 'Europe/Paris')
@@ -111,6 +119,14 @@ if github_token:
111
  Data_PT=convert_European_time(Data_PT, 'Europe/Lisbon')
112
  Data_ES=convert_European_time(Data_ES, 'Europe/Madrid')
113
  Data_AT=convert_European_time(Data_AT, 'Europe/Vienna')
 
 
 
 
 
 
 
 
114
 
115
 
116
  else:
@@ -131,7 +147,7 @@ with col2:
131
  st.image("energyville_logo.png", width=100)
132
 
133
 
134
- st.write("**Evaluate and analyze ENTSO-E Transparency Platform data quality, forecast accuracy, and energy trends for Portugal, Spain, Belgium, France, Germany, Austria, and the Netherlands.**")
135
 
136
  upper_space.markdown("""
137
   
@@ -142,13 +158,60 @@ countries = {
142
  'Overall': 'Overall',
143
  'Austria': 'AT',
144
  'Belgium': 'BE',
 
 
145
  'France': 'FR',
146
- 'Germany': 'DE',
 
 
 
 
 
 
147
  'Netherlands': 'NL',
148
  'Portugal': 'PT',
149
  'Spain': 'ES',
150
  }
151
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
152
 
153
  st.sidebar.header('Filters')
154
 
@@ -157,9 +220,6 @@ st.sidebar.caption("Choose the country for which you want to display data or for
157
 
158
  selected_country = st.sidebar.selectbox('Select Country', list(countries.keys()))
159
 
160
- # Ensure the date range provides two dates
161
-
162
-
163
  # Sidebar with radio buttons for different sections
164
  if selected_country != 'Overall':
165
  st.sidebar.subheader("Section")
@@ -168,78 +228,26 @@ if selected_country != 'Overall':
168
  else:
169
  section = None # No section is shown when "Overall" is selected
170
 
171
-
172
- forecast_columns_with_wind_offshore = [
173
- 'Load_entsoe','Load_forecast_entsoe','Wind_onshore_entsoe','Wind_onshore_forecast_entsoe','Wind_offshore_entsoe','Wind_offshore_forecast_entsoe','Solar_entsoe','Solar_forecast_entsoe']
174
-
175
- forecast_columns_no_wind_offshore = [
176
- 'Load_entsoe','Load_forecast_entsoe','Wind_onshore_entsoe','Wind_onshore_forecast_entsoe','Solar_entsoe','Solar_forecast_entsoe']
177
-
178
  if selected_country == 'Overall':
179
  data = None # You can set data to None or a specific dataset based on your logic
180
  section = None # No section selected when "Overall" is chosen
181
  else:
182
  country_code = countries[selected_country]
 
 
 
 
 
183
  if country_code == 'BE':
184
- forecast_columns=forecast_columns_with_wind_offshore
185
- data = Data_BE
186
  weather_columns = ['Temperature', 'Wind Speed Onshore', 'Wind Speed Offshore']
187
  data['Temperature'] = data['temperature_2m_8']
188
- data['Wind Speed Offshore'] = data['wind_speed_100m_4']
189
  data['Wind Speed Onshore'] = data['wind_speed_100m_8']
190
-
191
- elif country_code == 'DE':
192
- forecast_columns=forecast_columns_with_wind_offshore
193
- data = Data_DE
194
- weather_columns = ['Temperature', 'Wind Speed']
195
- data['Temperature'] = data['temperature_2m']
196
- data['Wind Speed'] = data['wind_speed_100m']
197
-
198
- elif country_code == 'NL':
199
- forecast_columns=forecast_columns_with_wind_offshore
200
- data = Data_NL
201
- weather_columns = ['Temperature', 'Wind Speed']
202
- data['Temperature'] = data['temperature_2m']
203
- data['Wind Speed'] = data['wind_speed_100m']
204
-
205
- elif country_code == 'FR':
206
- forecast_columns=forecast_columns_with_wind_offshore
207
- data = Data_FR
208
- weather_columns = ['Temperature', 'Wind Speed']
209
- data['Temperature'] = data['temperature_2m']
210
- data['Wind Speed'] = data['wind_speed_100m']
211
-
212
- elif country_code == 'PT':
213
- forecast_columns=forecast_columns_with_wind_offshore
214
- data = Data_PT
215
- weather_columns = ['Temperature', 'Wind Speed']
216
- data['Temperature'] = data['temperature_2m']
217
- data['Wind Speed'] = data['wind_speed_100m']
218
- elif country_code == 'AT':
219
- forecast_columns=forecast_columns_no_wind_offshore
220
- data = Data_AT
221
- weather_columns = ['Temperature', 'Wind Speed']
222
- data['Temperature'] = data['temperature_2m']
223
- data['Wind Speed'] = data['wind_speed_100m']
224
- elif country_code == 'ES':
225
- forecast_columns=forecast_columns_no_wind_offshore
226
- data = Data_ES
227
  weather_columns = ['Temperature', 'Wind Speed']
228
  data['Temperature'] = data['temperature_2m']
229
  data['Wind Speed'] = data['wind_speed_100m']
230
 
231
- def add_feature(df2, df_main):
232
- #df_main.index = pd.to_datetime(df_main.index)
233
- #df2.index = pd.to_datetime(df2.index)
234
- df_combined = df_main.combine_first(df2)
235
- last_date_df1 = df_main.index.max()
236
- first_date_df2 = df2.index.min()
237
- if first_date_df2 == last_date_df1 + pd.Timedelta(hours=1):
238
- df_combined = pd.concat([df_main, df2[df2.index > last_date_df1]], axis=0)
239
- #df_combined.reset_index(inplace=True)
240
- return df_combined
241
- #data.index = data.index.tz_localize('UTC')
242
-
243
 
244
  if section == 'Data Quality':
245
 
@@ -247,7 +255,7 @@ if section == 'Data Quality':
247
 
248
  st.write('The table below presents the data quality metrics focusing on the percentage of missing values and the occurrence of extreme or nonsensical values for the selected country.')
249
 
250
- yesterday_midnight = pd.Timestamp(datetime.now().date() - pd.Timedelta(days=1)).replace(hour=23, minute=59, second=59)
251
 
252
  # Filter data until the end of yesterday (midnight)
253
  data_quality = data[data.index <= yesterday_midnight]
@@ -256,46 +264,33 @@ if section == 'Data Quality':
256
  missing_values = data_quality[forecast_columns].isna().mean() * 100
257
  missing_values = missing_values.round(2)
258
 
259
- installed_capacities = {
260
- 'FR': { 'Solar': 17419, 'Wind Offshore': 1483, 'Wind Onshore': 22134},
261
- 'DE': { 'Solar': 73821, 'Wind Offshore': 8386, 'Wind Onshore': 59915},
262
- 'BE': { 'Solar': 8789, 'Wind Offshore': 2262, 'Wind Onshore': 3053},
263
- 'NL': { 'Solar': 22590, 'Wind Offshore': 3220, 'Wind Onshore': 6190},
264
- 'PT': { 'Solar': 1811, 'Wind Offshore': 25, 'Wind Onshore': 5333},
265
- 'ES': { 'Solar': 23867, 'Wind Onshore': 30159},
266
- 'AT': { 'Solar': 7294, 'Wind Onshore': 4021 }
267
- }
268
-
269
  if country_code not in installed_capacities:
270
- st.error(f"Installed capacities not defined for country code '{country_code}'.")
271
- st.stop()
272
-
273
-
274
- # Report % of extreme, impossible values for the selected country
275
- capacities = installed_capacities[country_code]
276
- extreme_values = {}
277
-
278
- for col in forecast_columns:
279
- if 'Solar_entsoe' in col:
280
- extreme_values[col] = ((data_quality[col] < 0) | (data_quality[col] > capacities['Solar'])).mean() * 100
281
- elif 'Solar_forecast_entsoe' in col:
282
- extreme_values[col] = ((data_quality[col] < 0) | (data_quality[col] > capacities['Solar'])).mean() * 100
283
- elif 'Wind_onshore_entsoe' in col:
284
- extreme_values[col] = ((data_quality[col] < 0) | (data_quality[col] > capacities['Wind Onshore'])).mean() * 100
285
- elif 'Wind_onshore_forecast_entsoe' in col:
286
- extreme_values[col] = ((data_quality[col] < 0) | (data_quality[col] > capacities['Wind Onshore'])).mean() * 100
287
- elif 'Wind_offshore_entsoe' in col:
288
- extreme_values[col] = ((data_quality[col] < 0) | (data_quality[col] > capacities['Wind Offshore'])).mean() * 100
289
- elif 'Wind_offshore_forecast_entsoe' in col:
290
- extreme_values[col] = ((data_quality[col] < 0) | (data_quality[col] > capacities['Wind Offshore'])).mean() * 100
291
- elif 'Load_entsoe' in col:
292
- extreme_values[col] = ((data_quality[col] < 0)).mean() * 100
293
- elif 'Load_forecast_entsoe' in col:
294
- extreme_values[col] = ((data_quality[col] < 0)).mean() * 100
295
-
296
 
297
  extreme_values = pd.Series(extreme_values).round(2)
298
-
299
  # Combine all metrics into one DataFrame
300
  metrics_df = pd.DataFrame({
301
  'Missing Values (%)': missing_values,
@@ -316,7 +311,6 @@ if section == 'Data Quality':
316
  st.write('<b><u>Missing values (%)</u></b>: Percentage of missing values in the dataset', unsafe_allow_html=True)
317
  st.write('<b><u>Extreme/Nonsensical values (%)</u></b>: Values that are considered implausible such as negative or out-of-bound values i.e., (generation<0) or (generation>capacity)', unsafe_allow_html=True)
318
 
319
- # Section 2: Forecasts
320
  elif section == 'Forecasts Quality':
321
 
322
  st.header('Forecast Quality')
@@ -326,20 +320,21 @@ elif section == 'Forecasts Quality':
326
  st.write('The below plot shows the time series of forecasts vs. observations provided by the ENTSO-E Transparency platform from the past week.')
327
 
328
  # Options for selecting the data to display
329
- if country_code!='ES' and country_code!='AT':
330
-
331
  variable_options = {
332
  "Load": ("Load_entsoe", "Load_forecast_entsoe"),
333
  "Solar": ("Solar_entsoe", "Solar_forecast_entsoe"),
334
  "Wind Onshore": ("Wind_onshore_entsoe", "Wind_onshore_forecast_entsoe"),
335
  "Wind Offshore": ("Wind_offshore_entsoe", "Wind_offshore_forecast_entsoe")
336
  }
337
- else:
338
  variable_options = {
339
  "Load": ("Load_entsoe", "Load_forecast_entsoe"),
340
  "Solar": ("Solar_entsoe", "Solar_forecast_entsoe"),
341
  "Wind Onshore": ("Wind_onshore_entsoe", "Wind_onshore_forecast_entsoe"),
342
  }
 
 
343
 
344
  # Dropdown to select the variable
345
  selected_variable = st.selectbox("Select Variable for Line PLot", list(variable_options.keys()))
@@ -375,12 +370,9 @@ elif section == 'Forecasts Quality':
375
  fig.update_layout(title=f'Error Distribution for {selected_variable}')
376
 
377
  st.plotly_chart(fig)
378
-
379
-
380
 
381
  st.subheader('Accuracy Metrics (Sorted by rMAE):')
382
 
383
-
384
  date_range = st.date_input(
385
  "Select Date Range for Metrics Calculation:",
386
  value=(pd.to_datetime("2024-01-01"), pd.to_datetime(pd.Timestamp('today')))
@@ -399,10 +391,13 @@ elif section == 'Forecasts Quality':
399
 
400
  data = data.loc[start_date:end_date]
401
 
402
- if country_code!='ES' and country_code!='AT':
403
  accuracy_metrics = pd.DataFrame(columns=['MAE', 'rMAE'], index=['Load', 'Solar', 'Wind Onshore', 'Wind Offshore'])
404
- else:
405
  accuracy_metrics = pd.DataFrame(columns=['MAE', 'rMAE'], index=['Load', 'Solar', 'Wind Onshore'])
 
 
 
406
 
407
  for i in range(0, len(forecast_columns), 2):
408
  actual_col = forecast_columns[i]
@@ -506,7 +501,6 @@ elif section == 'Forecasts Quality':
506
  # Optionally calculate and store ACF values for further analysis if needed
507
  acf_values = acf(error.dropna(), nlags=240)
508
 
509
- # Section 3: Insights
510
  elif section == 'Insights':
511
  st.header("Insights")
512
 
@@ -523,10 +517,12 @@ elif section == 'Insights':
523
  resampled_data = data_2024.resample('D').mean() # Resample to daily mean
524
 
525
  # Select the necessary columns for the scatter plot
526
- if country_code!='ES' and country_code!='AT':
527
  selected_columns = ['Load_entsoe', 'Solar_entsoe', 'Wind_offshore_entsoe', 'Wind_onshore_entsoe'] + weather_columns
528
- else:
529
  selected_columns = ['Load_entsoe', 'Solar_entsoe', 'Wind_onshore_entsoe'] + weather_columns
 
 
530
 
531
  selected_df = resampled_data[selected_columns]
532
  selected_df.columns = [col.replace('_entsoe', '').replace('_', ' ') for col in selected_df.columns]
@@ -543,93 +539,95 @@ elif section == 'Insights':
543
 
544
  elif selected_country == 'Overall':
545
 
546
- st.subheader("Net Load Error Map")
547
- st.write("""
548
- The net load error map highlights the error in the forecasted versus actual net load for each country.
549
- Hover over each country to see details on the latest net load error and the timestamp (with the time zone of the corresponding country) of the last recorded data.
550
- """)
551
-
552
  def get_forecast_columns(country_code):
553
- if country_code in ['Belgium', 'Germany', 'Netherlands', 'France', 'Portugal']:
554
- return ['Load_entsoe', 'Wind_onshore_entsoe', 'Solar_entsoe', 'Load_forecast_entsoe', 'Wind_onshore_forecast_entsoe', 'Solar_forecast_entsoe', 'Wind_offshore_entsoe', 'Wind_offshore_forecast_entsoe']
 
 
555
  else:
556
- return ['Load_entsoe', 'Wind_onshore_entsoe', 'Solar_entsoe', 'Load_forecast_entsoe', 'Wind_onshore_forecast_entsoe', 'Solar_forecast_entsoe']
557
-
558
- def plot_net_load_error_map(data_dict):
559
- # Define forecast columns used in calculation
560
 
561
- def calculate_net_load_error(df, country_code):
562
- forecast_columns = get_forecast_columns(country_code)
563
- filter_df = df[forecast_columns].dropna()
564
 
565
- # Initialize net_load and net_load_forecast with Load and other available data
566
- net_load = filter_df['Load_entsoe'] - filter_df['Wind_onshore_entsoe'] - filter_df['Solar_entsoe']
567
- net_load_forecast = filter_df['Load_forecast_entsoe'] - filter_df['Wind_onshore_forecast_entsoe'] - filter_df['Solar_forecast_entsoe']
568
 
569
- # Subtract Wind_offshore_entsoe if the column exists
570
- if 'Wind_offshore_entsoe' in filter_df.columns:
571
- net_load -= filter_df['Wind_offshore_entsoe']
572
 
573
- # Subtract Wind_offshore_forecast_entsoe if the column exists
574
- if 'Wind_offshore_forecast_entsoe' in filter_df.columns:
575
- net_load_forecast -= filter_df['Wind_offshore_forecast_entsoe']
576
 
577
- # Calculate the error based on the latest values
578
- error = (net_load_forecast - net_load).iloc[-1]
579
- date = filter_df.index[-1].strftime("%Y-%m-%d %H:%M") # Get the latest date in string format
580
 
581
- return error, date
582
 
 
583
  # Calculate net load errors and dates for each country
584
- net_load_errors = {country_name: calculate_net_load_error(data, country_name) for country_name, data in data_dict.items()}
 
 
 
585
 
586
- # Create DataFrame for Folium with additional date column
587
  df_net_load_error = pd.DataFrame({
588
- 'country': list(net_load_errors.keys()),
589
  'net_load_error': [v[0] for v in net_load_errors.values()],
590
  'date': [v[1] for v in net_load_errors.values()]
591
  })
592
 
593
- # Load the GeoJSON file
594
- geojson_url = "https://raw.githubusercontent.com/python-visualization/folium/master/examples/data/world-countries.json"
595
- geo_data = requests.get(geojson_url).json()
596
 
597
- # Filter GeoJSON to only include the selected countries
598
- selected_countries = list(data_dict.keys()) # Get the list of selected countries (Belgium, France, Germany, Netherlands)
599
- filtered_geojson = {
600
- "type": "FeatureCollection",
601
- "features": [feature for feature in geo_data["features"] if feature["properties"]["name"] in selected_countries]
602
- }
603
 
604
- # Merge the geojson with the error and date data
605
- for feature in filtered_geojson["features"]:
606
- country_name = feature["properties"]["name"]
607
- row = df_net_load_error[df_net_load_error['country'] == country_name]
608
- if not row.empty:
609
- feature["properties"]["net_load_error"] = row.iloc[0]["net_load_error"]
610
- feature["properties"]["date"] = row.iloc[0]["date"]
611
-
612
- # Initialize the Folium map centered on Central Europe
613
- m = folium.Map(location=[46.6034, 1.8883], zoom_start=4.5, tiles="cartodb positron")
614
-
615
- # Add choropleth layer to map net load errors by country
616
- folium.Choropleth(
617
- geo_data=filtered_geojson,
618
- name="choropleth",
619
- data=df_net_load_error,
620
- columns=["country", "net_load_error"],
621
- key_on="feature.properties.name",
622
- fill_color= "RdYlBu", #"RdYlBu", # Use a more vibrant color palette
623
- fill_opacity=0.7,
624
- line_opacity=0.5,
625
- line_color="black", # Neutral border color
626
- legend_name="Net Load Error [MW]"
627
- ).add_to(m)
628
 
629
- # Add a GeoJson layer with custom tooltip for country, error, and date
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
630
  folium.GeoJson(
631
- filtered_geojson,
632
- style_function=lambda x: {'fillOpacity': 0, 'color': 'black', 'weight': 0},
633
  tooltip=folium.GeoJsonTooltip(
634
  fields=["name", "net_load_error", "date"],
635
  aliases=["Country:", "Net Load Error [MW]:", "Date:"],
@@ -637,33 +635,18 @@ elif selected_country == 'Overall':
637
  )
638
  ).add_to(m)
639
 
640
- # Display Folium map in Streamlit
641
- st_folium(m, width=700, height=600)
642
-
643
- data_dict = {
644
- 'Belgium': Data_BE,
645
- 'France': Data_FR,
646
- 'Germany': Data_DE,
647
- 'Netherlands': Data_NL,
648
- 'Portugal': Data_PT,
649
- 'Austria': Data_AT,
650
- 'Spain': Data_ES,
651
- }
652
 
653
- plot_net_load_error_map(data_dict)
654
-
655
- st.subheader("rMAE of Forecasts published on ENTSO-E TP")
656
- st.write("""The rMAE of Forecasts chart compares the forecast accuracy of the predictions published by ENTSO-E Transparency Platform for Portugal, Spain, Belgium, France, Germany, Austria, and the Netherlands. It shows the rMAE for onshore wind, offshore wind (if any), solar, and load demand, highlighting how well forecasts perform relative to a basic persistence model across these countries and energy sectors.""")
657
 
658
- # Function to calculate MAE
659
  def calculate_mae(actual, forecast):
660
  return np.mean(np.abs(actual - forecast))
661
 
662
- # Function to calculate persistence MAE
663
  def calculate_persistence_mae(data, shift_hours):
664
  return np.mean(np.abs(data - data.shift(shift_hours)))
665
 
666
- # Function to calculate rMAE for each country
667
  def calculate_rmae_for_country(df):
668
  rmae = {}
669
  rmae['Load'] = calculate_mae(df['Load_entsoe'], df['Load_forecast_entsoe']) / calculate_persistence_mae(df['Load_entsoe'], 168)
@@ -679,7 +662,6 @@ elif selected_country == 'Overall':
679
 
680
  return rmae
681
 
682
- # Function to create rMAE DataFrame
683
  def create_rmae_dataframe(data_dict):
684
 
685
  rmae_values = {'Country': [], 'Load': [], 'Wind_onshore': [], 'Wind_offshore': [], 'Solar': []}
@@ -702,7 +684,6 @@ elif selected_country == 'Overall':
702
 
703
  return pd.DataFrame(rmae_values)
704
 
705
- # Function to plot radar chart
706
  def plot_rmae_radar_chart(rmae_df):
707
  fig = go.Figure()
708
 
@@ -728,9 +709,21 @@ elif selected_country == 'Overall':
728
  )
729
  st.plotly_chart(fig)
730
 
731
- # Main execution to create and display radar plot
 
 
 
 
 
 
 
 
 
 
 
732
  rmae_df = create_rmae_dataframe(data_dict)
733
  plot_rmae_radar_chart(rmae_df)
734
 
735
 
 
736
 
 
103
  Data_PT=load_GitHub(github_token, 'PT_Entsoe_UTC.csv', hour, after_10_min)
104
  Data_ES=load_GitHub(github_token, 'ES_Entsoe_UTC.csv', hour, after_10_min)
105
  Data_AT=load_GitHub(github_token, 'AT_Entsoe_UTC.csv', hour, after_10_min)
106
+ Data_IT_CALA=load_GitHub(github_token, 'IT_CALA_Entsoe_UTC.csv', hour, after_10_min)
107
+ Data_IT_CNOR=load_GitHub(github_token, 'IT_CNOR_Entsoe_UTC.csv', hour, after_10_min)
108
+ Data_IT_CSUD=load_GitHub(github_token, 'IT_CSUD_Entsoe_UTC.csv', hour, after_10_min)
109
+ Data_IT_NORD=load_GitHub(github_token, 'IT_NORD_Entsoe_UTC.csv', hour, after_10_min)
110
+ Data_IT_SICI=load_GitHub(github_token, 'IT_SICI_Entsoe_UTC.csv', hour, after_10_min)
111
+ Data_IT_SUD=load_GitHub(github_token, 'IT_SUD_Entsoe_UTC.csv', hour, after_10_min)
112
+ Data_DK_1=load_GitHub(github_token, 'DK_1_Entsoe_UTC.csv', hour, after_10_min)
113
+ Data_DK_2=load_GitHub(github_token, 'DK_2_Entsoe_UTC.csv', hour, after_10_min)
114
 
115
  Data_BE=convert_European_time(Data_BE, 'Europe/Brussels')
116
  Data_FR=convert_European_time(Data_FR, 'Europe/Paris')
 
119
  Data_PT=convert_European_time(Data_PT, 'Europe/Lisbon')
120
  Data_ES=convert_European_time(Data_ES, 'Europe/Madrid')
121
  Data_AT=convert_European_time(Data_AT, 'Europe/Vienna')
122
+ Data_IT_CALA = convert_European_time(Data_IT_CALA, 'Europe/Rome')
123
+ Data_IT_CNOR = convert_European_time(Data_IT_CNOR, 'Europe/Rome')
124
+ Data_IT_CSUD = convert_European_time(Data_IT_CSUD, 'Europe/Rome')
125
+ Data_IT_NORD = convert_European_time(Data_IT_NORD, 'Europe/Rome')
126
+ Data_IT_SICI = convert_European_time(Data_IT_SICI, 'Europe/Rome')
127
+ Data_IT_SUD = convert_European_time(Data_IT_SUD, 'Europe/Rome')
128
+ Data_DK_1 = convert_European_time(Data_DK_1, 'Europe/Copenhagen')
129
+ Data_DK_2 = convert_European_time(Data_DK_2, 'Europe/Copenhagen')
130
 
131
 
132
  else:
 
147
  st.image("energyville_logo.png", width=100)
148
 
149
 
150
+ st.write("**Evaluate and analyze ENTSO-E Transparency Platform data quality, forecast accuracy, and energy trends for Portugal, Spain, Belgium, France, Germany-Luxembourg, Austria, the Netherlands, Italy and Denmark.**")
151
 
152
  upper_space.markdown("""
153
  &nbsp;
 
158
  'Overall': 'Overall',
159
  'Austria': 'AT',
160
  'Belgium': 'BE',
161
+ 'Denmark 1': 'DK_1',
162
+ 'Denmark 2': 'DK_2',
163
  'France': 'FR',
164
+ 'Germany-Luxembourg': 'DE_LU',
165
+ 'Italy Calabria': 'IT_CALA',
166
+ 'Italy Central North': 'IT_CNOR',
167
+ 'Italy Central South': 'IT_CSUD',
168
+ 'Italy North': 'IT_NORD',
169
+ 'Italy Sicily': 'IT_SICI',
170
+ 'Italy South': 'IT_SUD',
171
  'Netherlands': 'NL',
172
  'Portugal': 'PT',
173
  'Spain': 'ES',
174
  }
175
 
176
+ data_dict = {
177
+ 'BE': Data_BE,
178
+ 'FR': Data_FR,
179
+ 'DE_LU': Data_DE,
180
+ 'NL': Data_NL,
181
+ 'PT': Data_PT,
182
+ 'AT': Data_AT,
183
+ 'ES': Data_ES,
184
+ 'IT_CALA': Data_IT_CALA,
185
+ 'IT_CNOR': Data_IT_CNOR,
186
+ 'IT_CSUD': Data_IT_CSUD,
187
+ 'IT_NORD': Data_IT_NORD,
188
+ 'IT_SICI': Data_IT_SICI,
189
+ 'IT_SUD': Data_IT_SUD,
190
+ 'DK_1': Data_DK_1,
191
+ 'DK_2': Data_DK_2,
192
+ }
193
+
194
+ countries_all_RES = ['BE', 'FR', 'NL', 'DE_LU', 'PT', 'DK_1', 'DK_2']
195
+ countries_no_offshore= ['AT', 'ES', 'IT_CALA', 'IT_CNOR', 'IT_CSUD', 'IT_NORD', 'IT_SICI', 'IT_SUD',]
196
+
197
+ installed_capacities = {
198
+ 'FR': { 'Solar': 17419, 'Wind Offshore': 1483, 'Wind Onshore': 22134},
199
+ 'DE_LU': { 'Solar': 73821, 'Wind Offshore': 8386, 'Wind Onshore': 59915},
200
+ 'BE': { 'Solar': 8789, 'Wind Offshore': 2262, 'Wind Onshore': 3053},
201
+ 'NL': { 'Solar': 22590, 'Wind Offshore': 3220, 'Wind Onshore': 6190},
202
+ 'PT': { 'Solar': 1811, 'Wind Offshore': 25, 'Wind Onshore': 5333},
203
+ 'ES': { 'Solar': 23867, 'Wind Onshore': 30159},
204
+ 'AT': { 'Solar': 7294, 'Wind Onshore': 4021 },
205
+ 'DK_1': { 'Solar': 2738, 'Wind Offshore': 1601, 'Wind Onshore': 4112},
206
+ 'DK_2': { 'Solar': 992, 'Wind Offshore': 1045, 'Wind Onshore': 748},
207
+ }
208
+
209
+ forecast_columns_all_RES = [
210
+ 'Load_entsoe','Load_forecast_entsoe','Wind_onshore_entsoe','Wind_onshore_forecast_entsoe','Wind_offshore_entsoe','Wind_offshore_forecast_entsoe','Solar_entsoe','Solar_forecast_entsoe']
211
+
212
+ forecast_columns_no_wind_offshore = [
213
+ 'Load_entsoe','Load_forecast_entsoe','Wind_onshore_entsoe','Wind_onshore_forecast_entsoe','Solar_entsoe','Solar_forecast_entsoe']
214
+
215
 
216
  st.sidebar.header('Filters')
217
 
 
220
 
221
  selected_country = st.sidebar.selectbox('Select Country', list(countries.keys()))
222
 
 
 
 
223
  # Sidebar with radio buttons for different sections
224
  if selected_country != 'Overall':
225
  st.sidebar.subheader("Section")
 
228
  else:
229
  section = None # No section is shown when "Overall" is selected
230
 
 
 
 
 
 
 
 
231
  if selected_country == 'Overall':
232
  data = None # You can set data to None or a specific dataset based on your logic
233
  section = None # No section selected when "Overall" is chosen
234
  else:
235
  country_code = countries[selected_country]
236
+ data = data_dict.get(country_code)
237
+ if country_code in countries_all_RES:
238
+ forecast_columns = forecast_columns_all_RES
239
+ elif country_code in countries_no_offshore:
240
+ forecast_columns = forecast_columns_no_wind_offshore
241
  if country_code == 'BE':
 
 
242
  weather_columns = ['Temperature', 'Wind Speed Onshore', 'Wind Speed Offshore']
243
  data['Temperature'] = data['temperature_2m_8']
 
244
  data['Wind Speed Onshore'] = data['wind_speed_100m_8']
245
+ data['Wind Speed Offshore'] = data['wind_speed_100m_4']
246
+ else:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
247
  weather_columns = ['Temperature', 'Wind Speed']
248
  data['Temperature'] = data['temperature_2m']
249
  data['Wind Speed'] = data['wind_speed_100m']
250
 
 
 
 
 
 
 
 
 
 
 
 
 
251
 
252
  if section == 'Data Quality':
253
 
 
255
 
256
  st.write('The table below presents the data quality metrics focusing on the percentage of missing values and the occurrence of extreme or nonsensical values for the selected country.')
257
 
258
+ yesterday_midnight = pd.Timestamp(datetime.datetime.now().date() - pd.Timedelta(days=1)).replace(hour=23, minute=59, second=59)
259
 
260
  # Filter data until the end of yesterday (midnight)
261
  data_quality = data[data.index <= yesterday_midnight]
 
264
  missing_values = data_quality[forecast_columns].isna().mean() * 100
265
  missing_values = missing_values.round(2)
266
 
 
 
 
 
 
 
 
 
 
 
267
  if country_code not in installed_capacities:
268
+ st.markdown(f"⚠️ **Installed capacities not available on ENTSO-E Transparency Platform for country code '{country_code}'. Therefore, cannot calculate Extreme/Nonsensical values.**")
269
+ # If capacities are not available, assign NaN to extreme_values and skip extreme value checking
270
+ extreme_values = {col: np.nan for col in forecast_columns}
271
+ else:
272
+ capacities = installed_capacities[country_code]
273
+ extreme_values = {}
274
+
275
+ for col in forecast_columns:
276
+ if 'Solar_entsoe' in col:
277
+ extreme_values[col] = ((data_quality[col] < 0) | (data_quality[col] > capacities['Solar'])).mean() * 100
278
+ elif 'Solar_forecast_entsoe' in col:
279
+ extreme_values[col] = ((data_quality[col] < 0) | (data_quality[col] > capacities['Solar'])).mean() * 100
280
+ elif 'Wind_onshore_entsoe' in col:
281
+ extreme_values[col] = ((data_quality[col] < 0) | (data_quality[col] > capacities['Wind Onshore'])).mean() * 100
282
+ elif 'Wind_onshore_forecast_entsoe' in col:
283
+ extreme_values[col] = ((data_quality[col] < 0) | (data_quality[col] > capacities['Wind Onshore'])).mean() * 100
284
+ elif 'Wind_offshore_entsoe' in col:
285
+ extreme_values[col] = ((data_quality[col] < 0) | (data_quality[col] > capacities['Wind Offshore'])).mean() * 100
286
+ elif 'Wind_offshore_forecast_entsoe' in col:
287
+ extreme_values[col] = ((data_quality[col] < 0) | (data_quality[col] > capacities['Wind Offshore'])).mean() * 100
288
+ elif 'Load_entsoe' in col:
289
+ extreme_values[col] = ((data_quality[col] < 0)).mean() * 100
290
+ elif 'Load_forecast_entsoe' in col:
291
+ extreme_values[col] = ((data_quality[col] < 0)).mean() * 100
 
 
292
 
293
  extreme_values = pd.Series(extreme_values).round(2)
 
294
  # Combine all metrics into one DataFrame
295
  metrics_df = pd.DataFrame({
296
  'Missing Values (%)': missing_values,
 
311
  st.write('<b><u>Missing values (%)</u></b>: Percentage of missing values in the dataset', unsafe_allow_html=True)
312
  st.write('<b><u>Extreme/Nonsensical values (%)</u></b>: Values that are considered implausible such as negative or out-of-bound values i.e., (generation<0) or (generation>capacity)', unsafe_allow_html=True)
313
 
 
314
  elif section == 'Forecasts Quality':
315
 
316
  st.header('Forecast Quality')
 
320
  st.write('The below plot shows the time series of forecasts vs. observations provided by the ENTSO-E Transparency platform from the past week.')
321
 
322
  # Options for selecting the data to display
323
+ if country_code in countries_all_RES:
 
324
  variable_options = {
325
  "Load": ("Load_entsoe", "Load_forecast_entsoe"),
326
  "Solar": ("Solar_entsoe", "Solar_forecast_entsoe"),
327
  "Wind Onshore": ("Wind_onshore_entsoe", "Wind_onshore_forecast_entsoe"),
328
  "Wind Offshore": ("Wind_offshore_entsoe", "Wind_offshore_forecast_entsoe")
329
  }
330
+ elif country_code in countries_no_offshore:
331
  variable_options = {
332
  "Load": ("Load_entsoe", "Load_forecast_entsoe"),
333
  "Solar": ("Solar_entsoe", "Solar_forecast_entsoe"),
334
  "Wind Onshore": ("Wind_onshore_entsoe", "Wind_onshore_forecast_entsoe"),
335
  }
336
+ else:
337
+ print('Country code doesnt correspond.')
338
 
339
  # Dropdown to select the variable
340
  selected_variable = st.selectbox("Select Variable for Line PLot", list(variable_options.keys()))
 
370
  fig.update_layout(title=f'Error Distribution for {selected_variable}')
371
 
372
  st.plotly_chart(fig)
 
 
373
 
374
  st.subheader('Accuracy Metrics (Sorted by rMAE):')
375
 
 
376
  date_range = st.date_input(
377
  "Select Date Range for Metrics Calculation:",
378
  value=(pd.to_datetime("2024-01-01"), pd.to_datetime(pd.Timestamp('today')))
 
391
 
392
  data = data.loc[start_date:end_date]
393
 
394
+ if country_code in countries_all_RES:
395
  accuracy_metrics = pd.DataFrame(columns=['MAE', 'rMAE'], index=['Load', 'Solar', 'Wind Onshore', 'Wind Offshore'])
396
+ elif country_code in countries_no_offshore:
397
  accuracy_metrics = pd.DataFrame(columns=['MAE', 'rMAE'], index=['Load', 'Solar', 'Wind Onshore'])
398
+ else:
399
+ print('Country code doesnt correspond.')
400
+
401
 
402
  for i in range(0, len(forecast_columns), 2):
403
  actual_col = forecast_columns[i]
 
501
  # Optionally calculate and store ACF values for further analysis if needed
502
  acf_values = acf(error.dropna(), nlags=240)
503
 
 
504
  elif section == 'Insights':
505
  st.header("Insights")
506
 
 
517
  resampled_data = data_2024.resample('D').mean() # Resample to daily mean
518
 
519
  # Select the necessary columns for the scatter plot
520
+ if country_code in countries_all_RES:
521
  selected_columns = ['Load_entsoe', 'Solar_entsoe', 'Wind_offshore_entsoe', 'Wind_onshore_entsoe'] + weather_columns
522
+ elif country_code in countries_no_offshore:
523
  selected_columns = ['Load_entsoe', 'Solar_entsoe', 'Wind_onshore_entsoe'] + weather_columns
524
+ else:
525
+ print('Country code doesnt correspond.')
526
 
527
  selected_df = resampled_data[selected_columns]
528
  selected_df.columns = [col.replace('_entsoe', '').replace('_', ' ') for col in selected_df.columns]
 
539
 
540
  elif selected_country == 'Overall':
541
 
 
 
 
 
 
 
542
  def get_forecast_columns(country_code):
543
+ if country_code in countries_all_RES:
544
+ return forecast_columns_all_RES
545
+ elif country_code in countries_no_offshore:
546
+ return forecast_columns_no_wind_offshore
547
  else:
548
+ print('Country code doesnt correspond.')
 
 
 
549
 
550
+ def calculate_net_load_error(df, country_code):
551
+ forecast_columns = get_forecast_columns(country_code)
552
+ filter_df = df[forecast_columns].dropna()
553
 
554
+ # Initialize net_load and net_load_forecast with Load and other available data
555
+ net_load = filter_df['Load_entsoe'] - filter_df['Wind_onshore_entsoe'] - filter_df['Solar_entsoe']
556
+ net_load_forecast = filter_df['Load_forecast_entsoe'] - filter_df['Wind_onshore_forecast_entsoe'] - filter_df['Solar_forecast_entsoe']
557
 
558
+ # Subtract Wind_offshore_entsoe if the column exists
559
+ if 'Wind_offshore_entsoe' in filter_df.columns:
560
+ net_load -= filter_df['Wind_offshore_entsoe']
561
 
562
+ # Subtract Wind_offshore_forecast_entsoe if the column exists
563
+ if 'Wind_offshore_forecast_entsoe' in filter_df.columns:
564
+ net_load_forecast -= filter_df['Wind_offshore_forecast_entsoe']
565
 
566
+ # Calculate the error based on the latest values
567
+ error = (net_load_forecast - net_load).iloc[-1]
568
+ date = filter_df.index[-1].strftime("%Y-%m-%d %H:%M") # Get the latest date in string format
569
 
570
+ return error, date
571
 
572
+ def plot_net_load_error_map(data_dict):
573
  # Calculate net load errors and dates for each country
574
+ net_load_errors = {country_code: calculate_net_load_error(data, country_code) for country_code, data in data_dict.items()}
575
+
576
+ # Use country codes directly
577
+ selected_country_codes = list(data_dict.keys())
578
 
 
579
  df_net_load_error = pd.DataFrame({
580
+ 'zoneName': selected_country_codes,
581
  'net_load_error': [v[0] for v in net_load_errors.values()],
582
  'date': [v[1] for v in net_load_errors.values()]
583
  })
584
 
585
+ # Load the GeoJSON data using the entsoe library
586
+ date = pd.Timestamp.now()
587
+ geo_data = load_zones(selected_country_codes, date)
588
 
589
+ # Reset index to include 'zoneName' as a column
590
+ geo_data = geo_data.reset_index()
 
 
 
 
591
 
592
+ # Map country codes to country names
593
+ countries_code_to_name = {v: k for k, v in countries.items()}
594
+ geo_data['name'] = geo_data['zoneName'].map(countries_code_to_name)
595
+
596
+ # Merge net_load_error and date into geo_data
597
+ geo_data = geo_data.merge(df_net_load_error, on='zoneName', how='left')
598
+
599
+ # Initialize the Folium map
600
+ m = folium.Map(location=[46.6034, 1.8883], zoom_start=4, tiles="cartodb positron")
601
+
602
+ # Calculate the maximum absolute net load error for normalization
603
+ max_value = df_net_load_error['net_load_error'].abs().max()
 
 
 
 
 
 
 
 
 
 
 
 
604
 
605
+ # Create a colormap with lighter shades
606
+ colormap = branca.colormap.LinearColormap(
607
+ colors=['#0D92F4', 'white', '#C62E2E'], # Light blue to white to light coral
608
+ vmin=-max_value,
609
+ vmax=max_value,
610
+ caption='Net Load Error [MW]'
611
+ )
612
+
613
+ # Define the style function
614
+ def style_function(feature):
615
+ net_load_error = feature['properties']['net_load_error']
616
+ if net_load_error is None:
617
+ return {'fillOpacity': 0.5, 'color': 'grey', 'weight': 0.5}
618
+ else:
619
+ fill_color = colormap(net_load_error)
620
+ return {
621
+ 'fillColor': fill_color,
622
+ 'fillOpacity': 0.8, # Set a constant opacity
623
+ 'color': 'black',
624
+ 'weight': 0.5
625
+ }
626
+
627
+ # Add the GeoJson layer with the custom style_function
628
  folium.GeoJson(
629
+ geo_data,
630
+ style_function=style_function,
631
  tooltip=folium.GeoJsonTooltip(
632
  fields=["name", "net_load_error", "date"],
633
  aliases=["Country:", "Net Load Error [MW]:", "Date:"],
 
635
  )
636
  ).add_to(m)
637
 
638
+ # Add the colormap to the map
639
+ colormap.add_to(m)
 
 
 
 
 
 
 
 
 
 
640
 
641
+ # Display the map
642
+ st_folium(m, width=700, height=600)
 
 
643
 
 
644
  def calculate_mae(actual, forecast):
645
  return np.mean(np.abs(actual - forecast))
646
 
 
647
  def calculate_persistence_mae(data, shift_hours):
648
  return np.mean(np.abs(data - data.shift(shift_hours)))
649
 
 
650
  def calculate_rmae_for_country(df):
651
  rmae = {}
652
  rmae['Load'] = calculate_mae(df['Load_entsoe'], df['Load_forecast_entsoe']) / calculate_persistence_mae(df['Load_entsoe'], 168)
 
662
 
663
  return rmae
664
 
 
665
  def create_rmae_dataframe(data_dict):
666
 
667
  rmae_values = {'Country': [], 'Load': [], 'Wind_onshore': [], 'Wind_offshore': [], 'Solar': []}
 
684
 
685
  return pd.DataFrame(rmae_values)
686
 
 
687
  def plot_rmae_radar_chart(rmae_df):
688
  fig = go.Figure()
689
 
 
709
  )
710
  st.plotly_chart(fig)
711
 
712
+
713
+ st.subheader("Net Load Error Map")
714
+ st.write("""
715
+ The net load error map highlights the error in the forecasted versus actual net load for each country.
716
+ Hover over each country to see details on the latest net load error and the timestamp (with the time zone of the corresponding country) of the last recorded data.
717
+ """)
718
+
719
+ plot_net_load_error_map(data_dict)
720
+
721
+ st.subheader("rMAE of Forecasts published on ENTSO-E TP")
722
+ st.write("""The rMAE of Forecasts chart compares the forecast accuracy of the predictions published by ENTSO-E Transparency Platform for Portugal, Spain, Belgium, France, Germany-Luxembourg, Austria, the Netherlands, Italy and Denmark. It shows the rMAE for onshore wind, offshore wind (if any), solar, and load demand, highlighting how well forecasts perform relative to a basic persistence model across these countries and energy sectors.""")
723
+
724
  rmae_df = create_rmae_dataframe(data_dict)
725
  plot_rmae_radar_chart(rmae_df)
726
 
727
 
728
+
729