mmmapms committed on
Commit
f611b2b
1 Parent(s): 6521280

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +57 -39
app.py CHANGED
@@ -13,8 +13,16 @@ from statsmodels.graphics.tsaplots import plot_acf
13
  import matplotlib.pyplot as plt
14
 
15
 
 
 
 
 
 
 
 
16
  ##GET ALL FILES FROM GITHUB
17
- def load_GitHub(github_token, file_name):
 
18
  url = f'https://raw.githubusercontent.com/margaridamascarenhas/Transparency_Data/main/{file_name}'
19
  headers = {'Authorization': f'token {github_token}'}
20
 
@@ -31,12 +39,13 @@ def load_GitHub(github_token, file_name):
31
  else:
32
  print(f"Failed to download {file_name}. Status code: {response.status_code}")
33
  return None
34
-
35
- def load_forecast(github_token):
 
36
  predictions_dict = {}
37
  for hour in range(24):
38
  file_name = f'Predictions_{hour}h.csv'
39
- df = load_GitHub(github_token, file_name)
40
  if df is not None:
41
  predictions_dict[file_name] = df
42
  return predictions_dict
@@ -75,10 +84,12 @@ def simplify_model_names_in_index(df):
75
 
76
  return df
77
 
 
 
78
  github_token = st.secrets["GitHub_Token_KUL_Margarida"]
79
 
80
  if github_token:
81
- forecast_dict = load_forecast(github_token)
82
 
83
  historical_forecast=load_GitHub(github_token, 'Historical_forecast.csv')
84
 
@@ -140,12 +151,11 @@ upper_space.markdown("""
140
  """, unsafe_allow_html=True)
141
 
142
 
143
-
144
  countries = {
145
- 'Belgium': 'BE',
146
  'Netherlands': 'NL',
147
  'Germany': 'DE',
148
  'France': 'FR',
 
149
  }
150
 
151
 
@@ -231,9 +241,12 @@ if section == 'Data':
231
  st.header('Data Quality')
232
 
233
  st.write('The table below presents the data quality metrics for various energy-related datasets, focusing on the percentage of missing values and the occurrence of extreme or nonsensical values for the selected country.')
234
-
 
 
 
235
  # Report % of missing values
236
- missing_values = data[forecast_columns].isna().mean() * 100
237
  missing_values = missing_values.round(2)
238
 
239
  installed_capacities = {
@@ -254,21 +267,21 @@ if section == 'Data':
254
 
255
  for col in forecast_columns:
256
  if 'Solar_entsoe' in col:
257
- extreme_values[col] = ((data[col] < 0) | (data[col] > capacities['Solar'])).mean() * 100
258
  elif 'Solar_forecast_entsoe' in col:
259
- extreme_values[col] = ((data[col] < 0) | (data[col] > capacities['Solar'])).mean() * 100
260
  elif 'Wind_onshore_entsoe' in col:
261
- extreme_values[col] = ((data[col] < 0) | (data[col] > capacities['Wind Onshore'])).mean() * 100
262
  elif 'Wind_onshore_forecast_entsoe' in col:
263
- extreme_values[col] = ((data[col] < 0) | (data[col] > capacities['Wind Onshore'])).mean() * 100
264
  elif 'Wind_offshore_entsoe' in col:
265
- extreme_values[col] = ((data[col] < 0) | (data[col] > capacities['Wind Offshore'])).mean() * 100
266
  elif 'Wind_offshore_forecast_entsoe' in col:
267
- extreme_values[col] = ((data[col] < 0) | (data[col] > capacities['Wind Offshore'])).mean() * 100
268
  elif 'Load_entsoe' in col:
269
- extreme_values[col] = ((data[col] < 0)).mean() * 100
270
  elif 'Load_forecast_entsoe' in col:
271
- extreme_values[col] = ((data[col] < 0)).mean() * 100
272
 
273
 
274
  extreme_values = pd.Series(extreme_values).round(2)
@@ -300,29 +313,34 @@ elif section == 'Forecasts':
300
 
301
  # Time series for last 1 week
302
  st.subheader('Time Series: Last 1 Week')
303
- last_week = Data_BE.loc[Data_BE.index >= (data.index[-1] - pd.Timedelta(days=7))]
304
  st.write('The below plots show the time series of forecasts vs. observations provided by the ENTSO-E Transparency platform between the selected data range.')
305
- forecast_columns_operational = [
306
- 'Load_entsoe','Load_forecast_entsoe', 'Load_LightGBMModel.7D.TimeCov.Temp.Forecast_elia', 'Wind_onshore_entsoe','Wind_onshore_forecast_entsoe','Wind_onshore_LightGBMModel.1D.TimeCov.Temp.Forecast_elia','Wind_offshore_entsoe','Wind_offshore_forecast_entsoe','Wind_offshore_LightGBMModel.1D.TimeCov.Temp.Forecast_elia','Solar_entsoe','Solar_forecast_entsoe', 'Solar_LightGBMModel.1D.TimeCov.Temp.Forecast_elia']
307
  forecast_columns = [
308
  'Load_entsoe','Load_forecast_entsoe','Wind_onshore_entsoe','Wind_onshore_forecast_entsoe','Wind_offshore_entsoe','Wind_offshore_forecast_entsoe','Solar_entsoe','Solar_forecast_entsoe']
 
309
 
310
- operation_forecast_load=forecast_dict['Predictions_10h.csv'].filter(like='Load_', axis=1)
311
- operation_forecast_res=forecast_dict['Predictions_17h.csv'].filter(regex='^(?!Load_)')
312
- operation_forecast_load.columns = [col.replace('_entsoe.', '_').replace('Naive.7D', 'WeeklyNaiveSeasonal') for col in operation_forecast_load.columns]
313
- operation_forecast_res.columns = [col.replace('_entsoe.', '_').replace('Naive.1D', 'DailyNaiveSeasonal') for col in operation_forecast_res.columns]
314
- Historical_and_Load=add_feature(operation_forecast_load, historical_forecast)
315
- Historical_and_operational=add_feature(operation_forecast_res, Historical_and_Load)
 
316
 
317
- best_forecast = Historical_and_operational.filter(like='Forecast_elia', axis=1)
318
- df_combined = Historical_and_operational.join(Data_BE, how='inner')
319
- last_week_best_forecast = best_forecast.loc[best_forecast.index >= (best_forecast.index[-24] - pd.Timedelta(days=8))]
320
-
 
 
 
321
 
322
- for i in range(0, len(forecast_columns_operational), 3):
323
- actual_col = forecast_columns_operational[i]
324
- forecast_col = forecast_columns_operational[i + 1]
325
- my_forecast = forecast_columns_operational[i + 2]
 
326
 
327
 
328
  if forecast_col in data.columns:
@@ -332,7 +350,7 @@ elif section == 'Forecasts':
332
 
333
  if country_code=='BE':
334
  conformal=conformal_predictions(df_combined, actual_col, my_forecast)
335
- last_week_conformal = conformal.loc[conformal.index >= (conformal.index[-24] - pd.Timedelta(days=8))]
336
  if actual_col =='Load_entsoe':
337
  last_week_conformal = conformal.loc[conformal.index >= (conformal.index[-24] - pd.Timedelta(days=5))]
338
  fig.add_trace(go.Scatter(x=last_week_best_forecast.index, y=last_week_best_forecast[my_forecast], mode='lines', name='Forecast EDS'))
@@ -645,12 +663,13 @@ elif section == 'Forecasts':
645
  # Scatter plots for error distribution
646
  st.subheader('Error Distribution')
647
  st.write('The below scatter plots show the error distribution of all three fields: Solar, Wind and Load between the selected date range')
 
648
  for i in range(0, len(forecast_columns), 2):
649
  actual_col = forecast_columns[i]
650
  forecast_col = forecast_columns[i + 1]
651
- if forecast_col in data.columns:
652
- obs = data[actual_col]
653
- pred = data[forecast_col]
654
  error = pred - obs
655
 
656
  fig = px.scatter(x=obs, y=pred, labels={'x': 'Observed [MW]', 'y': 'Predicted by ENTSO-E [MW]'})
@@ -905,5 +924,4 @@ elif section == 'Insights':
905
  fig.update_layout(title=f'{weather_col} vs {actual_col}')
906
  st.plotly_chart(fig)
907
 
908
-
909
 
 
13
  import matplotlib.pyplot as plt
14
 
15
 
16
def get_current_time():
    """Return the current hour and whether the clock is at or past minute 10.

    The timestamp comes from a single ``datetime.now()`` call without a
    timezone argument, so it is the naive local time of the machine running
    the app.

    Returns:
        A ``(hour, after_10_min)`` tuple: ``hour`` is the hour of the day as an
        int, and ``after_10_min`` is True when the minute is 10 or later.
    """
    timestamp = datetime.now()
    # Minute 10 is the threshold: the flag flips once per hour at hh:10.
    past_tenth_minute = timestamp.minute >= 10
    return timestamp.hour, past_tenth_minute
22
+
23
  ##GET ALL FILES FROM GITHUB
24
+ @st.cache_data(show_spinner=False)
25
+ def load_GitHub(github_token, file_name, hour, after_10_min):
26
  url = f'https://raw.githubusercontent.com/margaridamascarenhas/Transparency_Data/main/{file_name}'
27
  headers = {'Authorization': f'token {github_token}'}
28
 
 
39
  else:
40
  print(f"Failed to download {file_name}. Status code: {response.status_code}")
41
  return None
42
+
43
@st.cache_data(show_spinner=False)
def load_forecast(github_token, hour, after_10_min):
    """Download the 24 hourly prediction files and return them keyed by file name.

    ``hour`` and ``after_10_min`` do not select which files are fetched. They
    are part of this function's cache key and are forwarded unchanged to
    ``load_GitHub``, so both cached layers are refreshed when the caller's
    time values change.

    Args:
        github_token: Token forwarded to ``load_GitHub`` for every download.
        hour: Current hour supplied by the caller (cache key only).
        after_10_min: Caller-supplied flag for "minute >= 10" (cache key only).

    Returns:
        A dict mapping ``'Predictions_{h}h.csv'`` (h = 0..23) to the object
        returned by ``load_GitHub``. Files for which ``load_GitHub`` returned
        ``None`` are left out.
    """
    predictions_dict = {}
    # The loop index needs its own name. Reusing ``hour`` here overwrote the
    # parameter, so load_GitHub was keyed on the file index (0..23) instead of
    # the current hour and kept serving its first cached download.
    for file_hour in range(24):
        file_name = f'Predictions_{file_hour}h.csv'
        df = load_GitHub(github_token, file_name, hour, after_10_min)
        if df is not None:
            predictions_dict[file_name] = df
    return predictions_dict
 
84
 
85
  return df
86
 
87
+ current_hour, after_10_min = get_current_time()
88
+
89
  github_token = st.secrets["GitHub_Token_KUL_Margarida"]
90
 
91
  if github_token:
92
+ forecast_dict = load_forecast(github_token, current_hour, after_10_min)
93
 
94
  historical_forecast=load_GitHub(github_token, 'Historical_forecast.csv')
95
 
 
151
  """, unsafe_allow_html=True)
152
 
153
 
 
154
  countries = {
 
155
  'Netherlands': 'NL',
156
  'Germany': 'DE',
157
  'France': 'FR',
158
+ 'Belgium': 'BE',
159
  }
160
 
161
 
 
241
  st.header('Data Quality')
242
 
243
  st.write('The table below presents the data quality metrics for various energy-related datasets, focusing on the percentage of missing values and the occurrence of extreme or nonsensical values for the selected country.')
244
+ data_quality=data.iloc[:-28]
245
+ if country_code=='BE':
246
+ data_quality=data.iloc[:-5*24]
247
+ print(data_quality.tail(48))
248
  # Report % of missing values
249
+ missing_values = data_quality[forecast_columns].isna().mean() * 100
250
  missing_values = missing_values.round(2)
251
 
252
  installed_capacities = {
 
267
 
268
  for col in forecast_columns:
269
  if 'Solar_entsoe' in col:
270
+ extreme_values[col] = ((data_quality[col] < 0) | (data_quality[col] > capacities['Solar'])).mean() * 100
271
  elif 'Solar_forecast_entsoe' in col:
272
+ extreme_values[col] = ((data_quality[col] < 0) | (data_quality[col] > capacities['Solar'])).mean() * 100
273
  elif 'Wind_onshore_entsoe' in col:
274
+ extreme_values[col] = ((data_quality[col] < 0) | (data_quality[col] > capacities['Wind Onshore'])).mean() * 100
275
  elif 'Wind_onshore_forecast_entsoe' in col:
276
+ extreme_values[col] = ((data_quality[col] < 0) | (data_quality[col] > capacities['Wind Onshore'])).mean() * 100
277
  elif 'Wind_offshore_entsoe' in col:
278
+ extreme_values[col] = ((data_quality[col] < 0) | (data_quality[col] > capacities['Wind Offshore'])).mean() * 100
279
  elif 'Wind_offshore_forecast_entsoe' in col:
280
+ extreme_values[col] = ((data_quality[col] < 0) | (data_quality[col] > capacities['Wind Offshore'])).mean() * 100
281
  elif 'Load_entsoe' in col:
282
+ extreme_values[col] = ((data_quality[col] < 0)).mean() * 100
283
  elif 'Load_forecast_entsoe' in col:
284
+ extreme_values[col] = ((data_quality[col] < 0)).mean() * 100
285
 
286
 
287
  extreme_values = pd.Series(extreme_values).round(2)
 
313
 
314
  # Time series for last 1 week
315
  st.subheader('Time Series: Last 1 Week')
316
+ last_week = data.loc[data.index >= (data.index[-1] - pd.Timedelta(days=7))]
317
  st.write('The below plots show the time series of forecasts vs. observations provided by the ENTSO-E Transparency platform between the selected data range.')
318
+
 
319
  forecast_columns = [
320
  'Load_entsoe','Load_forecast_entsoe','Wind_onshore_entsoe','Wind_onshore_forecast_entsoe','Wind_offshore_entsoe','Wind_offshore_forecast_entsoe','Solar_entsoe','Solar_forecast_entsoe']
321
+ num_per_var=2
322
 
323
+ if country_code=='BE':
324
+ operation_forecast_load=forecast_dict['Predictions_10h.csv'].filter(like='Load_', axis=1)
325
+ operation_forecast_res=forecast_dict['Predictions_17h.csv'].filter(regex='^(?!Load_)')
326
+ operation_forecast_load.columns = [col.replace('_entsoe.', '_').replace('Naive.7D', 'WeeklyNaiveSeasonal') for col in operation_forecast_load.columns]
327
+ operation_forecast_res.columns = [col.replace('_entsoe.', '_').replace('Naive.1D', 'DailyNaiveSeasonal') for col in operation_forecast_res.columns]
328
+ Historical_and_Load=add_feature(operation_forecast_load, historical_forecast)
329
+ Historical_and_operational=add_feature(operation_forecast_res, Historical_and_Load)
330
 
331
+ best_forecast = Historical_and_operational.filter(like='Forecast_elia', axis=1)
332
+ df_combined = Historical_and_operational.join(Data_BE, how='inner')
333
+ last_week_best_forecast = best_forecast.loc[best_forecast.index >= (best_forecast.index[-24] - pd.Timedelta(days=7))]
334
+ num_per_var=3
335
+ forecast_columns_line=['Load_entsoe','Load_forecast_entsoe', 'Load_LightGBMModel.7D.TimeCov.Temp.Forecast_elia', 'Wind_onshore_entsoe','Wind_onshore_forecast_entsoe','Wind_onshore_LightGBMModel.1D.TimeCov.Temp.Forecast_elia','Wind_offshore_entsoe','Wind_offshore_forecast_entsoe','Wind_offshore_LightGBMModel.1D.TimeCov.Temp.Forecast_elia','Solar_entsoe','Solar_forecast_entsoe', 'Solar_LightGBMModel.1D.TimeCov.Temp.Forecast_elia']
336
+ else:
337
+ forecast_columns_line=forecast_columns
338
 
339
+ for i in range(0, len(forecast_columns_line), num_per_var):
340
+ actual_col = forecast_columns_line[i]
341
+ forecast_col = forecast_columns_line[i + 1]
342
+ if country_code=='BE':
343
+ my_forecast = forecast_columns_line[i + 2]
344
 
345
 
346
  if forecast_col in data.columns:
 
350
 
351
  if country_code=='BE':
352
  conformal=conformal_predictions(df_combined, actual_col, my_forecast)
353
+ last_week_conformal = conformal.loc[conformal.index >= (conformal.index[-24] - pd.Timedelta(days=7))]
354
  if actual_col =='Load_entsoe':
355
  last_week_conformal = conformal.loc[conformal.index >= (conformal.index[-24] - pd.Timedelta(days=5))]
356
  fig.add_trace(go.Scatter(x=last_week_best_forecast.index, y=last_week_best_forecast[my_forecast], mode='lines', name='Forecast EDS'))
 
663
  # Scatter plots for error distribution
664
  st.subheader('Error Distribution')
665
  st.write('The below scatter plots show the error distribution of all three fields: Solar, Wind and Load between the selected date range')
666
+ data_2024 = data[data.index.year > 2023]
667
  for i in range(0, len(forecast_columns), 2):
668
  actual_col = forecast_columns[i]
669
  forecast_col = forecast_columns[i + 1]
670
+ if forecast_col in data_2024.columns:
671
+ obs = data_2024[actual_col]
672
+ pred = data_2024[forecast_col]
673
  error = pred - obs
674
 
675
  fig = px.scatter(x=obs, y=pred, labels={'x': 'Observed [MW]', 'y': 'Predicted by ENTSO-E [MW]'})
 
924
  fig.update_layout(title=f'{weather_col} vs {actual_col}')
925
  st.plotly_chart(fig)
926
 
 
927