Spaces:

EDS-lab
/

Transparency_Plus

Running

App Files Files Community

mmmapms committed on Aug 26

Commit

f611b2b

•

1 Parent(s): 6521280

Update app.py

Browse files

Files changed (1) hide show

app.py +57 -39

app.py CHANGED Viewed

@@ -13,8 +13,16 @@ from statsmodels.graphics.tsaplots import plot_acf
 import matplotlib.pyplot as plt
 ##GET ALL FILES FROM GITHUB
-def load_GitHub(github_token, file_name):
     url = f'https://raw.githubusercontent.com/margaridamascarenhas/Transparency_Data/main/{file_name}'
     headers = {'Authorization': f'token {github_token}'}
@@ -31,12 +39,13 @@ def load_GitHub(github_token, file_name):
     else:
         print(f"Failed to download {file_name}. Status code: {response.status_code}")
         return None
-def load_forecast(github_token):
     predictions_dict = {}
     for hour in range(24):
         file_name = f'Predictions_{hour}h.csv'
-        df = load_GitHub(github_token, file_name)
         if df is not None:
             predictions_dict[file_name] = df
     return predictions_dict
@@ -75,10 +84,12 @@ def simplify_model_names_in_index(df):
     return df
 github_token = st.secrets["GitHub_Token_KUL_Margarida"]
 if github_token:
-    forecast_dict = load_forecast(github_token)
     historical_forecast=load_GitHub(github_token, 'Historical_forecast.csv')
@@ -140,12 +151,11 @@ upper_space.markdown("""
 """, unsafe_allow_html=True)
 countries = {
-    'Belgium': 'BE',
     'Netherlands': 'NL',
     'Germany': 'DE',
     'France': 'FR',
 }
@@ -231,9 +241,12 @@ if section == 'Data':
     st.header('Data Quality')
     st.write('The table below presents the data quality metrics for various energy-related datasets, focusing on the percentage of missing values and the occurrence of extreme or nonsensical values for the selected country.')
     # Report % of missing values
-    missing_values = data[forecast_columns].isna().mean() * 100
     missing_values = missing_values.round(2)
     installed_capacities = {
@@ -254,21 +267,21 @@ if section == 'Data':
     for col in forecast_columns:
             if 'Solar_entsoe' in col:
-                extreme_values[col] = ((data[col] < 0) | (data[col] > capacities['Solar'])).mean() * 100
             elif 'Solar_forecast_entsoe' in col:
-                extreme_values[col] = ((data[col] < 0) | (data[col] > capacities['Solar'])).mean() * 100
             elif 'Wind_onshore_entsoe' in col:
-                extreme_values[col] = ((data[col] < 0) | (data[col] > capacities['Wind Onshore'])).mean() * 100
             elif 'Wind_onshore_forecast_entsoe' in col:
-                extreme_values[col] = ((data[col] < 0) | (data[col] > capacities['Wind Onshore'])).mean() * 100
             elif 'Wind_offshore_entsoe' in col:
-                extreme_values[col] = ((data[col] < 0) | (data[col] > capacities['Wind Offshore'])).mean() * 100
             elif 'Wind_offshore_forecast_entsoe' in col:
-                extreme_values[col] = ((data[col] < 0) | (data[col] > capacities['Wind Offshore'])).mean() * 100
             elif 'Load_entsoe' in col:
-                extreme_values[col] = ((data[col] < 0)).mean() * 100
             elif 'Load_forecast_entsoe' in col:
-                extreme_values[col] = ((data[col] < 0)).mean() * 100
     extreme_values = pd.Series(extreme_values).round(2)
@@ -300,29 +313,34 @@ elif section == 'Forecasts':
     # Time series for last 1 week
     st.subheader('Time Series: Last 1 Week')
-    last_week = Data_BE.loc[Data_BE.index >= (data.index[-1] - pd.Timedelta(days=7))]
     st.write('The below plots show the time series of forecasts vs. observations provided by the ENTSO-E Transparency platform between the selected data range.')
-    forecast_columns_operational = [
-    'Load_entsoe','Load_forecast_entsoe', 'Load_LightGBMModel.7D.TimeCov.Temp.Forecast_elia', 'Wind_onshore_entsoe','Wind_onshore_forecast_entsoe','Wind_onshore_LightGBMModel.1D.TimeCov.Temp.Forecast_elia','Wind_offshore_entsoe','Wind_offshore_forecast_entsoe','Wind_offshore_LightGBMModel.1D.TimeCov.Temp.Forecast_elia','Solar_entsoe','Solar_forecast_entsoe', 'Solar_LightGBMModel.1D.TimeCov.Temp.Forecast_elia']
     forecast_columns = [
     'Load_entsoe','Load_forecast_entsoe','Wind_onshore_entsoe','Wind_onshore_forecast_entsoe','Wind_offshore_entsoe','Wind_offshore_forecast_entsoe','Solar_entsoe','Solar_forecast_entsoe']
-    operation_forecast_load=forecast_dict['Predictions_10h.csv'].filter(like='Load_', axis=1)
-    operation_forecast_res=forecast_dict['Predictions_17h.csv'].filter(regex='^(?!Load_)')
-    operation_forecast_load.columns = [col.replace('_entsoe.', '_').replace('Naive.7D', 'WeeklyNaiveSeasonal') for col in operation_forecast_load.columns]
-    operation_forecast_res.columns = [col.replace('_entsoe.', '_').replace('Naive.1D', 'DailyNaiveSeasonal') for col in operation_forecast_res.columns]
-    Historical_and_Load=add_feature(operation_forecast_load, historical_forecast)
-    Historical_and_operational=add_feature(operation_forecast_res, Historical_and_Load)
-    best_forecast = Historical_and_operational.filter(like='Forecast_elia', axis=1)
-    df_combined = Historical_and_operational.join(Data_BE, how='inner')
-    last_week_best_forecast = best_forecast.loc[best_forecast.index >= (best_forecast.index[-24] - pd.Timedelta(days=8))]
-    for i in range(0, len(forecast_columns_operational), 3):
-        actual_col = forecast_columns_operational[i]
-        forecast_col = forecast_columns_operational[i + 1]
-        my_forecast = forecast_columns_operational[i + 2]
         if forecast_col in data.columns:
@@ -332,7 +350,7 @@ elif section == 'Forecasts':
             if country_code=='BE':
                 conformal=conformal_predictions(df_combined, actual_col, my_forecast)
-                last_week_conformal = conformal.loc[conformal.index >= (conformal.index[-24] - pd.Timedelta(days=8))]
                 if actual_col =='Load_entsoe':
                     last_week_conformal = conformal.loc[conformal.index >= (conformal.index[-24] - pd.Timedelta(days=5))]
                 fig.add_trace(go.Scatter(x=last_week_best_forecast.index, y=last_week_best_forecast[my_forecast], mode='lines', name='Forecast EDS'))
@@ -645,12 +663,13 @@ elif section == 'Forecasts':
     # Scatter plots for error distribution
     st.subheader('Error Distribution')
     st.write('The below scatter plots show the error distribution of all three fields: Solar, Wind and Load between the selected date range')
     for i in range(0, len(forecast_columns), 2):
         actual_col = forecast_columns[i]
         forecast_col = forecast_columns[i + 1]
-        if forecast_col in data.columns:
-            obs = data[actual_col]
-            pred = data[forecast_col]
             error = pred - obs
             fig = px.scatter(x=obs, y=pred, labels={'x': 'Observed [MW]', 'y': 'Predicted by ENTSO-E [MW]'})
@@ -905,5 +924,4 @@ elif section == 'Insights':
                 fig.update_layout(title=f'{weather_col} vs {actual_col}')
                 st.plotly_chart(fig)

 import matplotlib.pyplot as plt
+def get_current_time():
+    now = datetime.now()
+    current_hour = now.hour
+    current_minute = now.minute
+    # Return the hour and a boolean indicating if it is after the 10th minute
+    return current_hour, current_minute >= 10
 ##GET ALL FILES FROM GITHUB
+@st.cache_data(show_spinner=False)
+def load_GitHub(github_token, file_name, hour, after_10_min):
     url = f'https://raw.githubusercontent.com/margaridamascarenhas/Transparency_Data/main/{file_name}'
     headers = {'Authorization': f'token {github_token}'}
     else:
         print(f"Failed to download {file_name}. Status code: {response.status_code}")
         return None
+@st.cache_data(show_spinner=False)
+def load_forecast(github_token, hour, after_10_min):
     predictions_dict = {}
     for hour in range(24):
         file_name = f'Predictions_{hour}h.csv'
+        df = load_GitHub(github_token, file_name, hour, after_10_min)
         if df is not None:
             predictions_dict[file_name] = df
     return predictions_dict
     return df
+current_hour, after_10_min = get_current_time()
 github_token = st.secrets["GitHub_Token_KUL_Margarida"]
 if github_token:
+    forecast_dict = load_forecast(github_token, current_hour, after_10_min)
     historical_forecast=load_GitHub(github_token, 'Historical_forecast.csv')
 """, unsafe_allow_html=True)
 countries = {
     'Netherlands': 'NL',
     'Germany': 'DE',
     'France': 'FR',
+    'Belgium': 'BE',
 }
     st.header('Data Quality')
     st.write('The table below presents the data quality metrics for various energy-related datasets, focusing on the percentage of missing values and the occurrence of extreme or nonsensical values for the selected country.')
+    data_quality=data.iloc[:-28]
+    if country_code=='BE':
+        data_quality=data.iloc[:-5*24]
+    print(data_quality.tail(48))
     # Report % of missing values
+    missing_values = data_quality[forecast_columns].isna().mean() * 100
     missing_values = missing_values.round(2)
     installed_capacities = {
     for col in forecast_columns:
             if 'Solar_entsoe' in col:
+                extreme_values[col] = ((data_quality[col] < 0) | (data_quality[col] > capacities['Solar'])).mean() * 100
             elif 'Solar_forecast_entsoe' in col:
+                extreme_values[col] = ((data_quality[col] < 0) | (data_quality[col] > capacities['Solar'])).mean() * 100
             elif 'Wind_onshore_entsoe' in col:
+                extreme_values[col] = ((data_quality[col] < 0) | (data_quality[col] > capacities['Wind Onshore'])).mean() * 100
             elif 'Wind_onshore_forecast_entsoe' in col:
+                extreme_values[col] = ((data_quality[col] < 0) | (data_quality[col] > capacities['Wind Onshore'])).mean() * 100
             elif 'Wind_offshore_entsoe' in col:
+                extreme_values[col] = ((data_quality[col] < 0) | (data_quality[col] > capacities['Wind Offshore'])).mean() * 100
             elif 'Wind_offshore_forecast_entsoe' in col:
+                extreme_values[col] = ((data_quality[col] < 0) | (data_quality[col] > capacities['Wind Offshore'])).mean() * 100
             elif 'Load_entsoe' in col:
+                extreme_values[col] = ((data_quality[col] < 0)).mean() * 100
             elif 'Load_forecast_entsoe' in col:
+                extreme_values[col] = ((data_quality[col] < 0)).mean() * 100
     extreme_values = pd.Series(extreme_values).round(2)
     # Time series for last 1 week
     st.subheader('Time Series: Last 1 Week')
+    last_week = data.loc[data.index >= (data.index[-1] - pd.Timedelta(days=7))]
     st.write('The below plots show the time series of forecasts vs. observations provided by the ENTSO-E Transparency platform between the selected data range.')
     forecast_columns = [
     'Load_entsoe','Load_forecast_entsoe','Wind_onshore_entsoe','Wind_onshore_forecast_entsoe','Wind_offshore_entsoe','Wind_offshore_forecast_entsoe','Solar_entsoe','Solar_forecast_entsoe']
+    num_per_var=2
+    if country_code=='BE':
+        operation_forecast_load=forecast_dict['Predictions_10h.csv'].filter(like='Load_', axis=1)
+        operation_forecast_res=forecast_dict['Predictions_17h.csv'].filter(regex='^(?!Load_)')
+        operation_forecast_load.columns = [col.replace('_entsoe.', '_').replace('Naive.7D', 'WeeklyNaiveSeasonal') for col in operation_forecast_load.columns]
+        operation_forecast_res.columns = [col.replace('_entsoe.', '_').replace('Naive.1D', 'DailyNaiveSeasonal') for col in operation_forecast_res.columns]
+        Historical_and_Load=add_feature(operation_forecast_load, historical_forecast)
+        Historical_and_operational=add_feature(operation_forecast_res, Historical_and_Load)
+        best_forecast = Historical_and_operational.filter(like='Forecast_elia', axis=1)
+        df_combined = Historical_and_operational.join(Data_BE, how='inner')
+        last_week_best_forecast = best_forecast.loc[best_forecast.index >= (best_forecast.index[-24] - pd.Timedelta(days=7))]
+        num_per_var=3
+        forecast_columns_line=['Load_entsoe','Load_forecast_entsoe', 'Load_LightGBMModel.7D.TimeCov.Temp.Forecast_elia', 'Wind_onshore_entsoe','Wind_onshore_forecast_entsoe','Wind_onshore_LightGBMModel.1D.TimeCov.Temp.Forecast_elia','Wind_offshore_entsoe','Wind_offshore_forecast_entsoe','Wind_offshore_LightGBMModel.1D.TimeCov.Temp.Forecast_elia','Solar_entsoe','Solar_forecast_entsoe', 'Solar_LightGBMModel.1D.TimeCov.Temp.Forecast_elia']
+    else:
+        forecast_columns_line=forecast_columns
+    for i in range(0, len(forecast_columns_line), num_per_var):
+        actual_col = forecast_columns_line[i]
+        forecast_col = forecast_columns_line[i + 1]
+        if country_code=='BE':
+            my_forecast = forecast_columns_line[i + 2]
         if forecast_col in data.columns:
             if country_code=='BE':
                 conformal=conformal_predictions(df_combined, actual_col, my_forecast)
+                last_week_conformal = conformal.loc[conformal.index >= (conformal.index[-24] - pd.Timedelta(days=7))]
                 if actual_col =='Load_entsoe':
                     last_week_conformal = conformal.loc[conformal.index >= (conformal.index[-24] - pd.Timedelta(days=5))]
                 fig.add_trace(go.Scatter(x=last_week_best_forecast.index, y=last_week_best_forecast[my_forecast], mode='lines', name='Forecast EDS'))
     # Scatter plots for error distribution
     st.subheader('Error Distribution')
     st.write('The below scatter plots show the error distribution of all three fields: Solar, Wind and Load between the selected date range')
+    data_2024 = data[data.index.year > 2023]
     for i in range(0, len(forecast_columns), 2):
         actual_col = forecast_columns[i]
         forecast_col = forecast_columns[i + 1]
+        if forecast_col in data_2024.columns:
+            obs = data_2024[actual_col]
+            pred = data_2024[forecast_col]
             error = pred - obs
             fig = px.scatter(x=obs, y=pred, labels={'x': 'Observed [MW]', 'y': 'Predicted by ENTSO-E [MW]'})
                 fig.update_layout(title=f'{weather_col} vs {actual_col}')
                 st.plotly_chart(fig)