Spaces:

EDS-lab
/

Transparency_Plus

Sleeping

App Files Files Community

mmmapms committed on Oct 18, 2024

Commit

3a7655a

verified ·

1 Parent(s): 9df8181

Update app.py

Browse files

Files changed (1) hide show

app.py +196 -203

app.py CHANGED Viewed

@@ -103,6 +103,14 @@ if github_token:
     Data_PT=load_GitHub(github_token, 'PT_Entsoe_UTC.csv', hour, after_10_min)
     Data_ES=load_GitHub(github_token, 'ES_Entsoe_UTC.csv', hour, after_10_min)
     Data_AT=load_GitHub(github_token, 'AT_Entsoe_UTC.csv', hour, after_10_min)
     Data_BE=convert_European_time(Data_BE, 'Europe/Brussels')
     Data_FR=convert_European_time(Data_FR, 'Europe/Paris')
@@ -111,6 +119,14 @@ if github_token:
     Data_PT=convert_European_time(Data_PT, 'Europe/Lisbon')
     Data_ES=convert_European_time(Data_ES, 'Europe/Madrid')
     Data_AT=convert_European_time(Data_AT, 'Europe/Vienna')
 else:
@@ -131,7 +147,7 @@ with col2:
         st.image("energyville_logo.png", width=100)
-st.write("**Evaluate and analyze ENTSO-E Transparency Platform data quality, forecast accuracy, and energy trends for Portugal, Spain, Belgium, France, Germany, Austria, and the Netherlands.**")
 upper_space.markdown("""
 &nbsp;
@@ -142,13 +158,60 @@ countries = {
     'Overall': 'Overall',
     'Austria': 'AT',
     'Belgium': 'BE',
     'France': 'FR',
-    'Germany': 'DE',
     'Netherlands': 'NL',
     'Portugal': 'PT',
     'Spain': 'ES',
 }
 st.sidebar.header('Filters')
@@ -157,9 +220,6 @@ st.sidebar.caption("Choose the country for which you want to display data or for
 selected_country = st.sidebar.selectbox('Select Country', list(countries.keys()))
-# Ensure the date range provides two dates
 # Sidebar with radio buttons for different sections
 if selected_country != 'Overall':
     st.sidebar.subheader("Section")
@@ -168,78 +228,26 @@ if selected_country != 'Overall':
 else:
     section = None  # No section is shown when "Overall" is selected
-forecast_columns_with_wind_offshore = [
-    'Load_entsoe','Load_forecast_entsoe','Wind_onshore_entsoe','Wind_onshore_forecast_entsoe','Wind_offshore_entsoe','Wind_offshore_forecast_entsoe','Solar_entsoe','Solar_forecast_entsoe']
-forecast_columns_no_wind_offshore = [
-    'Load_entsoe','Load_forecast_entsoe','Wind_onshore_entsoe','Wind_onshore_forecast_entsoe','Solar_entsoe','Solar_forecast_entsoe']
 if selected_country == 'Overall':
     data = None  # You can set data to None or a specific dataset based on your logic
     section = None  # No section selected when "Overall" is chosen
 else:
     country_code = countries[selected_country]
     if country_code == 'BE':
-        forecast_columns=forecast_columns_with_wind_offshore
-        data = Data_BE
         weather_columns = ['Temperature', 'Wind Speed Onshore', 'Wind Speed Offshore']
         data['Temperature'] = data['temperature_2m_8']
-        data['Wind Speed Offshore'] = data['wind_speed_100m_4']
         data['Wind Speed Onshore'] = data['wind_speed_100m_8']
-    elif country_code == 'DE':
-        forecast_columns=forecast_columns_with_wind_offshore
-        data = Data_DE
-        weather_columns = ['Temperature', 'Wind Speed']
-        data['Temperature'] = data['temperature_2m']
-        data['Wind Speed'] = data['wind_speed_100m']
-    elif country_code == 'NL':
-        forecast_columns=forecast_columns_with_wind_offshore
-        data = Data_NL
-        weather_columns = ['Temperature', 'Wind Speed']
-        data['Temperature'] = data['temperature_2m']
-        data['Wind Speed'] = data['wind_speed_100m']
-    elif country_code == 'FR':
-        forecast_columns=forecast_columns_with_wind_offshore
-        data = Data_FR
-        weather_columns = ['Temperature', 'Wind Speed']
-        data['Temperature'] = data['temperature_2m']
-        data['Wind Speed'] = data['wind_speed_100m']
-    elif country_code == 'PT':
-        forecast_columns=forecast_columns_with_wind_offshore
-        data = Data_PT
-        weather_columns = ['Temperature', 'Wind Speed']
-        data['Temperature'] = data['temperature_2m']
-        data['Wind Speed'] = data['wind_speed_100m']
-    elif country_code == 'AT':
-        forecast_columns=forecast_columns_no_wind_offshore
-        data = Data_AT
-        weather_columns = ['Temperature', 'Wind Speed']
-        data['Temperature'] = data['temperature_2m']
-        data['Wind Speed'] = data['wind_speed_100m']
-    elif country_code == 'ES':
-        forecast_columns=forecast_columns_no_wind_offshore
-        data = Data_ES
         weather_columns = ['Temperature', 'Wind Speed']
         data['Temperature'] = data['temperature_2m']
         data['Wind Speed'] = data['wind_speed_100m']
-def add_feature(df2, df_main):
-    #df_main.index = pd.to_datetime(df_main.index)
-    #df2.index = pd.to_datetime(df2.index)
-    df_combined = df_main.combine_first(df2)
-    last_date_df1 = df_main.index.max()
-    first_date_df2 = df2.index.min()
-    if first_date_df2 == last_date_df1 + pd.Timedelta(hours=1):
-        df_combined = pd.concat([df_main, df2[df2.index > last_date_df1]], axis=0)
-    #df_combined.reset_index(inplace=True)
-    return df_combined
-#data.index = data.index.tz_localize('UTC')
 if section == 'Data Quality':
@@ -247,7 +255,7 @@ if section == 'Data Quality':
     st.write('The table below presents the data quality metrics focusing on the percentage of missing values and the occurrence of extreme or nonsensical values for the selected country.')
-    yesterday_midnight = pd.Timestamp(datetime.now().date() - pd.Timedelta(days=1)).replace(hour=23, minute=59, second=59)
     # Filter data until the end of yesterday (midnight)
     data_quality = data[data.index <= yesterday_midnight]
@@ -256,46 +264,33 @@ if section == 'Data Quality':
     missing_values = data_quality[forecast_columns].isna().mean() * 100
     missing_values = missing_values.round(2)
-    installed_capacities = {
-        'FR': { 'Solar': 17419, 'Wind Offshore': 1483, 'Wind Onshore': 22134},
-        'DE': { 'Solar': 73821, 'Wind Offshore': 8386, 'Wind Onshore': 59915},
-        'BE': { 'Solar': 8789, 'Wind Offshore': 2262, 'Wind Onshore': 3053},
-        'NL': { 'Solar': 22590, 'Wind Offshore': 3220, 'Wind Onshore': 6190},
-        'PT': { 'Solar': 1811, 'Wind Offshore': 25, 'Wind Onshore': 5333},
-        'ES': { 'Solar': 23867, 'Wind Onshore': 30159},
-        'AT': { 'Solar': 7294, 'Wind Onshore': 4021 }
-    }
     if country_code not in installed_capacities:
-        st.error(f"Installed capacities not defined for country code '{country_code}'.")
-        st.stop()
-    # Report % of extreme, impossible values for the selected country
-    capacities = installed_capacities[country_code]
-    extreme_values = {}
-    for col in forecast_columns:
-            if 'Solar_entsoe' in col:
-                extreme_values[col] = ((data_quality[col] < 0) | (data_quality[col] > capacities['Solar'])).mean() * 100
-            elif 'Solar_forecast_entsoe' in col:
-                extreme_values[col] = ((data_quality[col] < 0) | (data_quality[col] > capacities['Solar'])).mean() * 100
-            elif 'Wind_onshore_entsoe' in col:
-                extreme_values[col] = ((data_quality[col] < 0) | (data_quality[col] > capacities['Wind Onshore'])).mean() * 100
-            elif 'Wind_onshore_forecast_entsoe' in col:
-                extreme_values[col] = ((data_quality[col] < 0) | (data_quality[col] > capacities['Wind Onshore'])).mean() * 100
-            elif 'Wind_offshore_entsoe' in col:
-                extreme_values[col] = ((data_quality[col] < 0) | (data_quality[col] > capacities['Wind Offshore'])).mean() * 100
-            elif 'Wind_offshore_forecast_entsoe' in col:
-                extreme_values[col] = ((data_quality[col] < 0) | (data_quality[col] > capacities['Wind Offshore'])).mean() * 100
-            elif 'Load_entsoe' in col:
-                extreme_values[col] = ((data_quality[col] < 0)).mean() * 100
-            elif 'Load_forecast_entsoe' in col:
-                extreme_values[col] = ((data_quality[col] < 0)).mean() * 100
     extreme_values = pd.Series(extreme_values).round(2)
     # Combine all metrics into one DataFrame
     metrics_df = pd.DataFrame({
     'Missing Values (%)': missing_values,
@@ -316,7 +311,6 @@ if section == 'Data Quality':
     st.write('<b><u>Missing values (%)</u></b>: Percentage of missing values in the dataset', unsafe_allow_html=True)
     st.write('<b><u>Extreme/Nonsensical values (%)</u></b>: Values that are considered implausible such as negative or out-of-bound values i.e., (generation<0) or (generation>capacity)', unsafe_allow_html=True)
-# Section 2: Forecasts
 elif section == 'Forecasts Quality':
     st.header('Forecast Quality')
@@ -326,20 +320,21 @@ elif section == 'Forecasts Quality':
     st.write('The below plot shows the time series of forecasts vs. observations provided by the ENTSO-E Transparency platform from the past week.')
     # Options for selecting the data to display
-    if country_code!='ES' and country_code!='AT':
         variable_options = {
             "Load": ("Load_entsoe", "Load_forecast_entsoe"),
             "Solar": ("Solar_entsoe", "Solar_forecast_entsoe"),
             "Wind Onshore": ("Wind_onshore_entsoe", "Wind_onshore_forecast_entsoe"),
             "Wind Offshore": ("Wind_offshore_entsoe", "Wind_offshore_forecast_entsoe")
         }
-    else:
         variable_options = {
             "Load": ("Load_entsoe", "Load_forecast_entsoe"),
             "Solar": ("Solar_entsoe", "Solar_forecast_entsoe"),
             "Wind Onshore": ("Wind_onshore_entsoe", "Wind_onshore_forecast_entsoe"),
         }
     # Dropdown to select the variable
     selected_variable = st.selectbox("Select Variable for Line PLot", list(variable_options.keys()))
@@ -375,12 +370,9 @@ elif section == 'Forecasts Quality':
         fig.update_layout(title=f'Error Distribution for {selected_variable}')
         st.plotly_chart(fig)
     st.subheader('Accuracy Metrics (Sorted by rMAE):')
     date_range = st.date_input(
         "Select Date Range for Metrics Calculation:",
         value=(pd.to_datetime("2024-01-01"), pd.to_datetime(pd.Timestamp('today')))
@@ -399,10 +391,13 @@ elif section == 'Forecasts Quality':
     data = data.loc[start_date:end_date]
-    if country_code!='ES' and country_code!='AT':
         accuracy_metrics = pd.DataFrame(columns=['MAE', 'rMAE'], index=['Load', 'Solar', 'Wind Onshore', 'Wind Offshore'])
-    else:
         accuracy_metrics = pd.DataFrame(columns=['MAE', 'rMAE'], index=['Load', 'Solar', 'Wind Onshore'])
     for i in range(0, len(forecast_columns), 2):
         actual_col = forecast_columns[i]
@@ -506,7 +501,6 @@ elif section == 'Forecasts Quality':
         # Optionally calculate and store ACF values for further analysis if needed
         acf_values = acf(error.dropna(), nlags=240)
-# Section 3: Insights
 elif section == 'Insights':
     st.header("Insights")
@@ -523,10 +517,12 @@ elif section == 'Insights':
         resampled_data = data_2024.resample('D').mean()  # Resample to daily mean
     # Select the necessary columns for the scatter plot
-    if country_code!='ES' and country_code!='AT':
         selected_columns = ['Load_entsoe', 'Solar_entsoe', 'Wind_offshore_entsoe', 'Wind_onshore_entsoe'] + weather_columns
-    else:
         selected_columns = ['Load_entsoe', 'Solar_entsoe', 'Wind_onshore_entsoe'] + weather_columns
     selected_df = resampled_data[selected_columns]
     selected_df.columns = [col.replace('_entsoe', '').replace('_', ' ') for col in selected_df.columns]
@@ -543,93 +539,95 @@ elif section == 'Insights':
 elif selected_country == 'Overall':
-    st.subheader("Net Load Error Map")
-    st.write("""
-        The net load error map highlights the error in the forecasted versus actual net load for each country.
-        Hover over each country to see details on the latest net load error and the timestamp (with the time zone of the corresponding country) of the last recorded data.
-    """)
     def get_forecast_columns(country_code):
-        if country_code in ['Belgium', 'Germany', 'Netherlands', 'France', 'Portugal']:
-            return ['Load_entsoe', 'Wind_onshore_entsoe', 'Solar_entsoe', 'Load_forecast_entsoe', 'Wind_onshore_forecast_entsoe', 'Solar_forecast_entsoe', 'Wind_offshore_entsoe', 'Wind_offshore_forecast_entsoe']
         else:
-            return ['Load_entsoe', 'Wind_onshore_entsoe', 'Solar_entsoe', 'Load_forecast_entsoe', 'Wind_onshore_forecast_entsoe', 'Solar_forecast_entsoe']
-    def plot_net_load_error_map(data_dict):
-        # Define forecast columns used in calculation
-        def calculate_net_load_error(df, country_code):
-            forecast_columns = get_forecast_columns(country_code)
-            filter_df = df[forecast_columns].dropna()
-            # Initialize net_load and net_load_forecast with Load and other available data
-            net_load = filter_df['Load_entsoe'] - filter_df['Wind_onshore_entsoe'] - filter_df['Solar_entsoe']
-            net_load_forecast = filter_df['Load_forecast_entsoe'] - filter_df['Wind_onshore_forecast_entsoe'] - filter_df['Solar_forecast_entsoe']
-            # Subtract Wind_offshore_entsoe if the column exists
-            if 'Wind_offshore_entsoe' in filter_df.columns:
-                net_load -= filter_df['Wind_offshore_entsoe']
-            # Subtract Wind_offshore_forecast_entsoe if the column exists
-            if 'Wind_offshore_forecast_entsoe' in filter_df.columns:
-                net_load_forecast -= filter_df['Wind_offshore_forecast_entsoe']
-            # Calculate the error based on the latest values
-            error = (net_load_forecast - net_load).iloc[-1]
-            date = filter_df.index[-1].strftime("%Y-%m-%d %H:%M")  # Get the latest date in string format
-            return error, date
         # Calculate net load errors and dates for each country
-        net_load_errors = {country_name: calculate_net_load_error(data, country_name) for country_name, data in data_dict.items()}
-        # Create DataFrame for Folium with additional date column
         df_net_load_error = pd.DataFrame({
-            'country': list(net_load_errors.keys()),
             'net_load_error': [v[0] for v in net_load_errors.values()],
             'date': [v[1] for v in net_load_errors.values()]
         })
-        # Load the GeoJSON file
-        geojson_url = "https://raw.githubusercontent.com/python-visualization/folium/master/examples/data/world-countries.json"
-        geo_data = requests.get(geojson_url).json()
-        # Filter GeoJSON to only include the selected countries
-        selected_countries = list(data_dict.keys())  # Get the list of selected countries (Belgium, France, Germany, Netherlands)
-        filtered_geojson = {
-            "type": "FeatureCollection",
-            "features": [feature for feature in geo_data["features"] if feature["properties"]["name"] in selected_countries]
-        }
-        # Merge the geojson with the error and date data
-        for feature in filtered_geojson["features"]:
-            country_name = feature["properties"]["name"]
-            row = df_net_load_error[df_net_load_error['country'] == country_name]
-            if not row.empty:
-                feature["properties"]["net_load_error"] = row.iloc[0]["net_load_error"]
-                feature["properties"]["date"] = row.iloc[0]["date"]
-        # Initialize the Folium map centered on Central Europe
-        m = folium.Map(location=[46.6034, 1.8883], zoom_start=4.5, tiles="cartodb positron")
-        # Add choropleth layer to map net load errors by country
-        folium.Choropleth(
-            geo_data=filtered_geojson,
-            name="choropleth",
-            data=df_net_load_error,
-            columns=["country", "net_load_error"],
-            key_on="feature.properties.name",
-            fill_color= "RdYlBu", #"RdYlBu",  # Use a more vibrant color palette
-            fill_opacity=0.7,
-            line_opacity=0.5,
-            line_color="black",  # Neutral border color
-            legend_name="Net Load Error [MW]"
-        ).add_to(m)
-        # Add a GeoJson layer with custom tooltip for country, error, and date
         folium.GeoJson(
-            filtered_geojson,
-            style_function=lambda x: {'fillOpacity': 0, 'color': 'black', 'weight': 0},
             tooltip=folium.GeoJsonTooltip(
                 fields=["name", "net_load_error", "date"],
                 aliases=["Country:", "Net Load Error [MW]:", "Date:"],
@@ -637,33 +635,18 @@ elif selected_country == 'Overall':
             )
         ).add_to(m)
-        # Display Folium map in Streamlit
-        st_folium(m, width=700, height=600)
-    data_dict = {
-        'Belgium': Data_BE,
-        'France': Data_FR,
-        'Germany': Data_DE,
-        'Netherlands': Data_NL,
-        'Portugal': Data_PT,
-        'Austria': Data_AT,
-        'Spain': Data_ES,
-    }
-    plot_net_load_error_map(data_dict)
-    st.subheader("rMAE of Forecasts published on ENTSO-E TP")
-    st.write("""The rMAE of Forecasts chart compares the forecast accuracy of the predictions published by ENTSO-E Transparency Platform for Portugal, Spain, Belgium, France, Germany, Austria, and the Netherlands. It shows the rMAE for onshore wind, offshore wind (if any), solar, and load demand, highlighting how well forecasts perform relative to a basic persistence model across these countries and energy sectors.""")
-    # Function to calculate MAE
     def calculate_mae(actual, forecast):
         """Return the mean absolute difference between ``actual`` and ``forecast``."""
         absolute_errors = np.abs(actual - forecast)
         return np.mean(absolute_errors)
-    # Function to calculate persistence MAE
     def calculate_persistence_mae(data, shift_hours):
         """Return the mean absolute error of a persistence forecast.

         The persistence forecast is ``data`` itself shifted by ``shift_hours``
         positions via ``data.shift``; the result is the mean of the absolute
         differences between ``data`` and that shifted copy.
         """
         lagged = data.shift(shift_hours)
         persistence_errors = np.abs(data - lagged)
         return np.mean(persistence_errors)
-    # Function to calculate rMAE for each country
     def calculate_rmae_for_country(df):
         rmae = {}
         rmae['Load'] = calculate_mae(df['Load_entsoe'], df['Load_forecast_entsoe']) / calculate_persistence_mae(df['Load_entsoe'], 168)
@@ -679,7 +662,6 @@ elif selected_country == 'Overall':
         return rmae
-    # Function to create rMAE DataFrame
     def create_rmae_dataframe(data_dict):
         rmae_values = {'Country': [], 'Load': [], 'Wind_onshore': [], 'Wind_offshore': [], 'Solar': []}
@@ -702,7 +684,6 @@ elif selected_country == 'Overall':
         return pd.DataFrame(rmae_values)
-    # Function to plot radar chart
     def plot_rmae_radar_chart(rmae_df):
         fig = go.Figure()
@@ -728,9 +709,21 @@ elif selected_country == 'Overall':
         )
         st.plotly_chart(fig)
-    # Main execution to create and display radar plot
     rmae_df = create_rmae_dataframe(data_dict)
     plot_rmae_radar_chart(rmae_df)

     Data_PT=load_GitHub(github_token, 'PT_Entsoe_UTC.csv', hour, after_10_min)
     Data_ES=load_GitHub(github_token, 'ES_Entsoe_UTC.csv', hour, after_10_min)
     Data_AT=load_GitHub(github_token, 'AT_Entsoe_UTC.csv', hour, after_10_min)
+    Data_IT_CALA=load_GitHub(github_token, 'IT_CALA_Entsoe_UTC.csv', hour, after_10_min)
+    Data_IT_CNOR=load_GitHub(github_token, 'IT_CNOR_Entsoe_UTC.csv', hour, after_10_min)
+    Data_IT_CSUD=load_GitHub(github_token, 'IT_CSUD_Entsoe_UTC.csv', hour, after_10_min)
+    Data_IT_NORD=load_GitHub(github_token, 'IT_NORD_Entsoe_UTC.csv', hour, after_10_min)
+    Data_IT_SICI=load_GitHub(github_token, 'IT_SICI_Entsoe_UTC.csv', hour, after_10_min)
+    Data_IT_SUD=load_GitHub(github_token, 'IT_SUD_Entsoe_UTC.csv', hour, after_10_min)
+    Data_DK_1=load_GitHub(github_token, 'DK_1_Entsoe_UTC.csv', hour, after_10_min)
+    Data_DK_2=load_GitHub(github_token, 'DK_2_Entsoe_UTC.csv', hour, after_10_min)
     Data_BE=convert_European_time(Data_BE, 'Europe/Brussels')
     Data_FR=convert_European_time(Data_FR, 'Europe/Paris')
     Data_PT=convert_European_time(Data_PT, 'Europe/Lisbon')
     Data_ES=convert_European_time(Data_ES, 'Europe/Madrid')
     Data_AT=convert_European_time(Data_AT, 'Europe/Vienna')
+    Data_IT_CALA = convert_European_time(Data_IT_CALA, 'Europe/Rome')
+    Data_IT_CNOR = convert_European_time(Data_IT_CNOR, 'Europe/Rome')
+    Data_IT_CSUD = convert_European_time(Data_IT_CSUD, 'Europe/Rome')
+    Data_IT_NORD = convert_European_time(Data_IT_NORD, 'Europe/Rome')
+    Data_IT_SICI = convert_European_time(Data_IT_SICI, 'Europe/Rome')
+    Data_IT_SUD = convert_European_time(Data_IT_SUD, 'Europe/Rome')
+    Data_DK_1 = convert_European_time(Data_DK_1, 'Europe/Copenhagen')
+    Data_DK_2 = convert_European_time(Data_DK_2, 'Europe/Copenhagen')
 else:
         st.image("energyville_logo.png", width=100)
+st.write("**Evaluate and analyze ENTSO-E Transparency Platform data quality, forecast accuracy, and energy trends for Portugal, Spain, Belgium, France, Germany-Luxembourg, Austria, the Netherlands, Italy and Denmark.**")
 upper_space.markdown("""
 &nbsp;
     'Overall': 'Overall',
     'Austria': 'AT',
     'Belgium': 'BE',
+    'Denmark 1': 'DK_1',
+    'Denmark 2': 'DK_2',
     'France': 'FR',
+    'Germany-Luxembourg': 'DE_LU',
+    'Italy Calabria': 'IT_CALA',
+    'Italy Central North': 'IT_CNOR',
+    'Italy Central South': 'IT_CSUD',
+    'Italy North': 'IT_NORD',
+    'Italy Sicily': 'IT_SICI',
+    'Italy South': 'IT_SUD',
     'Netherlands': 'NL',
     'Portugal': 'PT',
     'Spain': 'ES',
 }
+data_dict = {
+    'BE': Data_BE,
+    'FR': Data_FR,
+    'DE_LU': Data_DE,
+    'NL': Data_NL,
+    'PT': Data_PT,
+    'AT': Data_AT,
+    'ES': Data_ES,
+    'IT_CALA': Data_IT_CALA,
+    'IT_CNOR': Data_IT_CNOR,
+    'IT_CSUD': Data_IT_CSUD,
+    'IT_NORD': Data_IT_NORD,
+    'IT_SICI': Data_IT_SICI,
+    'IT_SUD': Data_IT_SUD,
+    'DK_1': Data_DK_1,
+    'DK_2': Data_DK_2,
+}
+countries_all_RES = ['BE', 'FR', 'NL', 'DE_LU', 'PT', 'DK_1', 'DK_2']
+countries_no_offshore= ['AT', 'ES', 'IT_CALA', 'IT_CNOR', 'IT_CSUD', 'IT_NORD', 'IT_SICI', 'IT_SUD',]
+installed_capacities = {
+        'FR': { 'Solar': 17419, 'Wind Offshore': 1483, 'Wind Onshore': 22134},
+        'DE_LU': { 'Solar': 73821, 'Wind Offshore': 8386, 'Wind Onshore': 59915},
+        'BE': { 'Solar': 8789, 'Wind Offshore': 2262, 'Wind Onshore': 3053},
+        'NL': { 'Solar': 22590, 'Wind Offshore': 3220, 'Wind Onshore': 6190},
+        'PT': { 'Solar': 1811, 'Wind Offshore': 25, 'Wind Onshore': 5333},
+        'ES': { 'Solar': 23867, 'Wind Onshore': 30159},
+        'AT': { 'Solar': 7294, 'Wind Onshore': 4021 },
+        'DK_1': { 'Solar': 2738, 'Wind Offshore': 1601, 'Wind Onshore': 4112},
+        'DK_2': { 'Solar': 992, 'Wind Offshore': 	1045, 'Wind Onshore': 748},
+    }
+forecast_columns_all_RES = [
+    'Load_entsoe','Load_forecast_entsoe','Wind_onshore_entsoe','Wind_onshore_forecast_entsoe','Wind_offshore_entsoe','Wind_offshore_forecast_entsoe','Solar_entsoe','Solar_forecast_entsoe']
+forecast_columns_no_wind_offshore = [
+    'Load_entsoe','Load_forecast_entsoe','Wind_onshore_entsoe','Wind_onshore_forecast_entsoe','Solar_entsoe','Solar_forecast_entsoe']
 st.sidebar.header('Filters')
 selected_country = st.sidebar.selectbox('Select Country', list(countries.keys()))
 # Sidebar with radio buttons for different sections
 if selected_country != 'Overall':
     st.sidebar.subheader("Section")
 else:
     section = None  # No section is shown when "Overall" is selected
 if selected_country == 'Overall':
     data = None  # You can set data to None or a specific dataset based on your logic
     section = None  # No section selected when "Overall" is chosen
 else:
     country_code = countries[selected_country]
+    data = data_dict.get(country_code)
+    if country_code in countries_all_RES:
+        forecast_columns = forecast_columns_all_RES
+    elif country_code in countries_no_offshore:
+        forecast_columns = forecast_columns_no_wind_offshore
     if country_code == 'BE':
         weather_columns = ['Temperature', 'Wind Speed Onshore', 'Wind Speed Offshore']
         data['Temperature'] = data['temperature_2m_8']
         data['Wind Speed Onshore'] = data['wind_speed_100m_8']
+        data['Wind Speed Offshore'] = data['wind_speed_100m_4']
+    else:
         weather_columns = ['Temperature', 'Wind Speed']
         data['Temperature'] = data['temperature_2m']
         data['Wind Speed'] = data['wind_speed_100m']
 if section == 'Data Quality':
     st.write('The table below presents the data quality metrics focusing on the percentage of missing values and the occurrence of extreme or nonsensical values for the selected country.')
+    yesterday_midnight = pd.Timestamp(datetime.datetime.now().date() - pd.Timedelta(days=1)).replace(hour=23, minute=59, second=59)
     # Filter data until the end of yesterday (midnight)
     data_quality = data[data.index <= yesterday_midnight]
     missing_values = data_quality[forecast_columns].isna().mean() * 100
     missing_values = missing_values.round(2)
     if country_code not in installed_capacities:
+        st.markdown(f"⚠️ **Installed capacities not available on ENTSO-E Transparency Platform for country code '{country_code}'. Therefore, cannot calculate Extreme/Nonsensical values.**")
+        # If capacities are not available, assign NaN to extreme_values and skip extreme value checking
+        extreme_values = {col: np.nan for col in forecast_columns}
+    else:
+        capacities = installed_capacities[country_code]
+        extreme_values = {}
+        for col in forecast_columns:
+                if 'Solar_entsoe' in col:
+                    extreme_values[col] = ((data_quality[col] < 0) | (data_quality[col] > capacities['Solar'])).mean() * 100
+                elif 'Solar_forecast_entsoe' in col:
+                    extreme_values[col] = ((data_quality[col] < 0) | (data_quality[col] > capacities['Solar'])).mean() * 100
+                elif 'Wind_onshore_entsoe' in col:
+                    extreme_values[col] = ((data_quality[col] < 0) | (data_quality[col] > capacities['Wind Onshore'])).mean() * 100
+                elif 'Wind_onshore_forecast_entsoe' in col:
+                    extreme_values[col] = ((data_quality[col] < 0) | (data_quality[col] > capacities['Wind Onshore'])).mean() * 100
+                elif 'Wind_offshore_entsoe' in col:
+                    extreme_values[col] = ((data_quality[col] < 0) | (data_quality[col] > capacities['Wind Offshore'])).mean() * 100
+                elif 'Wind_offshore_forecast_entsoe' in col:
+                    extreme_values[col] = ((data_quality[col] < 0) | (data_quality[col] > capacities['Wind Offshore'])).mean() * 100
+                elif 'Load_entsoe' in col:
+                    extreme_values[col] = ((data_quality[col] < 0)).mean() * 100
+                elif 'Load_forecast_entsoe' in col:
+                    extreme_values[col] = ((data_quality[col] < 0)).mean() * 100
     extreme_values = pd.Series(extreme_values).round(2)
     # Combine all metrics into one DataFrame
     metrics_df = pd.DataFrame({
     'Missing Values (%)': missing_values,
     st.write('<b><u>Missing values (%)</u></b>: Percentage of missing values in the dataset', unsafe_allow_html=True)
     st.write('<b><u>Extreme/Nonsensical values (%)</u></b>: Values that are considered implausible such as negative or out-of-bound values i.e., (generation<0) or (generation>capacity)', unsafe_allow_html=True)
 elif section == 'Forecasts Quality':
     st.header('Forecast Quality')
     st.write('The below plot shows the time series of forecasts vs. observations provided by the ENTSO-E Transparency platform from the past week.')
     # Options for selecting the data to display
+    if country_code in countries_all_RES:
         variable_options = {
             "Load": ("Load_entsoe", "Load_forecast_entsoe"),
             "Solar": ("Solar_entsoe", "Solar_forecast_entsoe"),
             "Wind Onshore": ("Wind_onshore_entsoe", "Wind_onshore_forecast_entsoe"),
             "Wind Offshore": ("Wind_offshore_entsoe", "Wind_offshore_forecast_entsoe")
         }
+    elif country_code in countries_no_offshore:
         variable_options = {
             "Load": ("Load_entsoe", "Load_forecast_entsoe"),
             "Solar": ("Solar_entsoe", "Solar_forecast_entsoe"),
             "Wind Onshore": ("Wind_onshore_entsoe", "Wind_onshore_forecast_entsoe"),
         }
+    else:
+        print('Country code doesnt correspond.')
     # Dropdown to select the variable
     selected_variable = st.selectbox("Select Variable for Line PLot", list(variable_options.keys()))
         fig.update_layout(title=f'Error Distribution for {selected_variable}')
         st.plotly_chart(fig)
     st.subheader('Accuracy Metrics (Sorted by rMAE):')
     date_range = st.date_input(
         "Select Date Range for Metrics Calculation:",
         value=(pd.to_datetime("2024-01-01"), pd.to_datetime(pd.Timestamp('today')))
     data = data.loc[start_date:end_date]
+    if country_code in countries_all_RES:
         accuracy_metrics = pd.DataFrame(columns=['MAE', 'rMAE'], index=['Load', 'Solar', 'Wind Onshore', 'Wind Offshore'])
+    elif country_code in countries_no_offshore:
         accuracy_metrics = pd.DataFrame(columns=['MAE', 'rMAE'], index=['Load', 'Solar', 'Wind Onshore'])
+    else:
+        print('Country code doesnt correspond.')
     for i in range(0, len(forecast_columns), 2):
         actual_col = forecast_columns[i]
         # Optionally calculate and store ACF values for further analysis if needed
         acf_values = acf(error.dropna(), nlags=240)
 elif section == 'Insights':
     st.header("Insights")
         resampled_data = data_2024.resample('D').mean()  # Resample to daily mean
     # Select the necessary columns for the scatter plot
+    if country_code in countries_all_RES:
         selected_columns = ['Load_entsoe', 'Solar_entsoe', 'Wind_offshore_entsoe', 'Wind_onshore_entsoe'] + weather_columns
+    elif country_code in countries_no_offshore:
         selected_columns = ['Load_entsoe', 'Solar_entsoe', 'Wind_onshore_entsoe'] + weather_columns
+    else:
+        print('Country code doesnt correspond.')
     selected_df = resampled_data[selected_columns]
     selected_df.columns = [col.replace('_entsoe', '').replace('_', ' ') for col in selected_df.columns]
 elif selected_country == 'Overall':
     def get_forecast_columns(country_code):
+        if country_code in countries_all_RES:
+            return forecast_columns_all_RES
+        elif country_code in countries_no_offshore:
+            return forecast_columns_no_wind_offshore
         else:
+            print('Country code doesnt correspond.')
+    def calculate_net_load_error(df, country_code):
+        forecast_columns = get_forecast_columns(country_code)
+        filter_df = df[forecast_columns].dropna()
+        # Initialize net_load and net_load_forecast with Load and other available data
+        net_load = filter_df['Load_entsoe'] - filter_df['Wind_onshore_entsoe'] - filter_df['Solar_entsoe']
+        net_load_forecast = filter_df['Load_forecast_entsoe'] - filter_df['Wind_onshore_forecast_entsoe'] - filter_df['Solar_forecast_entsoe']
+        # Subtract Wind_offshore_entsoe if the column exists
+        if 'Wind_offshore_entsoe' in filter_df.columns:
+            net_load -= filter_df['Wind_offshore_entsoe']
+        # Subtract Wind_offshore_forecast_entsoe if the column exists
+        if 'Wind_offshore_forecast_entsoe' in filter_df.columns:
+            net_load_forecast -= filter_df['Wind_offshore_forecast_entsoe']
+        # Calculate the error based on the latest values
+        error = (net_load_forecast - net_load).iloc[-1]
+        date = filter_df.index[-1].strftime("%Y-%m-%d %H:%M")  # Get the latest date in string format
+        return error, date
+    def plot_net_load_error_map(data_dict):
         # Calculate net load errors and dates for each country
+        net_load_errors = {country_code: calculate_net_load_error(data, country_code) for country_code, data in data_dict.items()}
+        # Use country codes directly
+        selected_country_codes = list(data_dict.keys())
         df_net_load_error = pd.DataFrame({
+            'zoneName': selected_country_codes,
             'net_load_error': [v[0] for v in net_load_errors.values()],
             'date': [v[1] for v in net_load_errors.values()]
         })
+        # Load the GeoJSON data using the entsoe library
+        date = pd.Timestamp.now()
+        geo_data = load_zones(selected_country_codes, date)
+        # Reset index to include 'zoneName' as a column
+        geo_data = geo_data.reset_index()
+        # Map country codes to country names
+        countries_code_to_name = {v: k for k, v in countries.items()}
+        geo_data['name'] = geo_data['zoneName'].map(countries_code_to_name)
+        # Merge net_load_error and date into geo_data
+        geo_data = geo_data.merge(df_net_load_error, on='zoneName', how='left')
+        # Initialize the Folium map
+        m = folium.Map(location=[46.6034, 1.8883], zoom_start=4, tiles="cartodb positron")
+        # Calculate the maximum absolute net load error for normalization
+        max_value = df_net_load_error['net_load_error'].abs().max()
+        # Create a colormap with lighter shades
+        colormap = branca.colormap.LinearColormap(
+            colors=['#0D92F4', 'white', '#C62E2E'],  # Light blue to white to light coral
+            vmin=-max_value,
+            vmax=max_value,
+            caption='Net Load Error [MW]'
+        )
+        # Define the style function
+        def style_function(feature):
+            net_load_error = feature['properties']['net_load_error']
+            if net_load_error is None:
+                return {'fillOpacity': 0.5, 'color': 'grey', 'weight': 0.5}
+            else:
+                fill_color = colormap(net_load_error)
+                return {
+                    'fillColor': fill_color,
+                    'fillOpacity': 0.8,  # Set a constant opacity
+                    'color': 'black',
+                    'weight': 0.5
+                }
+        # Add the GeoJson layer with the custom style_function
         folium.GeoJson(
+            geo_data,
+            style_function=style_function,
             tooltip=folium.GeoJsonTooltip(
                 fields=["name", "net_load_error", "date"],
                 aliases=["Country:", "Net Load Error [MW]:", "Date:"],
             )
         ).add_to(m)
+        # Add the colormap to the map
+        colormap.add_to(m)
+        # Display the map
+        st_folium(m, width=700, height=600)
     def calculate_mae(actual, forecast):
         """Return the mean absolute difference between ``actual`` and ``forecast``."""
         absolute_errors = np.abs(actual - forecast)
         return np.mean(absolute_errors)
     def calculate_persistence_mae(data, shift_hours):
         """Return the mean absolute error of a persistence forecast.

         The persistence forecast is ``data`` shifted by ``shift_hours``
         positions via ``data.shift``. The name suggests one position per
         hour; confirm against the data's actual resolution.
         """
         lagged = data.shift(shift_hours)
         return np.mean(np.abs(data - lagged))
     def calculate_rmae_for_country(df):
         """Return a dict of relative MAE values for one country's data.

         The lines visible here compute only the ``'Load'`` entry: the MAE of
         ``Load_forecast_entsoe`` against ``Load_entsoe`` divided by the MAE of
         a persistence forecast shifted by 168 positions (one week if the data
         is hourly; confirm).
         """
         forecast_mae = calculate_mae(df['Load_entsoe'], df['Load_forecast_entsoe'])
         persistence_mae = calculate_persistence_mae(df['Load_entsoe'], 168)
         return {'Load': forecast_mae / persistence_mae}
     def create_rmae_dataframe(data_dict):
         """Return a DataFrame with the rMAE columns.

         Columns are ``Country``, ``Load``, ``Wind_onshore``, ``Wind_offshore``
         and ``Solar``. In the lines visible here no rows are added and
         ``data_dict`` is not read.
         """
         column_names = ('Country', 'Load', 'Wind_onshore', 'Wind_offshore', 'Solar')
         return pd.DataFrame({name: [] for name in column_names})
     def plot_rmae_radar_chart(rmae_df):
         # Builds a Plotly figure and shows it on the Streamlit page.
         # NOTE(review): rmae_df is not referenced in the lines visible here;
         # presumably the traces are added in lines omitted from this excerpt.
         fig = go.Figure()
         # NOTE(review): the parenthesis below closes a call whose opening lines
         # are not shown in this excerpt; restore them before running this code.
         )
         # Hand the finished figure to Streamlit for display.
         st.plotly_chart(fig)
     # --- "Overall" page, part 1: net load error map ---
     # Heading and explanatory text, then the map itself.
     # data_dict is defined outside the lines shown here.
+    st.subheader("Net Load Error Map")
+    st.write("""
+        The net load error map highlights the error in the forecasted versus actual net load for each country.
+        Hover over each country to see details on the latest net load error and the timestamp (with the time zone of the corresponding country) of the last recorded data.
+    """)
+    plot_net_load_error_map(data_dict)
     # --- "Overall" page, part 2: rMAE comparison chart ---
     # Heading and explanatory text, then the chart built from the rMAE table.
+    st.subheader("rMAE of Forecasts published on ENTSO-E TP")
+    st.write("""The rMAE of Forecasts chart compares the forecast accuracy of the predictions published by ENTSO-E Transparency Platform for Portugal, Spain, Belgium, France, Germany-Luxembourg, Austria, the Netherlands, Italy and Denmark. It shows the rMAE for onshore wind, offshore wind (if any), solar, and load demand, highlighting how well forecasts perform relative to a basic persistence model across these countries and energy sectors.""")
     # Build the rMAE table from the same data, then plot it.
     rmae_df = create_rmae_dataframe(data_dict)
     plot_rmae_radar_chart(rmae_df)