danyoung commited on
Commit
6d95c4c
1 Parent(s): 0bd533d
.dockerignore ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ .git
2
+ __pycache__
3
+ *.pyc
4
+ *.pyo
5
+ *.pyd
6
+ .DS_Store
7
+
8
+ .ipynb_checkpoints/
9
+ demo.ipynb
10
+ data/
11
+ predictors/
.gitignore ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ predictors/*.joblib
2
+ data/*.zip
3
+ data/processed/*.csv
4
+ *.tar.gz
Dockerfile ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
FROM python:3.10-slim

# Define environment variables
ENV APPS_HOME=/usr/local/cognizant \
    ELUC_APP_HOME=/usr/local/cognizant/eluc \
    GDAL_VERSION=3.7.1 \
    PYTHONPATH=/usr/local/cognizant/eluc

# Debian basics and cleaning up in one RUN statement to reduce image size
RUN apt-get update -y && \
    apt-get install --no-install-recommends curl git gcc g++ libgdal-dev -y && \
    rm -rf /var/lib/apt/lists/*

# Set work directory
WORKDIR ${ELUC_APP_HOME}

# Dependencies (copied first so this layer is reused when only source files change)
COPY requirements.txt .
RUN pip install --no-cache-dir --upgrade pip && \
    pip install --no-cache-dir -r requirements.txt

# Copy source files over
COPY . .

# Expose the port gunicorn binds to in ENTRYPOINT below; keep the two in sync
EXPOSE 7860

# Run main UI
ENTRYPOINT ["gunicorn", "-b", "0.0.0.0:7860", "--timeout", "45", "app.app:server"]
README copy.md ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # MVP Climate Change Demo
2
+
3
+ This is a demo of the MVP Climate Change app. It allows users to select a location and year from a map of the world, see its land use composition, and prescribe or manually make changes to it and see the predicted ELUC (Emissions from Land Use Change) and amount of land changed. It is a simple Dash app. The demo can be found online at [landuse.evolution.ml](https://landuse.evolution.ml).
4
+
5
+ ## Downloading the data:
6
+
7
+ In ``data/`` there is a script called ``process_data.py``. This will download the entire 2.5GB data file from HuggingFace then process it into a 500MB csv that is used by the app. A token is required to download the data and must be saved in ``$HF_TOKEN``.
8
+
9
+ ## Predictors:
10
+
11
+ The RandomForest model is 1.7GB and is also saved on HuggingFace. To download it run ``download_predictors.py`` in ``predictors/``. This downloads a ``.joblib`` file that is loaded in the app.
12
+
13
+ ## Prescriptors:
14
+
15
+ Prescriptors are already stored in `prescriptors/` as well as the pareto front image and a CSV of pareto info from training the prescriptors.
16
+
17
+ ## Testing:
18
+
19
+ Testing can be done with ``python -m unittest discover``
20
+
21
+ To run specific tests run ``python -m unittest tests.test_app TestCase.test_specific_case``
22
+
23
+ ## Running the app:
24
+
25
+ To run the app call the app module with ``python -m app.app`` or use gunicorn with ``gunicorn -b 0.0.0.0:4057 app.app:server``.
26
+
27
+ ## Deployment:
28
+
29
+ Once ``process_data.py`` and ``download_predictors.py`` have been run, the app can be deployed by building with:
30
+ ```
31
+ docker build -t eluc-demo .
32
+ ```
33
+ then the container can be run with:
34
+ ```
35
+ docker run \
36
+ -v PATH_TO_PROJECT/mvp/use_cases/eluc/demo/data/processed:/usr/local/cognizant/eluc/data/processed:ro \
37
+ -v PATH_TO_PROJECT/mvp/use_cases/eluc/demo/predictors:/usr/local/cognizant/eluc/predictors:ro \
38
+ -p 8080:7860 eluc-demo
39
+ ```
40
+ Note: This mounts your local directories to the docker container, different steps may have to be taken for different setups.
app/.DS_Store ADDED
Binary file (6.15 kB). View file
 
app/Predictor.py ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from abc import ABC
2
+ from abc import abstractmethod
3
+ import warnings
4
+ from joblib import load
5
+
6
+ from . import constants
7
+
8
+ # Silence xgboost warnings
9
+ warnings.filterwarnings("ignore")
10
+
11
class Predictor(ABC):
    """
    Abstract base class that every predictor model inherits from.
    """

    @abstractmethod
    def predict(self, input):
        """
        Produce a prediction for ``input``; subclasses must override this.

        Input columns: CONTEXT_COLUMNS + DIFF_LAND_USE_COLS indexed by INDEX_COLS in constants.py
        Output columns: ELUC float
        Check output validity: scale of ELUC tC/ha caused by land use change passed in input
        """


class SkLearnPredictor(Predictor):
    """
    Predictor that delegates to a model object deserialized with joblib.
    """

    def __init__(self, load_path):
        """
        :param load_path: Location handed to joblib's ``load`` to restore the model.
        """
        # NOTE(review): joblib.load unpickles its input, so only point this at trusted files.
        self.model = load(load_path)

    def predict(self, input):
        """
        :param input: Forwarded unchanged to the wrapped model's ``predict``.
        :return: The first element of what the wrapped model's ``predict`` returns.
        """
        return self.model.predict(input)[0]


class CustomPredictor(Predictor):
    """ You fill in here: """
37
+
app/Prescriptor.py ADDED
@@ -0,0 +1,115 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import json
3
+ from typing import List
4
+ import pandas as pd
5
+ import numpy as np
6
+ from keras.models import load_model
7
+
8
+ from . import constants
9
+ from . import utils
10
+
11
+
12
class Prescriptor:
    """
    Wrapper for Keras prescriptor and encoder.
    """

    def __init__(self, prescriptor_id: str):
        """
        Loads the Keras model named by prescriptor_id and builds the encoder from the fields file.
        :param prescriptor_id: ID of Keras prescriptor to load.
        """
        prescriptor_model_filename = os.path.join(constants.PRESCRIPTOR_PATH,
                                                  prescriptor_id + '.h5')

        # compile=False: the model is only used for inference here
        self.prescriptor_model = load_model(prescriptor_model_filename, compile=False)

        with open(constants.FIELDS_PATH, 'r', encoding='utf-8') as f:
            fields = json.load(f)
        self.encoder = utils.Encoder(fields)

    def _is_single_action_prescriptor(self, actions):
        """
        Checks how many Actions have been defined in the Context, Actions, Outcomes mapping.
        :param actions: collection of action names
        :return: True if only 1 action is defined, False otherwise
        """
        return len(actions) == 1

    def _is_scalar(self, prescribed_action):
        """
        Checks if the prescribed action contains a single value, i.e. a scalar, or an array.
        A prescribed action contains a single value if it has been prescribed for a single context sample
        :param prescribed_action: a numpy array of prescribed values
        :return: True if the prescribed action holds exactly one element, False otherwise.
        """
        # Counting elements (rather than indexing shape[0] and shape[1]) also handles
        # 1-D outputs and matches exactly the arrays ndarray.item() can convert.
        return prescribed_action.size == 1

    def _convert_to_nn_input(self, context_df: pd.DataFrame) -> List[np.ndarray]:
        """
        Converts a context DataFrame to a list of numpy arrays a neural network can ingest
        :param context_df: a DataFrame containing inputs for a neural network. Number of inputs and size must match
        :return: a list of numpy ndarray, one ndarray per neural network input
        """
        # The NN expects a list of i inputs by s samples (e.g. 9 x 299).
        # So convert the data frame to a numpy array (gives shape 299 x 9), transpose it (gives 9 x 299)
        # and stack each column's values into its own array (list of 9 arrays of 299)
        return [np.stack(column, axis=0) for column in context_df.to_numpy().transpose()]

    def __prescribe_from_model(self, context_df: pd.DataFrame) -> pd.DataFrame:
        """
        Generates prescriptions using the loaded neural network and the passed context
        :param context_df: a DataFrame containing the context to prescribe for
        :return: a pandas DataFrame of action name to action value or list of action values
        """
        action_list = ['reco_land_use']

        # Convert the input df
        context_as_nn_input = self._convert_to_nn_input(context_df)
        row_index = context_df.index

        # Get the prescribed actions
        prescribed_actions = self.prescriptor_model.predict(context_as_nn_input)

        if self._is_single_action_prescriptor(action_list):
            # Put the single action in an array to process it like multiple actions
            prescribed_actions = [prescribed_actions]

        actions = {}
        for action_col, prescribed in zip(action_list, prescribed_actions):
            if self._is_scalar(prescribed):
                # We have a single row and this action is numerical. Convert it to a scalar.
                actions[action_col] = prescribed.item()
            else:
                actions[action_col] = prescribed.tolist()

        # Convert the prescribed actions to a DataFrame
        prescribed_actions_df = pd.DataFrame(actions,
                                             columns=action_list,
                                             index=row_index)
        return prescribed_actions_df

    def run_prescriptor(self, sample_context_df):
        """
        Runs prescriptor on context. Then re-scales prescribed land
        use to match how much was used in the sample.

        :param sample_context_df: a DataFrame containing the context
        :return: DataFrame of prescribed land use
        """
        encoded_sample_context_df = self.encoder.encode_as_df(sample_context_df)
        prescribed_actions_df = self.__prescribe_from_model(encoded_sample_context_df)
        reco_land_use_df = pd.DataFrame(prescribed_actions_df["reco_land_use"].tolist(),
                                        columns=constants.RECO_COLS)

        # Re-scales our prescribed land to match the amount of land used in the sample
        # (only the first row of the sample context is used for the total)
        used = sample_context_df[constants.RECO_COLS].iloc[0].sum()
        reco_land_use_df = reco_land_use_df[constants.RECO_COLS].mul(used, axis=0)

        # Reorder columns
        return reco_land_use_df[constants.RECO_COLS]
app/__init__.py ADDED
File without changes
app/__pycache__/Predictor.cpython-310.pyc ADDED
Binary file (2.56 kB). View file
 
app/__pycache__/Prescriptor.cpython-310.pyc ADDED
Binary file (4.41 kB). View file
 
app/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (141 Bytes). View file
 
app/__pycache__/app.cpython-310.pyc ADDED
Binary file (20.3 kB). View file
 
app/__pycache__/constants.cpython-310.pyc ADDED
Binary file (1.63 kB). View file
 
app/__pycache__/utils.cpython-310.pyc ADDED
Binary file (9.37 kB). View file
 
app/app.py ADDED
@@ -0,0 +1,717 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from math import isclose
2
+
3
+ import os
4
+ import numpy as np
5
+ import pandas as pd
6
+ import regionmask
7
+ import plotly.graph_objects as go
8
+ from dash import ALL
9
+ from dash import MATCH
10
+ from dash import Dash
11
+ from dash import Input
12
+ from dash import Output
13
+ from dash import State
14
+ from dash import dcc
15
+ from dash import html
16
+ import dash_bootstrap_components as dbc
17
+
18
+ from . import Predictor
19
+ from . import Prescriptor
20
+ from . import constants
21
+ from . import utils
22
+
23
+ app = Dash(__name__,
24
+ external_stylesheets=[dbc.themes.BOOTSTRAP, dbc.icons.BOOTSTRAP],
25
+ prevent_initial_callbacks="initial_duplicate")
26
+ server = app.server
27
+
28
+ df = pd.read_csv(constants.DATA_FILE_PATH, index_col=constants.INDEX_COLS)
29
+ countries_df = regionmask.defined_regions.natural_earth_v5_0_0.countries_110.to_dataframe()
30
+
31
+ # Prescriptor list should be in order of least to most change
32
+ pareto_df = pd.read_csv(constants.PARETO_CSV_PATH)
33
+ prescriptor_list = list(pareto_df["id"])
34
+
35
+ # Cells
36
+ min_lat = df.index.get_level_values("lat").min()
37
+ max_lat = df.index.get_level_values("lat").max()
38
+ min_lon = df.index.get_level_values("lon").min()
39
+ max_lon = df.index.get_level_values("lon").max()
40
+ min_time = df.index.get_level_values("time").min()
41
+ max_time = df.index.get_level_values("time").max()
42
+
43
+ lat_list = list(np.arange(min_lat, max_lat + constants.GRID_STEP, constants.GRID_STEP))
44
+ lon_list = list(np.arange(min_lon, max_lon + constants.GRID_STEP, constants.GRID_STEP))
45
+
46
+ map_fig = go.Figure()
47
+
48
+ # Load predictors
49
+ predictors = utils.load_predictors()
50
+
51
+ # Legend examples come from https://hess.copernicus.org/preprints/hess-2021-247/hess-2021-247-ATC3.pdf
52
+ legend_div = html.Div(
53
+ style={},
54
+ children = [
55
+ dcc.Markdown('''
56
+ ### Land Use Types
57
+
58
+ Primary: Vegetation that is untouched by humans
59
+
60
+ - primf: Primary forest
61
+ - primn: Primary nonforest vegetation
62
+
63
+
64
+ Secondary: Vegetation that has been touched by humans
65
+
66
+ - secdf: Secondary forest
67
+ - secdn: Secondary nonforest vegetation
68
+
69
+ Urban
70
+
71
+ Crop
72
+
73
+ - c3ann: Annual C3 crops (e.g. wheat)
74
+ - c4ann: Annual C4 crops (e.g. maize)
75
+ - c3per: Perennial C3 crops (e.g. banana)
76
+ - c4per: Perennial C4 crops (e.g. sugarcane)
77
+ - c3nfx: Nitrogen fixing C3 crops (e.g. soybean)
78
+
79
+ Pasture
80
+
81
+ - pastr: Managed pasture land
82
+ - range: Natural grassland/savannah/desert/etc.
83
+ ''')
84
+ ]
85
+ )
86
+
87
+ context_div = html.Div(
88
+ style={'display': 'grid',
89
+ 'grid-template-columns': 'auto 1fr', 'grid-template-rows': 'auto auto auto auto',
90
+ 'position': 'absolute', 'bottom': '0'},
91
+ children=[
92
+ html.P("Region", style={'grid-column': '1', 'grid-row': '1', 'padding-right': '10px'}),
93
+ dcc.Dropdown(
94
+ id="loc-dropdown",
95
+ options=list(countries_df["names"]),
96
+ value=list(countries_df["names"])[143],
97
+ style={'grid-column': '2', 'grid-row': '1', 'width': '75%', 'justify-self': 'left', 'margin-top': '-3px'}
98
+ ),
99
+ html.P("Lat", style={'grid-column': '1', 'grid-row': '2', 'padding-right': '10px'}),
100
+ dcc.Dropdown(
101
+ id='lat-dropdown',
102
+ options=lat_list,
103
+ placeholder="Select a latitude",
104
+ value=51.625,
105
+ style={'grid-column': '2', 'grid-row': '2', 'width': '75%', 'justify-self': 'left', 'margin-top': '-3px',}
106
+ ),
107
+ html.P("Lon", style={'grid-column': '1', 'grid-row': '3', 'padding-right': '10px'}),
108
+ dcc.Dropdown(
109
+ id='lon-dropdown',
110
+ options=lon_list,
111
+ placeholder="Select a longitude",
112
+ value=-3.375,
113
+ style={'grid-column': '2', 'grid-row': '3', 'width': '75%', 'justify-self': 'left', 'margin-top': '-3px'}
114
+ ),
115
+ html.P("Year ", style={'grid-column': '1', 'grid-row': '4', 'margin-right': '10px'}),
116
+ html.Div([
117
+ dcc.Input(
118
+ id="year-input",
119
+ type="number",
120
+ value=2021,
121
+ debounce=True
122
+ ),
123
+ dcc.Tooltip(f"Year must be between {min_time} and {max_time}."),
124
+ ], style={'grid-column': '2', 'grid-row': '4', 'width': '75%', 'justify-self': 'left', 'margin-top': '-3px'}),
125
+ ]
126
+ )
127
+
128
+ presc_select_div = html.Div([
129
+ html.P("Minimize change", style={"grid-column": "1"}),
130
+ html.Div([
131
+ dcc.Slider(id='presc-select',
132
+ min=0, max=len(prescriptor_list)-1, step=1,
133
+ value=constants.DEFAULT_PRESCRIPTOR_IDX,
134
+ included=False,
135
+ marks={i : "" for i in range(len(prescriptor_list))})
136
+ ], style={"grid-column": "2", "width": "100%", "margin-top": "8px"}),
137
+ html.P("Minimize ELUC", style={"grid-column": "3", "padding-right": "10px"}),
138
+ html.Button("Prescribe", id='presc-button', n_clicks=0, style={"grid-column": "4", "margin-top": "-10px"}),
139
+ html.Button("View Pareto", id='pareto-button', n_clicks=0, style={"grid-column": "5", "margin-top": "-10px"}),
140
+ dbc.Modal(
141
+ [
142
+ dbc.ModalHeader("Pareto front"),
143
+ dcc.Graph(id='pareto-fig', figure=utils.create_pareto(pareto_df=pareto_df,
144
+ presc_id=prescriptor_list[constants.DEFAULT_PRESCRIPTOR_IDX])),
145
+ ],
146
+ id="pareto-modal",
147
+ is_open=False,
148
+ ),
149
+ ], style={"display": "grid", "grid-template-columns": "auto 1fr auto auto", "width": "100%", "align-content": "center"})
150
+
151
+ chart_select_div = dcc.Dropdown(
152
+ options=constants.CHART_TYPES,
153
+ id="chart-select",
154
+ value=constants.CHART_TYPES[0],
155
+ clearable=False
156
+ )
157
+
158
+ check_options = utils.create_check_options(constants.RECO_COLS)
159
+ checklist_div = html.Div([
160
+ dcc.Checklist(check_options, id="locks", inputStyle={"margin-bottom": "30px"})
161
+ ])
162
+
163
+ sliders_div = html.Div([
164
+ html.Div([
165
+ #html.P(col, style={"grid-column": "1"}),
166
+ html.Div([
167
+ dcc.Slider(
168
+ min=0,
169
+ max=1,
170
+ step=constants.SLIDER_PRECISION,
171
+ value=0,
172
+ marks=None,
173
+ tooltip={"placement": "bottom", "always_visible": False},
174
+ id={"type": "presc-slider", "index": f"{col}"}
175
+ )
176
+ ], style={"grid-column": "1", "width": "100%", "margin-top": "8px"}),
177
+ dcc.Input(
178
+ value="0%",
179
+ type="text",
180
+ disabled=True,
181
+ id={"type": "slider-value", "index": f"{col}"},
182
+ style={"grid-column": "2", "text-align": "right", "margin-top": "-5px"}),
183
+ ], style={"display": "grid", "grid-template-columns": "1fr 15%"}) for col in constants.RECO_COLS]
184
+ )
185
+
186
+ frozen_div = html.Div([
187
+ dcc.Input(
188
+ value=f"{col}: 0.00%",
189
+ type="text",
190
+ disabled=True,
191
+ id={"type": "frozen-input", "index": f"{col}-frozen"}) for col in constants.NO_CHANGE_COLS + ["nonland"]
192
+ ])
193
+
194
+ predict_div = html.Div([
195
+ dcc.Dropdown(list((predictors.keys())), list(predictors.keys())[0], id="pred-select", style={"width": "200px"}),
196
+ html.Button("Predict", id='predict-button', n_clicks=0,),
197
+ html.Label("Predicted ELUC:", style={'padding-left': '10px'}),
198
+ dcc.Input(
199
+ value="",
200
+ type="text",
201
+ disabled=True,
202
+ id="predict-eluc",
203
+ ),
204
+ html.Label("tC/ha", style={'padding-left': '2px'}),
205
+ html.Label("Land Change:", style={'padding-left': '10px'}),
206
+ dcc.Input(
207
+ value="",
208
+ type="text",
209
+ disabled=True,
210
+ id="predict-change",
211
+ ),
212
+ html.Label("%", style={'padding-left': '2px'}),
213
+ ], style={"display": "flex", "flex-direction": "row", "width": "90%", "align-items": "center"})
214
+
215
+ inline_block = {"display": "inline-block", "padding-right": "10px"}
216
+ trivia_div = html.Div([
217
+ html.Div(className="parent", children=[
218
+ html.P("Total emissions reduced from this land use change: ", className="child", style=inline_block),
219
+ html.P(id="total-em", style={"font-weight": "bold"}|inline_block)
220
+ ]),
221
+ html.Div(className="parent", children=[
222
+ html.I(className="bi bi-airplane", style=inline_block),
223
+ html.P("Flight emissions from flying JFK to Geneva: ", className="child", style=inline_block),
224
+ html.P(f"{constants.CO2_JFK_GVA} tonnes CO2", style={"font-weight": "bold"}|inline_block)
225
+ ]),
226
+ html.Div(className="parent", children=[
227
+ html.I(className="bi bi-airplane", style=inline_block),
228
+ html.P("Plane tickets mitigated: ", className="child", style=inline_block),
229
+ html.P(id="tickets", style={"font-weight": "bold"}|inline_block)
230
+ ]),
231
+ html.Div(className="parent", children=[
232
+ html.I(className="bi bi-person", style=inline_block),
233
+ html.P("Total yearly carbon emissions of average world citizen: ", className="child", style=inline_block),
234
+ html.P(f"{constants.CO2_PERSON} tonnes CO2", style={"font-weight": "bold"}|inline_block)
235
+ ]),
236
+ html.Div(className="parent", children=[
237
+ html.I(className="bi bi-person", style=inline_block),
238
+ html.P("Number of peoples' carbon emissions mitigated from this change : ", className="child", style=inline_block),
239
+ html.P(id="people", style={"font-weight": "bold"}|inline_block)
240
+ ]),
241
+ html.P("(Sources: https://flightfree.org/flight-emissions-calculator https://scied.ucar.edu/learning-zone/climate-solutions/carbon-footprint)", style={"font-size": "10px"})
242
+ ])
243
+
244
+ references_div = html.Div([
245
+ html.Div(className="parent", children=[
246
+ html.P("Code for this project can be found here: ",
247
+ className="child", style=inline_block),
248
+ html.A("(Project Resilience MVP repo)", href="https://github.com/Project-Resilience/mvp/tree/main/use_cases/eluc\n"),
249
+ ]),
250
+ html.Div(className="parent", children=[
251
+ html.P("The paper for this project can be found here: ",
252
+ className="child", style=inline_block),
253
+ html.A("(arXiv link)", href="https://arxiv.org/abs/2311.12304\n"),
254
+ ]),
255
+ html.Div(className="parent", children=[
256
+ html.P("ELUC data provided by the BLUE model ",
257
+ className="child", style=inline_block),
258
+ html.A("(BLUE: Bookkeeping of land use emissions)", href="https://agupubs.onlinelibrary.wiley.com/doi/10.1002/2014GB004997\n"),
259
+ ]),
260
+ html.Div(className="parent", children=[
261
+ html.P("Land use change data provided by the LUH2 project",
262
+ className="child", style=inline_block),
263
+ html.A("(LUH2: Land Use Harmonization 2)", href="https://luh.umd.edu/\n"),
264
+ ]),
265
+ html.Div(className="parent", children=[
266
+ html.P("Setup is described in Appendix C2.1 of the GCB 2022 report",
267
+ className="child", style=inline_block),
268
+ html.A("(Global Carbon Budget 2022 report)", href="https://essd.copernicus.org/articles/14/4811/2022/#section10/\n"),
269
+ ]),
270
+ html.Div(className="parent", children=[
271
+ html.P("The Global Carbon Budget report assesses the global CO2 budget for the Intergovernmental Panel on Climate Change",
272
+ className="child", style=inline_block),
273
+ html.A("(IPCC)", href="https://www.ipcc.ch/\n"),
274
+ ]),
275
+ ])
276
+
277
+
278
@app.callback(
    Output("pareto-modal", "is_open"),
    Output("pareto-fig", "figure"),
    [Input("pareto-button", "n_clicks")],
    [State("pareto-modal", "is_open")],
    [State("presc-select", "value")],
)
def toggle_modal(n, is_open, presc_idx):
    """
    Opens or closes the pareto modal and rebuilds its figure.
    :param n: Click count of the pareto button; a falsy value leaves the modal state as is.
    :param is_open: Current open state of the modal.
    :param presc_idx: Index into prescriptor_list of the prescriptor passed to the figure builder.
    :return: The modal's next open state and the pareto figure.
    """
    pareto_fig = utils.create_pareto(pareto_df, prescriptor_list[presc_idx])
    next_state = (not is_open) if n else is_open
    return next_state, pareto_fig
297
+
298
+
299
@app.callback(
    Output("lat-dropdown", "value"),
    Output("lon-dropdown", "value"),
    Input("map", "clickData"),
    prevent_initial_call=True
)
def click_map(click_data):
    """
    Selects context when point on map is clicked.
    :param click_data: Click payload from the map; only its first point is read.
    :return: The latitude and longitude of the clicked point, for the two dropdowns.
    """
    clicked = click_data["points"][0]
    return clicked["lat"], clicked["lon"]
312
+
313
@app.callback(
    Output("lat-dropdown", "value", allow_duplicate=True),
    Output("lon-dropdown", "value", allow_duplicate=True),
    Input("loc-dropdown", "value"),
    State("year-input", "value"),
    prevent_initial_call=True
)
def select_country(location, year):
    """
    Moves the lat/lon selection to a point inside the newly selected country.
    The point chosen is the middle row of that country's rows for the given year.
    :param location: Selected country name.
    :param year: Year whose rows are sampled from.
    :return: First and second index entries of the chosen row, used as latitude and longitude.
    """
    name_matches = countries_df["names"] == location
    country_idx = countries_df[name_matches].index[0]

    in_country = df[df["country"] == country_idx]
    samples = in_country.loc[year]

    middle = len(samples) // 2
    example = samples.iloc[middle]
    return example.name[0], example.name[1]
332
+
333
+
334
@app.callback(
    Output("map", "figure"),
    Input("year-input", "value"),
    Input("lat-dropdown", "value"),
    Input("lon-dropdown", "value"),
    State("loc-dropdown", "value"),
)
def update_map(year, lat, lon, location):
    """
    Rebuilds the map from the selected country's rows for the selected year
    and passes the position of the selected lat/lon row to the map builder.
    :param year: The selected year.
    :param lat: Selected latitude.
    :param lon: Selected longitude.
    :param location: Selected country name.
    :return: A newly created map.
    """
    name_matches = countries_df["names"] == location
    country_idx = countries_df[name_matches].index[0]

    # Filter data by year and location
    year_rows = df.loc[year]
    country_rows = year_rows[year_rows["country"] == country_idx]
    data = country_rows.copy().reset_index()

    # Find colored point
    is_selected = (data["lat"] == lat) & (data["lon"] == lon)
    selected_idx = data[is_selected].index[0]

    return utils.create_map(data, 10, selected_idx)
360
+
361
+
362
@app.callback(
    Output({"type": "frozen-input", "index": ALL}, "value"),
    Output({"type": "presc-slider", "index": ALL}, "value"),
    Output({"type": "presc-slider", "index": ALL}, "max"),
    Input("lat-dropdown", "value"),
    Input("lon-dropdown", "value"),
    Input("year-input", "value")
)
def set_frozen_reset_sliders(lat, lon, year):
    """
    Refreshes the frozen-column labels and resets every prescription slider to 0.
    Each slider's max becomes the summed RECO_COLS share of the selected cell.
    :param lat: Selected latitude.
    :param lon: Selected longitude.
    :param year: Selected year.
    :return: Frozen values, slider values, and slider max.
    """
    context = df.loc[year, lat, lon]
    chart_data = utils.add_nonland(context[constants.LAND_USE_COLS])

    # One "<column>: <percent>%" label per frozen column
    frozen_cols = constants.NO_CHANGE_COLS + ["nonland"]
    frozen_values = chart_data[frozen_cols].tolist()
    frozen = [f"{name}: {share*100:.2f}%" for name, share in zip(frozen_cols, frozen_values)]

    n_sliders = len(constants.RECO_COLS)
    reset = [0] * n_sliders

    max_val = chart_data[constants.RECO_COLS].sum()
    maxes = [max_val] * n_sliders

    return frozen, reset, maxes
393
+
394
+
395
@app.callback(
    Output("context-fig", "figure"),
    Input("chart-select", "value"),
    Input("year-input", "value"),
    Input("lat-dropdown", "value"),
    Input("lon-dropdown", "value")
)
def update_context_chart(chart_type, year, lat, lon):
    """
    Redraws the context chart whenever the chart type or the selected cell changes.
    :param chart_type: String input from chart select dropdown ("Treemap" or "Pie Chart").
    :param year: Selected context year.
    :param lat: Selected context lat.
    :param lon: Selected context lon.
    :return: New figure of the type selected by chart_type, built from the context data.
    """
    land_use = df.loc[year, lat, lon][constants.LAND_USE_COLS]
    chart_data = utils.add_nonland(land_use)

    assert chart_type in ("Treemap", "Pie Chart")

    if chart_type != "Treemap":
        return utils.create_pie(chart_data, type_context=True, year=year)

    return utils.create_treemap(chart_data, type_context=True, year=year)
420
+
421
+
422
@app.callback(
    Output({"type": "presc-slider", "index": ALL}, "value", allow_duplicate=True),
    Input("presc-button", "n_clicks"),
    State("presc-select", "value"),
    State("year-input", "value"),
    State("lat-dropdown", "value"),
    State("lon-dropdown", "value"),
    prevent_initial_call=True
)
def select_prescriptor(n_clicks, presc_idx, year, lat, lon):
    """
    Loads the chosen prescriptor, runs it on the selected cell and feeds the result to the sliders.
    :param n_clicks: Unused number of times button has been clicked.
    :param presc_idx: Index of prescriptor in prescriptor_list to load.
    :param year: Selected context year.
    :param lat: Selected context lat.
    :param lon: Selected context lon.
    :return: Updated slider values (first row of the prescription).
    """
    # A fresh Prescriptor is constructed on every click
    prescriptor = Prescriptor.Prescriptor(prescriptor_list[presc_idx])

    # Wrap the single context row in a one-row DataFrame for the prescriptor
    context_row = df.loc[year, lat, lon][constants.CONTEXT_COLUMNS]
    prescribed = prescriptor.run_prescriptor(pd.DataFrame([context_row]))

    return prescribed.iloc[0].tolist()
447
+
448
+
449
@app.callback(
    Output({"type": "slider-value", "index": MATCH}, "value"),
    Input({"type": "presc-slider", "index": MATCH}, "value")
)
def show_slider_value(slider):
    """
    Displays a slider's value next to it as a percentage.
    :param slider: Value of the matching slider.
    :return: The value times 100, formatted with two decimals and a trailing "%".
    """
    percent = slider * 100
    return f"{percent:.2f}%"
460
+
461
+
462
@app.callback(
    Output("sum-warning", "children"),
    Output("predict-change", "value"),
    Input({"type": "presc-slider", "index": ALL}, "value"),
    State("year-input", "value"),
    State("lat-dropdown", "value"),
    State("lon-dropdown", "value"),
    State("locks", "value"),
    prevent_initial_call=True
)
def compute_land_change(sliders, year, lat, lon, locked):
    """
    Computes land change percent for output and collects warnings about the sliders.
    Warns when the sliders don't sum to the context's RECO_COLS total, when the locked
    sliders alone exceed that total, and when any slider is negative.
    :param sliders: Current slider values, one per RECO_COLS entry.
    :param year: Selected context year.
    :param lat: Selected context lat.
    :param lon: Selected context lon.
    :param locked: Locked columns to check for warning.
    :return: Warning if necessary, land change percent.
    """
    context = df.loc[year, lat, lon][constants.LAND_USE_COLS]
    presc = pd.Series(sliders, index=constants.RECO_COLS)

    new_sum = presc.sum()
    old_sum = context[constants.RECO_COLS].sum()

    messages = []

    # Check if prescriptions sum to the context total
    # TODO: Are we being precise enough?
    sums_match = isclose(new_sum, old_sum, rel_tol=1e-7)
    if not sums_match:
        messages.append(html.P(f"WARNING: Please make sure prescriptions sum to: {str(old_sum * 100)} instead of {str(new_sum * 100)} by clicking \"Sum to 100\""))

    # Check if sum of locked prescriptions are > sum(land use)
    # TODO: take a look at this logic.
    if locked and presc[locked].sum() > old_sum:
        messages.append(html.P("WARNING: Sum of locked prescriptions is greater than sum of land use. Please reduce one before proceeding"))

    # Check if any prescriptions below 0
    if (presc < 0).any():
        messages.append(html.P("WARNING: Negative values detected. Please lower the value of a locked slider."))

    # Compute total change
    change = utils.compute_percent_change(context, presc)
    change_text = f"{change * 100:.2f}"

    return messages, change_text
507
+
508
+
509
@app.callback(
    Output("presc-fig", "figure"),
    Input("chart-select", "value"),
    Input({"type": "presc-slider", "index": ALL}, "value"),
    State("year-input", "value"),
    State("lat-dropdown", "value"),
    State("lon-dropdown", "value"),
    prevent_initial_call=True
)
def update_presc_chart(chart_type, sliders, year, lat, lon):
    """
    Redraws the prescription chart from the slider values in the selected chart type.
    :param chart_type: String input from chart select dropdown ("Treemap" or "Pie Chart").
    :param sliders: Prescribed slider values.
    :param year: Selected context year (also for title of chart).
    :param lat: Selected context lat.
    :param lon: Selected context lon.
    :return: New chart of type chart_type using presc data.
    """
    # With every slider at 0 there is no prescription: return an empty treemap,
    # whatever chart type is selected
    if all(value == 0 for value in sliders):
        return utils.create_treemap(pd.Series([]), type_context=False, year=year)

    presc = pd.Series(sliders, index=constants.RECO_COLS)
    context = df.loc[year, lat, lon]
    land_use = context[constants.LAND_USE_COLS]

    # Start from the context's land use and overwrite the recommended columns
    chart_data = land_use.copy()
    chart_data[constants.RECO_COLS] = presc[constants.RECO_COLS]

    # Manually calculate nonland from context so that it's not zeroed out by sliders.
    nonland = 1 - land_use.sum()
    if not nonland > 0:
        nonland = 0
    chart_data["nonland"] = nonland

    assert chart_type in ("Treemap", "Pie Chart")

    if chart_type != "Treemap":
        return utils.create_pie(chart_data, type_context=False, year=year)

    return utils.create_treemap(chart_data, type_context=False, year=year)
550
+
551
+
552
@app.callback(
    Output({"type": "presc-slider", "index": ALL}, "value", allow_duplicate=True),
    Input("sum-button", "n_clicks"),
    State({"type": "presc-slider", "index": ALL}, "value"),
    State("year-input", "value"),
    State("lat-dropdown", "value"),
    State("lon-dropdown", "value"),
    State("locks", "value"),
    prevent_initial_call=True
)
def sum_to_1(n_clicks, sliders, year, lat, lon, locked):
    """
    Rescales the slider values so they sum to the amount of land used in the context.
    Locked sliders are left untouched: their sum is subtracted from both the
    target sum and the current sum before the remaining sliders are rescaled.
    :param n_clicks: Unused number of times button has been clicked.
    :param sliders: Prescribed slider values to rescale.
    :param year: Selected context year.
    :param lat: Selected context lat.
    :param lon: Selected context lon.
    :param locked: Which sliders to not consider in calculation.
    :return: Slider values scaled to fit the share of land used in context.
    """
    context = df.loc[year, lat, lon]
    presc = pd.Series(sliders, index=constants.RECO_COLS)

    old_sum = context[constants.RECO_COLS].sum()
    new_sum = presc.sum()

    if locked:
        adjustable = [col for col in constants.RECO_COLS if col not in locked]
        locked_sum = presc[locked].sum()
        old_sum -= locked_sum
        new_sum -= locked_sum
    else:
        adjustable = constants.RECO_COLS

    # Guard against divide by zero in BOTH cases. When new_sum == 0 every
    # adjustable slider is zero, so there is nothing to rescale; dividing
    # anyway would fill the sliders with NaN.
    if new_sum != 0:
        presc[adjustable] = presc[adjustable].div(new_sum).mul(old_sum)

    # Set all negative values to 0
    presc[presc < 0] = 0
    return presc.tolist()
597
+
598
+
599
@app.callback(
    Output("predict-eluc", "value"),
    Input("predict-button", "n_clicks"),
    State("year-input", "value"),
    State("lat-dropdown", "value"),
    State("lon-dropdown", "value"),
    State({"type": "presc-slider", "index": ALL}, "value"),
    State("pred-select", "value"),
    prevent_initial_call=True
)
def predict(n_clicks, year, lat, lon, sliders, predictor_name):
    """
    Predicts ELUC for the selected context cell and the prescribed sliders.
    :param n_clicks: Unused number of times button has been clicked.
    :param year: Selected context year.
    :param lat: Selected context lat.
    :param lon: Selected context lon.
    :param sliders: Prescribed slider values.
    :param predictor_name: String name of predictor to use from dropdown.
    :return: Predicted ELUC formatted to 4 decimals.
    """
    context = df.loc[year, lat, lon]
    prescribed = pd.Series(sliders, index=constants.RECO_COLS)

    # Fill in the columns that cannot be prescribed with their context values,
    # then turn the prescription into per-column differences from the context.
    full_presc = prescribed.combine_first(context[constants.NO_CHANGE_COLS])
    land_cols = constants.LAND_USE_COLS
    diff_row = (full_presc[land_cols] - context[land_cols]).rename(constants.COLS_MAP)
    diff_df = pd.DataFrame([diff_row])

    model = predictors[predictor_name]
    eluc = model.predict(diff_df)
    return f"{eluc:.4f}"
632
+
633
+
634
@app.callback(
    Output("total-em", "children"),
    Output("tickets", "children"),
    Output("people", "children"),
    Input("predict-eluc", "value"),
    State("year-input", "value"),
    State("lat-dropdown", "value"),
    State("lon-dropdown", "value"),
    prevent_initial_call=True
)
def update_trivia(eluc_str, year, lat, lon):
    """
    Refreshes the trivia section from the displayed ELUC value.
    :param eluc_str: ELUC in string form.
    :param year: Selected context year.
    :param lat: Selected context lat.
    :param lon: Selected context lon.
    :return: Three strings: total emissions, equivalent tickets, equivalent people.
    """
    cell_area = df.loc[year, lat, lon]["cell_area"]

    # ELUC is per unit area, so scale by the cell area and flip the sign.
    total_reduction = float(eluc_str) * cell_area
    saved = -1 * total_reduction

    tickets = saved // constants.CO2_JFK_GVA
    people = saved // constants.CO2_PERSON
    return f"{saved:,.2f} tonnes CO2", f"{tickets:,.0f} tickets", f"{people:,.0f} people"
662
+
663
+
664
app.title = 'Land Use Optimization'
app.css.config.serve_locally = False
# Don't be afraid of the 3rd party URLs: chriddyp is the author of Dash!
# These two allow us to dim the screen while loading.
# See discussion with Dash devs here: https://community.plotly.com/t/dash-loading-states/5687
app.css.append_css({'external_url': 'https://codepen.io/chriddyp/pen/bWLwgP.css'})
app.css.append_css({'external_url': 'https://codepen.io/chriddyp/pen/brPBPO.css'})

# Page layout, top to bottom: intro text, Context (map + selectors + legend),
# Actions (prescriptor/chart selectors, locks, sliders, context and prescription
# charts, sum button + warning), Outcomes, Trivia and References.
app.layout = html.Div([
    dcc.Markdown('''
# Land Use Optimization
This site is for demonstration purposes only.

For a given context cell representing a portion of the earth,
identified by its latitude and longitude coordinates, and a given year:
* What changes can we make to the land usage
* In order to minimize the resulting estimated CO2 emissions? (Emissions from Land Use Change, ELUC,
in tons of carbon per hectare)

*Note: the prescriptor model is currently only trained on Western Europe*
'''),
    dcc.Markdown('''## Context'''),
    html.Div([
        dcc.Graph(id="map", figure=map_fig, style={"grid-column": "1"}),
        html.Div([context_div], style={"grid-column": "2"}),
        html.Div([legend_div], style={"grid-column": "3"})
    ], style={"display": "grid", "grid-template-columns": "auto 1fr auto", 'position': 'relative'}),
    dcc.Markdown('''## Actions'''),
    html.Div([
        html.Div([presc_select_div], style={"grid-column": "1"}),
        html.Div([chart_select_div], style={"grid-column": "2", "margin-top": "-10px", "margin-left": "10px"}),
    ], style={"display": "grid", "grid-template-columns": "45% 15%"}),
    html.Div([
        html.Div(checklist_div, style={"grid-column": "1", "height": "100%"}),
        html.Div(sliders_div, style={'grid-column': '2'}),
        dcc.Graph(id='context-fig', figure=utils.create_treemap(type_context=True), style={'grid-column': '3'}),
        # Was misspelled 'grid-clumn', which is not a CSS property and was ignored.
        dcc.Graph(id='presc-fig', figure=utils.create_treemap(type_context=False), style={'grid-column': '4'})
    ], style={'display': 'grid', 'grid-template-columns': 'auto 40% 1fr 1fr', "width": "100%"}),
    html.Div([
        frozen_div,
        html.Button("Sum to 100%", id='sum-button', n_clicks=0),
        html.Div(id='sum-warning')
    ]),
    dcc.Markdown('''## Outcomes'''),
    predict_div,
    dcc.Markdown('''## Trivia'''),
    trivia_div,
    dcc.Markdown('''## References'''),
    references_div
], style={'padding-left': '10px'},)


if __name__ == '__main__':
    app.run_server(host='0.0.0.0', debug=False, port=4057, use_reloader=False, threaded=False)
app/assets/favicon.ico ADDED
app/constants.py ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import base64
2
+ import os
3
+ import json
4
+
5
# Directory containing this module; all data paths are resolved relative to it.
ROOT_DIR = os.path.dirname(os.path.abspath(__file__))
DATA_FILE_PATH = os.path.join(ROOT_DIR, "../data/processed/eluc_1982.csv")

GRID_STEP = 0.25

INDEX_COLS = ["time", "lat", "lon"]

LAND_USE_COLS = ['c3ann', 'c3nfx', 'c3per', 'c4ann', 'c4per', 'pastr', 'primf', 'primn', 'range', 'secdf', 'secdn', 'urban']
CONTEXT_COLUMNS = LAND_USE_COLS + ['cell_area']
DIFF_LAND_USE_COLS = [f"{col}_diff" for col in LAND_USE_COLS]
# Maps each land use column to its "<col>_diff" counterpart.
COLS_MAP = dict(zip(LAND_USE_COLS, DIFF_LAND_USE_COLS))

# Prescriptor outputs
RECO_COLS = ['c3ann', 'c3nfx', 'c3per', 'c4ann', 'c4per', 'pastr', 'range', 'secdf', 'secdn']
DIFF_RECO_COLS = [f"{col}_diff" for col in RECO_COLS]
RECO_MAP = dict(zip(RECO_COLS, DIFF_RECO_COLS))

# Land use columns that are not in RECO_COLS.
NO_CHANGE_COLS = ["primf", "primn", "urban"]
CHART_COLS = LAND_USE_COLS + ["nonland"]

SLIDER_PRECISION = 1e-5

# Tonnes of CO2 per person for a flight from JFK to Geneva
CO2_JFK_GVA = 2.2
# NOTE(review): presumably tonnes of CO2 attributed to one person (per year?) - confirm.
CO2_PERSON = 4

# For creating treemap
C3 = ['c3ann', 'c3nfx', 'c3per']
C4 = ['c4ann', 'c4per']
PRIMARY = ['primf', 'primn']
SECONDARY = ['secdf', 'secdn']
FIELDS = ['pastr', 'range']

CHART_TYPES = ["Treemap", "Pie Chart"]

PREDICTOR_PATH = os.path.join(ROOT_DIR, "../predictors/")
PRESCRIPTOR_PATH = os.path.join(ROOT_DIR, "../prescriptors/")

# Pareto front
PARETO_CSV_PATH = os.path.join(PRESCRIPTOR_PATH, "pareto.csv")
PARETO_FRONT_PATH = os.path.join(PRESCRIPTOR_PATH, "pareto_front.png")
# Base64 (ASCII) encoding of the pareto front image, read once at import time.
# The context manager closes the file handle instead of leaking it.
with open(PARETO_FRONT_PATH, 'rb') as _pareto_file:
    PARETO_FRONT = base64.b64encode(_pareto_file.read()).decode('ascii')

FIELDS_PATH = os.path.join(PRESCRIPTOR_PATH, "fields.json")

DEFAULT_PRESCRIPTOR_IDX = 3  # By default we select the fourth prescriptor that minimizes change
app/utils.py ADDED
@@ -0,0 +1,322 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import json
3
+ import os
4
+ from sklearn.preprocessing import MinMaxScaler
5
+ import plotly.express as px
6
+ import plotly.graph_objects as go
7
+ from dash import html
8
+
9
+ from . import constants
10
+ from . import Predictor
11
+
12
+
13
class Encoder:
    """
    Builds one min/max scaler per field from the field's declared range.
    The field dictionary must look like this (see prescriptors/fields.json):
    {
        "field a": {"range": [x, y]},
        "field b": {"range": [z, s]}
    }
    """
    def __init__(self, fields):
        self.transformers = {}
        for name in fields:
            value_range = fields[name]["range"]
            scaler = MinMaxScaler(clip=True)
            self.transformers[name] = scaler
            # Fit on the two range endpoints so they map to the scaler's bounds.
            scaler.fit(pd.DataFrame({name: value_range}))

    def encode_as_df(self, df):
        """
        Scales the columns of a dataframe with the fitted min/max scalers.
        :param df: a dataframe to encode
        :return: a dataframe of encoded values. Only columns that have a scaler are returned.
        """
        encoded = {
            col: self.transformers[col].transform(df[[col]]).squeeze().tolist()
            for col in df
            if col in self.transformers
        }

        row_index = list(range(df.shape[0]))
        return pd.DataFrame.from_records(encoded, index=row_index)[encoded.keys()]
47
+
48
+
49
def add_nonland(data: pd.Series) -> pd.Series:
    """
    Appends a "nonland" entry equal to 1 minus the sum of LAND_USE_COLS.
    Note: the sum is not exactly 1, so a sum above 1 yields a nonland of 0.
    :param data: pd Series containing land use data.
    :return: pd Series restricted to CHART_COLS, nonland included.
    """
    land = data[constants.LAND_USE_COLS]
    used = land.sum()
    land['nonland'] = 1 - used if used <= 1 else 0
    return land[constants.CHART_COLS]
61
+
62
+
63
def create_map(df: pd.DataFrame, zoom=10, color_idx = None) -> go.Figure:
    """
    Creates a map figure of the data points with one point optionally highlighted.
    Note: a "color" column is added to (or overwritten in) the passed dataframe.
    :param df: DataFrame of data to plot, with "lat" and "lon" columns. This dataframe has its index reset.
    :param zoom: Zoom level of map (passed as the projection scale).
    :param color_idx: Positional index of the point to color red, or None to highlight nothing.
    :return: Plotly figure
    """
    color_seq = [px.colors.qualitative.Plotly[0], px.colors.qualitative.Plotly[1]]

    # Add color column
    color = ["blue" for _ in range(len(df))]
    # Compare against None explicitly: index 0 is a valid point to highlight.
    if color_idx is not None:
        color[color_idx] = "red"
    df["color"] = color

    map_fig = px.scatter_geo(
        df,
        lat="lat",
        lon="lon",
        color="color",
        color_discrete_sequence=color_seq,
        # Pin each category to its color. Without this, colors are assigned by
        # order of appearance, so highlighting the first row would swap them.
        color_discrete_map={"blue": color_seq[0], "red": color_seq[1]},
        hover_data={"lat": True, "lon": True, "color": False},
        size_max=10
    )

    map_fig.update_layout(margin={"l": 0, "r": 10, "t": 0, "b": 0}, showlegend=False)
    map_fig.update_geos(projection_scale=zoom, projection_type="orthographic", showcountries=True, fitbounds="locations")
    return map_fig
94
+
95
+
96
def create_check_options(values: list) -> list:
    """
    Builds the dash checklist options, one per value, each with a lock icon.
    :param values: List of values to create options for.
    :return: List of dash HTML options.
    """
    return [
        {"label": [html.I(className="bi bi-lock"), html.Span(val)],
         "value": val}
        for val in values
    ]
108
+
109
+
110
def compute_percent_change(context: pd.Series, presc: pd.Series) -> float:
    """
    Computes percent land use change from context to presc.
    The change is the sum of the positive differences over RECO_COLS divided
    by the total land use in the context.
    :param context: Context land use data
    :param presc: Prescribed land use data
    :return: Fraction of land use changed, never greater than 1.
    """
    diffs = presc[constants.RECO_COLS] - context[constants.RECO_COLS]
    change = diffs[diffs > 0].sum()
    total = context[constants.LAND_USE_COLS].sum()

    # If we can't change the land use just return 0.
    if total <= 0:
        return 0

    percent_changed = change / total

    # Prescriptions that have not been normalised yet (or float rounding) can
    # exceed the available land. Cap the result instead of asserting, so the
    # calling callback can still display its warning rather than crash.
    if percent_changed > 1:
        return 1.0

    return percent_changed
129
+
130
+
131
+ def _create_hovertext(labels: list, parents: list, values: list, title: str) -> list:
132
+ """
133
+ Helper function that formats the hover text for the treemap to be 2 decimals.
134
+ :param labels: Labels according to treemap format.
135
+ :param parents: Parents for each label according to treemap format.
136
+ :param values: Values for each label according to treemap format.
137
+ :param title: Title of treemap, root node's name.
138
+ :return: List of hover text strings.
139
+ """
140
+ hovertext = []
141
+ for i, label in enumerate(labels):
142
+ v = values[i] * 100
143
+ # Get value of parent or 100 if parent is ''
144
+ parent_v = values[labels.index(parents[i])] * 100 if parents[i] != '' else values[0] * 100
145
+ if parents[i] == '':
146
+ hovertext.append(f"{label}: {v:.2f}%")
147
+ elif parents[i] == title:
148
+ hovertext.append(f"{label}<br>{v:.2f}% of {title}")
149
+ else:
150
+ hovertext.append(f"{label}<br>{v:.2f}% of {title}<br>{(v/parent_v)*100:.2f}% of {parents[i]}")
151
+
152
+ return hovertext
153
+
154
+
155
def create_treemap(data=None, type_context=True, year=2021) -> go.Figure:
    """
    Creates a treemap of land use grouped into crops, primary/secondary
    vegetation, urban, fields and nonland.
    :param data: Pandas series of land use data (LAND_USE_COLS plus "nonland").
        None or an empty series produces a treemap with only the root node.
    :param type_context: If the title should be context or prescribed
    :param year: Year shown in the title (year + 1 for a prescription).
    :return: Treemap figure
    """
    # The default used to be the pd.Series class itself, which only worked
    # because the "empty" property object is truthy. Use a real empty series.
    if data is None:
        data = pd.Series([], dtype=float)

    title = f"Context in {year}" if type_context else f"Prescribed for {year+1}"

    tree_params = {
        "branchvalues": "total",
        "sort": False,
        "texttemplate": "%{label}<br>%{percentRoot:.2%}",
        "hoverinfo": "label+percent root+percent parent",
        "root_color": "lightgrey"
    }

    labels, parents, values = None, None, None

    if data.empty:
        labels = [title]
        parents = [""]
        values = [1]

    else:
        total = data[constants.LAND_USE_COLS].sum()
        c3 = data[constants.C3].sum()
        c4 = data[constants.C4].sum()
        crops = c3 + c4
        primary = data[constants.PRIMARY].sum()
        secondary = data[constants.SECONDARY].sum()
        fields = data[constants.FIELDS].sum()

        # labels, parents and values are parallel lists: entry i of each
        # describes the same treemap node.
        labels = [title, "Nonland",
                  "Crops", "C3", "C4", "c3ann", "c3nfx", "c3per", "c4ann", "c4per",
                  "Primary Vegetation", "primf", "primn",
                  "Secondary Vegetation", "secdf", "secdn",
                  "Urban",
                  "Fields", "pastr", "range"]
        parents = ["", title,
                   title, "Crops", "Crops", "C3", "C3", "C3", "C4", "C4",
                   title, "Primary Vegetation", "Primary Vegetation",
                   title, "Secondary Vegetation", "Secondary Vegetation",
                   title,
                   title, "Fields", "Fields"]

        values = [total + data["nonland"], data["nonland"],
                  crops, c3, c4, data["c3ann"], data["c3nfx"], data["c3per"], data["c4ann"], data["c4per"],
                  primary, data["primf"], data["primn"],
                  secondary, data["secdf"], data["secdn"],
                  data["urban"],
                  fields, data["pastr"], data["range"]]

        tree_params["customdata"] = _create_hovertext(labels, parents, values, title)
        tree_params["hovertemplate"] = "%{customdata}<extra></extra>"

    assert len(labels) == len(parents)
    assert len(parents) == len(values)

    fig = go.Figure(
        go.Treemap(
            labels = labels,
            parents = parents,
            values = values,
            **tree_params
        )
    )
    colors = px.colors.qualitative.Plotly
    fig.update_layout(
        treemapcolorway = [colors[1], colors[4], colors[2], colors[7], colors[3], colors[0]],
        margin={"t": 0, "b": 0, "l": 10, "r": 10}
    )
    return fig
227
+
228
+
229
def create_pie(data=None, type_context=True, year=2021) -> go.Figure:
    """
    Creates a pie chart with one slice per column in CHART_COLS.
    :param data: Pandas series of land use data (CHART_COLS). None, an empty
        series or an all-zero series produces a pie that is entirely nonland.
    :param type_context: If the title should be context or prescribed
    :param year: Year shown in the title (year + 1 for a prescription).
    :return: Pie chart figure
    """
    values = None

    # The default used to be the pd.Series class itself, which only worked
    # because the "empty" property object is truthy; None is handled explicitly.
    # Sum for case where all zeroes, which allows us to display pie even when presc is reset
    if data is None or data.empty or data.sum() == 0:
        values = [0 for _ in range(len(constants.CHART_COLS))]
        values[-1] = 1

    else:
        values = data[constants.CHART_COLS].tolist()

    assert(len(values) == len(constants.CHART_COLS))

    title = f"Context in {year}" if type_context else f"Prescribed for {year+1}"

    p = px.colors.qualitative.Plotly
    ps = px.colors.qualitative.Pastel1
    d = px.colors.qualitative.Dark24
    # One color per CHART_COLS entry, in order:
    # ['c3ann', 'c3nfx', 'c3per', 'c4ann', 'c4per', 'pastr', 'primf', 'primn',
    #  'range', 'secdf', 'secdn', 'urban', 'nonland']
    colors = [p[4], d[8], ps[4], p[9], ps[5], p[0], p[2], d[14], p[5], p[7], d[2], p[3], p[1]]
    fig = go.Figure(
        go.Pie(
            values = values,
            labels = constants.CHART_COLS,
            textposition = "inside",
            sort = False,
            marker_colors = colors,
            hovertemplate = "%{label}<br>%{value}<br>%{percent}<extra></extra>",
            title = title
        )
    )

    if type_context:
        fig.update_layout(showlegend=False)
        # To make up for the hidden legend
        fig.update_layout(margin={"t": 50, "b": 50, "l": 50, "r": 50})

    else:
        fig.update_layout(margin={"t": 0, "b": 0, "l": 0, "r": 0})

    return fig
277
+
278
+
279
def create_pareto(pareto_df: pd.DataFrame, presc_id: int) -> go.Figure:
    """
    Plots the pareto front (change vs ELUC) with the selected prescriptor in red.
    :param pareto_df: Pandas data frame containing the pareto front
    :param presc_id: The currently selected prescriptor id
    :return: A pareto plot figure
    """
    front_trace = go.Scatter(
        x=pareto_df['change'] * 100,
        y=pareto_df['ELUC'],
    )
    fig = go.Figure(front_trace)

    # Highlight the selected prescriptor
    selected = pareto_df[pareto_df["id"] == presc_id]
    highlight = {"color": 'red', "size": 10}
    fig.add_scatter(x=selected['change'] * 100, y=selected['ELUC'], marker=highlight)

    # Name axes and hide legend
    fig.update_layout(
        xaxis_title={"text": "Change (%)"},
        yaxis_title={"text": 'ELUC (tC/ha)'},
        showlegend=False,
        title="Prescriptors",
    )
    hover = ("Average Change: %{x} <span>&#37;</span>"
             "<br>"
             " Average ELUC: %{y} tC/ha<extra></extra>")
    fig.update_traces(hovertemplate=hover)
    return fig
310
+
311
+
312
def load_predictors() -> dict:
    """
    Loads in predictors from json file according to config.
    Reads predictors.json in constants.PREDICTOR_PATH and builds one
    SkLearnPredictor per entry of its "predictors" list.
    :return: dict of predictor name -> predictor object, in config order.
    """
    config_path = os.path.join(constants.PREDICTOR_PATH, "predictors.json")
    # Context manager so the config file handle is closed after parsing.
    with open(config_path, encoding="utf-8") as config_file:
        predictor_cfg = json.load(config_file)

    predictors = dict()
    # This is ok because python dicts are ordered.
    for row in predictor_cfg["predictors"]:
        predictors[row["name"]] = Predictor.SkLearnPredictor(os.path.join(constants.PREDICTOR_PATH, row["filename"]))
    return predictors
data/.DS_Store ADDED
Binary file (6.15 kB). View file
 
data/process_data.py ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import pandas as pd
3
+ import regionmask
4
+ import xarray as xr
5
+
6
+ from huggingface_hub import hf_hub_download
7
+
8
# Context columns: the land use fractions plus the area of the cell.
LAND_FEATURES = [
    'c3ann', 'c3nfx', 'c3per', 'c4ann', 'c4per',
    'pastr', 'primf', 'primn', 'range', 'secdf', 'secdn', 'urban',
    'cell_area',
]

# Every land use column (everything except cell_area) has a "<col>_diff" column.
LAND_DIFF_FEATURES = [f"{feature}_diff" for feature in LAND_FEATURES if feature != 'cell_area']

FEATURES = [*LAND_FEATURES, *LAND_DIFF_FEATURES]
LABEL = "ELUC"

PATH_TO_DATASET = "merged_aggregated_dataset_1850_2022.zarr.zip"
18
+
19
+
20
def import_data(path, update_path):
    """
    Opens the zarr dataset and prepares it for conversion to a dataframe.
    :param path: Path to the consolidated zarr dataset.
    :param update_path: Optional path to a dataset whose ELUC and cell_area
        replace the ones in the zarr dataset. Skipped when falsy.
    :return: Dataset with actions shifted back a year, entries with a null
        ELUC_diff removed and a "country" variable added.
    """
    dataset = xr.open_zarr(path, consolidated=True)

    # Get updated ELUC
    if update_path:
        updated_eluc = xr.open_dataset(update_path)
        dataset = dataset.drop_vars(["ELUC", "cell_area"]).merge(updated_eluc)

    # Shift actions back a year
    dataset[LAND_DIFF_FEATURES] = dataset[LAND_DIFF_FEATURES].shift(time=-1)

    # Drop everything where ELUC_diff is missing.
    missing = dataset["ELUC_diff"].isnull().compute()
    dataset = dataset.where(~missing, drop=True)

    countries = regionmask.defined_regions.natural_earth_v5_0_0.countries_110
    dataset["country"] = countries.mask(dataset)
    return dataset
44
+
45
+
46
def da_to_df(da, countries_df):
    """
    Converts the dataset to a dataframe and adds a country name column.
    :param da: Dataset to convert; must contain a "country" variable.
    :param countries_df: Dataframe with a "names" column, looked up by the country values.
    :return: Dataframe without rows containing NaN, with "country_name" added.
    """
    frame = da.to_dataframe().dropna()
    frame['country_name'] = countries_df.loc[frame['country'], 'names'].values
    return frame
51
+
52
+
53
def main():
    """
    Processes the raw dataset into processed/eluc_1982.csv.
    Keeps rows from 1982 onwards and only the FEATURES and LABEL columns.
    """
    dataset = import_data(PATH_TO_DATASET, None)
    countries_df = regionmask.defined_regions.natural_earth_v5_0_0.countries_110.to_dataframe()
    frame = da_to_df(dataset, countries_df)

    columns = FEATURES + [LABEL]
    frame = frame.loc[1982:][columns]
    frame.to_csv("processed/eluc_1982.csv", index=True)


if __name__ == "__main__":
    main()
demo.ipynb ADDED
@@ -0,0 +1,653 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": null,
6
+ "metadata": {},
7
+ "outputs": [],
8
+ "source": [
9
+ "# pip install ipywidgets\n",
10
+ "# pip install plotly\n",
11
+ "# pip install ipympl"
12
+ ]
13
+ },
14
+ {
15
+ "cell_type": "code",
16
+ "execution_count": null,
17
+ "metadata": {},
18
+ "outputs": [],
19
+ "source": [
20
+ "import os\n",
21
+ "import numpy as np\n",
22
+ "import pandas as pd\n",
23
+ "from typing import Any\n",
24
+ "from typing import Dict\n",
25
+ "from typing import List\n",
26
+ "import warnings\n",
27
+ "import math\n",
28
+ "\n",
29
+ "import ipywidgets as widgets\n",
30
+ "from ipywidgets import interact, interactive, interact_manual, GridBox, Layout, VBox, HBox\n",
31
+ "import matplotlib.pyplot as plt\n",
32
+ "import plotly.graph_objs as go\n",
33
+ "from plotly.subplots import make_subplots\n",
34
+ "\n",
35
+ "from data_encoder import DataEncoder\n",
36
+ "\n",
37
+ "# Silence xgboost warnings\n",
38
+ "warnings.filterwarnings(\"ignore\")\n",
39
+ "from xgboost import XGBRegressor\n",
40
+ "from keras.models import load_model\n",
41
+ "\n",
42
+ "\n",
43
+ "pd.set_option('display.max_columns', None)\n",
44
+ "\n",
45
+ "%matplotlib inline\n",
46
+ "%matplotlib widget"
47
+ ]
48
+ },
49
+ {
50
+ "attachments": {},
51
+ "cell_type": "markdown",
52
+ "metadata": {},
53
+ "source": [
54
+ "# Dataset"
55
+ ]
56
+ },
57
+ {
58
+ "cell_type": "code",
59
+ "execution_count": null,
60
+ "metadata": {},
61
+ "outputs": [],
62
+ "source": [
63
+ "LAND_USE_COLS = ['c3ann', 'c3nfx', 'c3per', 'c4ann', 'pastr', 'range', 'secdf', 'secdn', 'urban']\n",
64
+ "DIFF_LAND_USE_COLS = [f\"{col}_diff\" for col in LAND_USE_COLS]\n",
65
+ "PRESCRIBED_LAND_USE_COLS = [f\"{col}_prescribed\" for col in LAND_USE_COLS]\n",
66
+ "OTHER_FEATURES_COLS = ['primf', 'primn', 'cell_area']\n",
67
+ "ALL_LAND_USE_COLS = ['primf', 'primn'] + LAND_USE_COLS\n",
68
+ "COLS_MAP = dict(zip(LAND_USE_COLS, DIFF_LAND_USE_COLS))\n",
69
+ "CHART_COLS = ALL_LAND_USE_COLS + [\"nonland\"]"
70
+ ]
71
+ },
72
+ {
73
+ "cell_type": "code",
74
+ "execution_count": null,
75
+ "metadata": {},
76
+ "outputs": [],
77
+ "source": [
78
+ "CONTEXT_COLUMNS = ['c3ann', 'c3nfx', 'c3per', 'c4ann', 'pastr', 'primf', 'primn', 'range', 'secdf', 'secdn', 'urban', 'cell_area']\n",
79
+ "ACTION_COLUMNS = ['c3ann_diff', 'c3nfx_diff', 'c3per_diff', 'c4ann_diff', 'pastr_diff', 'range_diff', 'secdf_diff', 'secdn_diff', 'urban_diff']\n",
80
+ "OUTCOME_COLUMNS = ['ELUC', 'Change']\n",
81
+ "CONTEXT_ACTION_COLUMNS = CONTEXT_COLUMNS + ACTION_COLUMNS"
82
+ ]
83
+ },
84
+ {
85
+ "cell_type": "code",
86
+ "execution_count": null,
87
+ "metadata": {},
88
+ "outputs": [],
89
+ "source": [
90
+ "DATASET_CSV = '../data/gcb/processed/uk_eluc.csv'\n",
91
+ "with open(DATASET_CSV) as df_file:\n",
92
+ " data_source_df = pd.read_csv(df_file)"
93
+ ]
94
+ },
95
+ {
96
+ "cell_type": "code",
97
+ "execution_count": null,
98
+ "metadata": {},
99
+ "outputs": [],
100
+ "source": [
101
+ "data_source_df.tail()"
102
+ ]
103
+ },
104
+ {
105
+ "attachments": {},
106
+ "cell_type": "markdown",
107
+ "metadata": {},
108
+ "source": [
109
+ "# Code"
110
+ ]
111
+ },
112
+ {
113
+ "cell_type": "code",
114
+ "execution_count": null,
115
+ "metadata": {},
116
+ "outputs": [],
117
+ "source": [
118
+ "fields = {'lat': {'data_type': 'FLOAT', 'has_nan': False, 'mean': 53.93974, 'range': [50.125, 58.625], 'std_dev': 2.2288961, 'sum': 4630295, 'valued': 'CONTINUOUS'},\n",
119
+ " 'lon': {'data_type': 'FLOAT', 'has_nan': False, 'mean': -2.7644422, 'range': [-7.375, 1.625], 'std_dev': 1.9270877, 'sum': -237305.25, 'valued': 'CONTINUOUS'},\n",
120
+ " 'ELUC': {'data_type': 'FLOAT', 'has_nan': False, 'mean': -0.021404957, 'range': [-1.2820702, 2.3366203], 'std_dev': 0.18355964, 'sum': -1837.4443, 'valued': 'CONTINUOUS'},\n",
121
+ " 'time': {'data_type': 'INT', 'has_nan': False, 'mean': 1936, 'range': [1851, 2021], 'std_dev': 49.362892, 'sum': 166190110, 'valued': 'CONTINUOUS'},\n",
122
+ " 'c3ann': {'data_type': 'FLOAT', 'has_nan': False, 'mean': 0.2667192, 'range': [0, 1], 'std_dev': 0.19391803, 'sum': 22895.709, 'valued': 'CONTINUOUS'},\n",
123
+ " 'c3nfx': {'data_type': 'FLOAT', 'has_nan': False, 'mean': 0.014878354, 'range': [0, 1], 'std_dev': 0.0128484, 'sum': 1277.1877, 'valued': 'CONTINUOUS'},\n",
124
+ " 'c3per': {'data_type': 'FLOAT', 'has_nan': False, 'mean': 0.00053631567, 'range': [0, 1], 'std_dev': 0.000610856, 'sum': 46.03841, 'valued': 'CONTINUOUS'},\n",
125
+ " 'c4ann': {'data_type': 'FLOAT', 'has_nan': False, 'mean': 0.0063492954, 'range': [0, 1], 'std_dev': 0.0056106453, 'sum': 545.0362, 'valued': 'CONTINUOUS'},\n",
126
+ " 'i_lat': {'data_type': 'FLOAT', 'has_nan': False, 'mean': 53.93974, 'range': [50.125, 58.625], 'std_dev': 2.2288961, 'sum': 4630295, 'valued': 'CONTINUOUS'},\n",
127
+ " 'i_lon': {'data_type': 'FLOAT', 'has_nan': False, 'mean': -2.7644422, 'range': [-7.375, 1.625], 'std_dev': 1.9270877, 'sum': -237305.25, 'valued': 'CONTINUOUS'},\n",
128
+ " 'pastr': {'data_type': 'FLOAT', 'has_nan': False, 'mean': 0.31008992, 'range': [0, 1], 'std_dev': 0.1939609, 'sum': 26618.738, 'valued': 'CONTINUOUS'},\n",
129
+ " 'primf': {'data_type': 'FLOAT', 'has_nan': False, 'mean': 3.1008868e-10, 'range': [0, 1], 'std_dev': 1.2718036e-09, 'sum': 2.6618633e-05, 'valued': 'CONTINUOUS'},\n",
130
+ " 'primn': {'data_type': 'FLOAT', 'has_nan': False, 'mean': 7.880206e-11, 'range': [0, 1], 'std_dev': 6.0690847e-10, 'sum': 6.7645265e-06, 'valued': 'CONTINUOUS'},\n",
131
+ " 'range': {'data_type': 'FLOAT', 'has_nan': False, 'mean': 0.058702312, 'range': [0, 1], 'std_dev': 0.12839052, 'sum': 5039.124, 'valued': 'CONTINUOUS'},\n",
132
+ " 'secdf': {'data_type': 'FLOAT', 'has_nan': False, 'mean': 0.18520375, 'range': [0, 1], 'std_dev': 0.19961607, 'sum': 15898.26, 'valued': 'CONTINUOUS'},\n",
133
+ " 'secdn': {'data_type': 'FLOAT', 'has_nan': False, 'mean': 0.06774911, 'range': [0, 1], 'std_dev': 0.1195767, 'sum': 5815.7197, 'valued': 'CONTINUOUS'},\n",
134
+ " 'urban': {'data_type': 'FLOAT', 'has_nan': False, 'mean': 0.030199211, 'range': [0, 1], 'std_dev': 0.06684742, 'sum': 2592.3606, 'valued': 'CONTINUOUS'},\n",
135
+ " 'ELUC_diff': {'data_type': 'FLOAT', 'has_nan': False, 'mean': 0.00085764704, 'range': [-5, 5], 'std_dev': 0.091957845, 'sum': 73.62214, 'valued': 'CONTINUOUS'},\n",
136
+ " 'cell_area': {'data_type': 'FLOAT', 'has_nan': False, 'mean': 45453.707, 'range': [40233.22, 49543.36], 'std_dev': 2439.213, 'sum': 3901837300, 'valued': 'CONTINUOUS'},\n",
137
+ " 'c3ann_diff': {'data_type': 'FLOAT', 'has_nan': False, 'mean': -0.0003815445, 'range': [-1, 1], 'std_dev': 0.0042161522, 'sum': -32.75254, 'valued': 'CONTINUOUS'},\n",
138
+ " 'c3nfx_diff': {'data_type': 'FLOAT', 'has_nan': False, 'mean': -2.3976065e-05, 'range': [-1, 1], 'std_dev': 0.00024510472, 'sum': -2.0581534, 'valued': 'CONTINUOUS'},\n",
139
+ " 'c3per_diff': {'data_type': 'FLOAT', 'has_nan': False, 'mean': -5.9571926e-07, 'range': [-1, 1], 'std_dev': 1.0220871e-05, 'sum': -0.05113773, 'valued': 'CONTINUOUS'},\n",
140
+ " 'c4ann_diff': {'data_type': 'FLOAT', 'has_nan': False, 'mean': -1.0171406e-05, 'range': [-1, 1], 'std_dev': 0.00010547795, 'sum': -0.8731338, 'valued': 'CONTINUOUS'},\n",
141
+ " 'pastr_diff': {'data_type': 'FLOAT', 'has_nan': False, 'mean': 0.0011081528, 'range': [-1, 1], 'std_dev': 0.0058669676, 'sum': 95.12605, 'valued': 'CONTINUOUS'},\n",
142
+ " 'range_diff': {'data_type': 'FLOAT', 'has_nan': False, 'mean': 0.00036852885, 'range': [-1, 1], 'std_dev': 0.007347369, 'sum': 31.635254, 'valued': 'CONTINUOUS'},\n",
143
+ " 'secdf_diff': {'data_type': 'FLOAT', 'has_nan': False, 'mean': -0.00081145874, 'range': [-1, 1], 'std_dev': 0.008251627, 'sum': -69.65724, 'valued': 'CONTINUOUS'},\n",
144
+ " 'secdn_diff': {'data_type': 'FLOAT', 'has_nan': False, 'mean': -0.0005189244, 'range': [-1, 1], 'std_dev': 0.0052026906, 'sum': -44.54551, 'valued': 'CONTINUOUS'},\n",
145
+ " 'urban_diff': {'data_type': 'FLOAT', 'has_nan': False, 'mean': 0.00026998913, 'range': [-1, 1], 'std_dev': 0.0007861656, 'sum': 23.176407, 'valued': 'CONTINUOUS'},\n",
146
+ " 'cell_area_diff': {'data_type': 'FLOAT', 'has_nan': False, 'mean': 45453.707, 'range': [40233.22, 49543.36], 'std_dev': 2439.213, 'sum': 3901837300, 'valued': 'CONTINUOUS'}}\n"
147
+ ]
148
+ },
149
+ {
150
+ "cell_type": "code",
151
+ "execution_count": null,
152
+ "metadata": {},
153
+ "outputs": [],
154
+ "source": [
155
+ "cao_mapping = {\n",
156
+ " 'context': ['lat', 'lon', 'time', 'c3ann', 'c3nfx', 'c3per', 'c4ann', 'i_lat', 'i_lon', 'pastr', 'primf', 'primn', 'range', 'secdf', 'secdn', 'urban', 'cell_area'],\n",
157
+ " 'actions': ['c3ann_diff', 'c3nfx_diff', 'c3per_diff', 'c4ann_diff', 'pastr_diff', 'range_diff', 'secdf_diff', 'secdn_diff', 'urban_diff'],\n",
158
+ " 'outcomes': ['ELUC', 'Change']}"
159
+ ]
160
+ },
161
+ {
162
+ "cell_type": "code",
163
+ "execution_count": null,
164
+ "metadata": {},
165
+ "outputs": [],
166
+ "source": [
167
+ "encoder = DataEncoder(fields, cao_mapping)"
168
+ ]
169
+ },
170
+ {
171
+ "cell_type": "code",
172
+ "execution_count": null,
173
+ "metadata": {},
174
+ "outputs": [],
175
+ "source": [
176
+ "min_lat = data_source_df[\"i_lat\"].min()\n",
177
+ "max_lat = data_source_df[\"i_lat\"].max()\n",
178
+ "min_lon = data_source_df[\"i_lon\"].min()\n",
179
+ "max_lon = data_source_df[\"i_lon\"].max()\n",
180
+ "min_time = data_source_df[\"time\"].min()\n",
181
+ "max_time = data_source_df[\"time\"].max()"
182
+ ]
183
+ },
184
+ {
185
+ "cell_type": "code",
186
+ "execution_count": null,
187
+ "metadata": {},
188
+ "outputs": [],
189
+ "source": [
190
+ "def _is_single_action_prescriptor(actions):\n",
191
+ " \"\"\"\n",
192
+ " Checks how many Actions have been defined in the Context, Actions, Outcomes mapping.\n",
193
+ " :return: True if only 1 action is defined, False otherwise\n",
194
+ " \"\"\"\n",
195
+ " return len(actions) == 1\n",
196
+ "\n",
197
+ "def _is_scalar(prescribed_action):\n",
198
+ " \"\"\"\n",
199
+ " Checks if the prescribed action contains a single value, i.e. a scalar, or an array.\n",
200
+ " A prescribed action contains a single value if it has been prescribed for a single context sample\n",
201
+ " :param prescribed_action: a scalar or an array\n",
202
+ " :return: True if the prescribed action contains a scalar, False otherwise.\n",
203
+ " \"\"\"\n",
204
+ " return prescribed_action.shape[0] == 1 and prescribed_action.shape[1] == 1\n",
205
+ "\n",
206
+ "def _convert_to_nn_input(context_df: pd.DataFrame) -> List[np.ndarray]:\n",
207
+ " \"\"\"\n",
208
+ " Converts a context DataFrame to a list of numpy arrays a neural network can ingest\n",
209
+ " :param context_df: a DataFrame containing inputs for a neural network. Number of inputs and size must match\n",
210
+ " :return: a list of numpy ndarray, on ndarray per neural network input\n",
211
+ " \"\"\"\n",
212
+ " # The NN expects a list of i inputs by s samples (e.g. 9 x 299).\n",
213
+ " # So convert the data frame to a numpy array (gives shape 299 x 9), transpose it (gives 9 x 299)\n",
214
+ " # and convert to list(list of 9 arrays of 299)\n",
215
+ " context_as_nn_input = list(context_df.to_numpy().transpose())\n",
216
+ " # Convert each column's list of 1D array to a 2D array\n",
217
+ " context_as_nn_input = [np.stack(context_as_nn_input[i], axis=0) for i in\n",
218
+ " range(len(context_as_nn_input))]\n",
219
+ " return context_as_nn_input"
220
+ ]
221
+ },
222
+ {
223
+ "cell_type": "code",
224
+ "execution_count": null,
225
+ "metadata": {},
226
+ "outputs": [],
227
+ "source": [
228
def prescribe_from_model(prescriptor, context_df: pd.DataFrame) -> pd.DataFrame:
    """
    Generates prescriptions using the passed neural network candidate and context
    :param prescriptor: a Keras neural network
    :param context_df: a DataFrame containing the context to prescribe for
    :return: a DataFrame with one column per action name, indexed like context_df
    """
    action_list = ['recommended_land_use']

    # Convert the input df
    context_as_nn_input = _convert_to_nn_input(context_df)
    row_index = context_df.index

    # Get the prescribed actions
    prescribed_actions = prescriptor.predict(context_as_nn_input)
    actions = {}

    if _is_single_action_prescriptor(action_list):
        # Put the single action in an array to process it like multiple actions
        prescribed_actions = [prescribed_actions]

    for i, action_col in enumerate(action_list):
        if _is_scalar(prescribed_actions[i]):
            # We have a single row and this action is numerical. Convert it to a scalar.
            actions[action_col] = prescribed_actions[i].item()
        else:
            actions[action_col] = prescribed_actions[i].tolist()

    # Convert the prescribed actions to a DataFrame
    prescribed_actions_df = pd.DataFrame(actions,
                                         columns=action_list,
                                         index=row_index)
    return prescribed_actions_df
261
+ ]
262
+ },
263
+ {
264
+ "cell_type": "code",
265
+ "execution_count": null,
266
+ "metadata": {},
267
+ "outputs": [],
268
+ "source": [
269
def compute_percent_changed(encoded_context_actions_df):
    """
    Computes, per row, how much of the land-use portion of the cell is changed by the actions.
    :param encoded_context_actions_df: encoded DataFrame holding both context and action columns
    :return: a DataFrame with a single 'Change' column
    """
    # Only the positive diffs are summed. The alternative would be to sum the
    # absolute diffs and halve the result, because each positive diff is
    # offset by a negative one.

    # Decode in order to get the signed land usage diff values
    decoded_df = encoder.decode_as_df(encoded_context_actions_df.reset_index(drop=True))

    # Sum the positive diffs
    positive_diff_mask = decoded_df[DIFF_LAND_USE_COLS] > 0
    gained = decoded_df[positive_diff_mask].sum(axis=1)

    # Land usage is only a portion of the cell, e.g. 0.8. Scale back to 1 so the
    # result is the share of the land-use portion that changed,
    # i.e. how much of the pie chart has changed.
    land_use_total = decoded_df[LAND_USE_COLS].sum(axis=1)
    return pd.DataFrame(gained / land_use_total, columns=['Change'])
287
+ ]
288
+ },
289
+ {
290
+ "cell_type": "code",
291
+ "execution_count": null,
292
+ "metadata": {},
293
+ "outputs": [],
294
+ "source": [
295
def run_prescriptor(prescriptor_model, sample_context_df):
    """
    Runs a prescriptor on a context and returns the recommended land use.
    :param prescriptor_model: the prescriptor handed to prescribe_from_model
    :param sample_context_df: DataFrame holding the (un-encoded) context to prescribe for
    :return: a DataFrame restricted to ALL_LAND_USE_COLS with the recommended land use
    """
    encoded_context_df = encoder.encode_as_df(sample_context_df)
    actions_df = prescribe_from_model(prescriptor_model, encoded_context_df)
    recommendations = actions_df.recommended_land_use.tolist()
    reco_land_use_df = pd.DataFrame(recommendations, columns=LAND_USE_COLS)

    # Scale the recommendation by the total land use of the first context row
    used = sum(sample_context_df[LAND_USE_COLS].iloc[0].tolist())
    for col in LAND_USE_COLS:
        reco_land_use_df[col] = reco_land_use_df[col] * used

    # Reattach primf and primn
    for primary_col in ("primf", "primn"):
        reco_land_use_df[primary_col] = sample_context_df[primary_col].to_numpy()

    # Assuming there's no primary land left in this cell
    # TODO: not correct. Need to account for primf and primn, that can't increase (no way to return to primary forest)
    prescribed_land_use_pct = reco_land_use_df.iloc[0][ALL_LAND_USE_COLS].sum() * 100
    print(f"Presribed land usage: {prescribed_land_use_pct:.2f}% of land")

    return reco_land_use_df[ALL_LAND_USE_COLS]
315
+ ]
316
+ },
317
+ {
318
+ "cell_type": "code",
319
+ "execution_count": null,
320
+ "metadata": {},
321
+ "outputs": [],
322
+ "source": [
323
def run_predictor(predictor_model, context, actions):
    """
    Predicts ELUC and the percentage of land changed for a context and a set of chart values.
    :param predictor_model: model whose predict() is called on the encoded context + actions
    :param context: DataFrame holding the (un-encoded) context to predict for
    :param actions: chart values in CHART_COLS order, in percent (they are divided by 100 here)
    :return: a tuple (decoded ELUC of the first row, percent changed of the first row)
    """
    # Use the context that was passed in rather than the module-level
    # sample_context_df, so the function honours its own argument.
    encoded_context_df = encoder.encode_as_df(context)

    actions = [a / 100 for a in actions]
    reco_land_use_df = pd.DataFrame([actions], columns=CHART_COLS)
    reco_land_use_df = reco_land_use_df[LAND_USE_COLS]

    # The actions are the difference between the requested and the current land use
    prescribed_actions_df = (reco_land_use_df[LAND_USE_COLS].reset_index(drop=True)
                             - context[LAND_USE_COLS].reset_index(drop=True))
    prescribed_actions_df.rename(COLS_MAP, axis=1, inplace=True)

    encoded_prescribed_actions_df = encoder.encode_as_df(prescribed_actions_df)

    encoded_context_actions_df = pd.concat([encoded_context_df,
                                            encoded_prescribed_actions_df],
                                           axis=1)

    change_df = compute_percent_changed(encoded_context_actions_df)

    new_pred = predictor_model.predict(encoded_context_actions_df)
    pred_df = pd.DataFrame(new_pred, columns=["ELUC"])
    # Decode output
    out_df = encoder.decode_as_df(pred_df)
    return out_df.iloc[0, 0], change_df.iloc[0, 0] * 100
346
+ ]
347
+ },
348
+ {
349
+ "attachments": {},
350
+ "cell_type": "markdown",
351
+ "metadata": {},
352
+ "source": [
353
+ "# Predictor"
354
+ ]
355
+ },
356
+ {
357
+ "cell_type": "code",
358
+ "execution_count": null,
359
+ "metadata": {},
360
+ "outputs": [],
361
+ "source": [
362
+ "predictor_model = XGBRegressor()\n",
363
+ "predictor_model.load_model(\"predictors/xgboost_predictor.json\")"
364
+ ]
365
+ },
366
+ {
367
+ "attachments": {
368
+ "319f2a83-efbb-4017-83fb-c47e2e335906.png": {
369
+ "image/png": ""
370
+ }
371
+ },
372
+ "cell_type": "markdown",
373
+ "metadata": {},
374
+ "source": [
375
+ "# Prescriptors\n",
376
+ "![image.png](attachment:319f2a83-efbb-4017-83fb-c47e2e335906.png)"
377
+ ]
378
+ },
379
+ {
380
+ "cell_type": "code",
381
+ "execution_count": null,
382
+ "metadata": {},
383
+ "outputs": [],
384
+ "source": [
385
+ "PRESCRIPTOR_LIST = [\"1_1\", \"34_78\", \"50_67\", \"40_45\", \"30_28\", \"28_40\"]"
386
+ ]
387
+ },
388
+ {
389
+ "attachments": {},
390
+ "cell_type": "markdown",
391
+ "metadata": {},
392
+ "source": [
393
+ "# User Interface"
394
+ ]
395
+ },
396
+ {
397
+ "cell_type": "code",
398
+ "execution_count": null,
399
+ "metadata": {},
400
+ "outputs": [],
401
+ "source": [
402
+ "\n",
403
+ "sample_context_df = None\n",
404
+ "\n",
405
+ "out = widgets.Output()\n",
406
+ "\n",
407
def prescribe(b):
    """
    Submits context and creates pie chart
    Updates sliders for pie chart accordingly
    :param b: argument supplied by the button's on_click handler (unused)
    """
    model = load_prescriptor()
    prescribed_df = run_prescriptor(model, sample_context_df)

    # Get other col back: reuse the last value of the first pie trace
    chart_values = prescribed_df.iloc[0].tolist()
    chart_values.append(fig["data"][0].values[-1])
    values_by_col = dict(zip(CHART_COLS, chart_values))

    for feature in CHART_COLS:
        # Unlock everything
        if feature in LAND_USE_COLS:
            ticks[feature].value = False

        # Detach the observer while setting the value so the plot updater is not triggered
        slider = sliders[feature]
        slider.unobserve(update_presc_plot, names="value")
        slider.value = values_by_col[feature] * 100
        slider.observe(update_presc_plot, names="value")

    # Clear figure and re-plot: keep only the first trace, then add the prescribed pie
    fig["data"] = (fig["data"][0], )
    prescribed_pie = go.Pie(values=list(values_by_col.values()),
                            labels=CHART_COLS,
                            domain=dict(x=[0.5, 1]),
                            title="Prescribed")
    fig.add_trace(prescribed_pie, row=1, col=2)
436
+ "\n",
437
+ "\n",
438
def lock(change):
    """
    Locks a slider so it isn't affected by the sum to 100 computation
    :param change: change dict delivered by the checkbox observer; ``change["new"]`` is the
        new checkbox value and ``change["owner"]`` the checkbox itself
    """
    tick = change["owner"]
    if change["new"]:
        locked.add(tick)
    else:
        # discard() instead of remove(): unlocking a checkbox that is not in
        # the set is a no-op rather than a KeyError inside the callback.
        locked.discard(tick)
446
+ "\n",
447
+ "\n",
448
+ "\"\"\"\n",
449
+ "Real-time updater for prescribed pie chart\n",
450
+ "\"\"\"\n",
451
+ "def update_presc_plot(change):\n",
452
+ " with fig.batch_update():\n",
453
+ " if len(fig[\"data\"]) > 1:\n",
454
+ " owner = change[\"owner\"]\n",
455
+ " \n",
456
+ " # First compute what percentage is locked, count locked/zero sliders, and see if this slider is locked\n",
457
+ " locked_sum = 0\n",
458
+ " zero_count = 0\n",
459
+ " owner_locked = False\n",
460
+ " for feat in sliders:\n",
461
+ " if sliders[feat] != owner and (ticks[feat] in locked or sliders[feat].value == 0):\n",
462
+ " locked_sum += sliders[feat].value\n",
463
+ " zero_count += 1\n",
464
+ " # TODO: this is yucky\n",
465
+ " if sliders[feat] == owner and ticks[feat] in locked:\n",
466
+ " owner_locked = True\n",
467
+ " break\n",
468
+ " \n",
469
+ " # Block update if everything else is locked/0 or this is locked\n",
470
+ " if owner_locked or zero_count == len(sliders) - 1:\n",
471
+ " owner.unobserve(update_presc_plot, names=\"value\")\n",
472
+ " owner.value = change[\"old\"]\n",
473
+ " owner.observe(update_presc_plot, names=\"value\")\n",
474
+ "\n",
475
+ " else:\n",
476
+ " # Add locked percentage to old and new because we don't factor\n",
477
+ " # them in to the 100% in our calculating the new value\n",
478
+ " old = change[\"old\"] + locked_sum\n",
479
+ " new = change[\"new\"] + locked_sum\n",
480
+ "\n",
481
+ " for feat in sliders:\n",
482
+ " slider = sliders[feat]\n",
483
+ " tick = ticks[feat]\n",
484
+ " if slider != owner and tick not in locked:\n",
485
+ " # Unobserve so we don't infinitely recurse\n",
486
+ " slider.unobserve(update_presc_plot, names=\"value\")\n",
487
+ " # old value / old total = new value / new total\n",
488
+ " # Must round to the same or higher place as the slider\n",
489
+ " assert(math.log10(slider.step) % 1 == 0)\n",
490
+ " slider.value = round(slider.value / (100 - old) * (100 - new), int(-1 * math.log10(slider.step)))\n",
491
+ " slider.observe(update_presc_plot, names=\"value\")\n",
492
+ "\n",
493
+ " fig[\"data\"][1][\"values\"] = [slider.value for slider in sliders.values()]\n",
494
+ "\n",
495
+ "\n",
496
def predict(b):
    """
    Submits context and actions and outputs prediction
    :param b: argument supplied by the button's on_click handler (unused)
    """
    slider_values = [slider.value for slider in sliders.values()]
    outcome, change = run_predictor(predictor_model, sample_context_df, slider_values)
    output_area.value = f"ELUC: {outcome} tC/ha/yr\nChange: {change}%"
504
+ "\n",
505
+ "\n",
506
def compute_and_add_other(sample_context_df):
    """
    Computes the "nonland" (other) share and returns it alongside the land use columns.
    :param sample_context_df: DataFrame holding the context; only its first row is used to
        compute the share, and a single value is assigned, so one row is expected
    :return: a new DataFrame with the ALL_LAND_USE_COLS columns plus a "nonland" column
    """
    # Work on an explicit copy: assigning a column onto a column-subset
    # selection triggers pandas' SettingWithCopyWarning.
    data = sample_context_df[ALL_LAND_USE_COLS].copy()
    # Whatever is not accounted for by the land use columns; never negative
    diff = 1 - data.iloc[0].sum()
    other_val = diff if diff >= 0 else 0
    data["nonland"] = [other_val]
    return data
515
+ "\n",
516
+ "\n",
517
def show_context(c):
    """
    Creates initial pie chart
    Selects the row matching the latitude / longitude / year inputs, stores its context
    columns in the module-level sample_context_df and plots its land use.
    :param c: argument supplied by the button's on_click handler (unused)
    """
    global sample_context_df
    sample_df = data_source_df[(data_source_df.i_lat == latitude_input.value) &
                               (data_source_df.i_lon == longitude_input.value) &
                               (data_source_df.time == time_input.value)]
    if sample_df.empty:
        # Nothing matches the inputs: report it instead of failing on iloc[0]
        # further down, and leave the previously selected context untouched.
        print(f"No data for latitude {latitude_input.value}, "
              f"longitude {longitude_input.value}, year {time_input.value}")
        return

    sample_context_df = sample_df[CONTEXT_COLUMNS]
    # Plot initial context pie chart
    data = compute_and_add_other(sample_context_df)
    fig.add_trace(go.Pie(values=data.iloc[0].tolist(),
                         labels=CHART_COLS,
                         domain=dict(x=[0, 0.5]),
                         title="Current"), row=1, col=1)
536
+ "\n",
537
def load_prescriptor():
    """
    Loads the prescriptor model currently selected in the dropdown.
    :return: the model loaded from prescriptors/<selected id>.h5, not compiled
    """
    prescriptor_id = prescriptor_dropdown.value
    print(f"Selected prescriptor: {prescriptor_id}")

    prescriptor_model_filename = os.path.join("prescriptors", prescriptor_id + '.h5')
    print(f'Loading prescriptor model: {prescriptor_model_filename}')
    return load_model(prescriptor_model_filename, compile=False)
546
+ " \n",
547
+ "# Context\n",
548
+ "# Create the latitude input field\n",
549
+ "latitude_input = widgets.FloatText(description='Latitude:', value=51.625)\n",
550
+ "\n",
551
+ "# Create the longitude input field\n",
552
+ "longitude_input = widgets.FloatText(description='Longitude:', value=-3.375)\n",
553
+ "\n",
554
+ "# Create the time input field\n",
555
+ "time_input = widgets.IntText(description='Year:', value=2021)\n",
556
+ "\n",
557
+ "\"\"\"\n",
558
+ "Construct widgets and attach them to their functions\n",
559
+ "\"\"\"\n",
560
+ "sliders = {feature : widgets.FloatSlider(value=0.0, step=0.001, description=\"Prescribed \" + feature, style=dict(description_width='initial')) for feature in CHART_COLS}\n",
561
+ "ticks = {feature : widgets.Checkbox(value=False, description=\"Lock \" + feature, style=dict(description_width='initial')) for feature in CHART_COLS}\n",
562
+ "# Lock primaries and other\n",
563
+ "ticks[\"primf\"].value = True\n",
564
+ "ticks[\"primn\"].value = True\n",
565
+ "ticks[\"nonland\"].value = True\n",
566
+ "\n",
567
+ "# For use in locking and unlocking sliders\n",
568
+ "locked = set()\n",
569
+ "locked.add(ticks[\"primf\"])\n",
570
+ "locked.add(ticks[\"primn\"])\n",
571
+ "locked.add(ticks[\"nonland\"])\n",
572
+ "\n",
573
+ "prescribe_button = widgets.Button(description=\"Prescribe\")\n",
574
+ "prescribe_button.on_click(prescribe)\n",
575
+ "\n",
576
+ "predict_button = widgets.Button(description=\"Predict\")\n",
577
+ "predict_button.on_click(predict)\n",
578
+ "\n",
579
+ "\n",
580
+ "\"\"\"\n",
581
+ "Display Interactables and Figures\n",
582
+ "TODO: add titles, make layout prettier\n",
583
+ "\"\"\"\n",
584
+ "fig = go.FigureWidget(make_subplots(rows=1, cols=2, specs=[[{\"type\": \"pie\"}, {\"type\": \"pie\"}]]))\n",
585
+ "fig.update_layout(margin=dict(l=0, r=0, t=0, b=0))\n",
586
+ "\n",
587
+ "# Context\n",
588
+ "context_range = f\"Latitude must be between {min_lat} and {max_lat}, in 0.250 increments.\\nLongitude must be between {min_lon} and {max_lon}, in 0.250 increments.\\nYear must be between {min_time} and {max_time}.\"\n",
589
+ "text_area = widgets.Textarea(value=context_range,\n",
590
+ " rows=3,\n",
591
+ " layout=widgets.Layout(height=\"auto\", width=\"auto\"))\n",
592
+ "display(text_area)\n",
593
+ "\n",
594
+ "display(latitude_input, longitude_input, time_input)\n",
595
+ "\n",
596
+ "show_context_button = widgets.Button(description=\"Show land use\")\n",
597
+ "show_context_button.on_click(show_context)\n",
598
+ "display(show_context_button)\n",
599
+ "\n",
600
+ "# Prescribe\n",
601
+ "prescriptor_label = widgets.Label('Select a prescriptor:')\n",
602
+ "prescriptor_dropdown = widgets.Dropdown(options=PRESCRIPTOR_LIST)\n",
603
+ "display(prescriptor_label, prescriptor_dropdown)\n",
604
+ "\n",
605
+ "display(prescribe_button)\n",
606
+ "\n",
607
+ "# Attach sliders and boxes to their observers\n",
608
+ "for feat in sliders:\n",
609
+ " sliders[feat].observe(update_presc_plot, names=\"value\")\n",
610
+ " ticks[feat].observe(lock, names=\"value\")\n",
611
+ "\n",
612
+ "# Display sliders and boxes alongside figure\n",
613
+ "slider_box = VBox(list(sliders.values()))\n",
614
+ "tick_box = VBox(list(ticks.values()))\n",
615
+ "fig_box = VBox([fig])\n",
616
+ "display(HBox([slider_box, tick_box, fig_box]))\n",
617
+ "\n",
618
+ "# Predict\n",
619
+ "display(predict_button)\n",
620
+ "output_area = widgets.Textarea(value=\"\", rows=2, layout=widgets.Layout(height=\"auto\", width=\"auto\"))\n",
621
+ "display(output_area)\n"
622
+ ]
623
+ },
624
+ {
625
+ "cell_type": "code",
626
+ "execution_count": null,
627
+ "metadata": {},
628
+ "outputs": [],
629
+ "source": []
630
+ }
631
+ ],
632
+ "metadata": {
633
+ "kernelspec": {
634
+ "display_name": "Python 3 (ipykernel)",
635
+ "language": "python",
636
+ "name": "python3"
637
+ },
638
+ "language_info": {
639
+ "codemirror_mode": {
640
+ "name": "ipython",
641
+ "version": 3
642
+ },
643
+ "file_extension": ".py",
644
+ "mimetype": "text/x-python",
645
+ "name": "python",
646
+ "nbconvert_exporter": "python",
647
+ "pygments_lexer": "ipython3",
648
+ "version": "3.10.11"
649
+ }
650
+ },
651
+ "nbformat": 4,
652
+ "nbformat_minor": 4
653
+ }
predictors/.DS_Store ADDED
Binary file (6.15 kB). View file
 
predictors/__pycache__/ELUCNeuralNet.cpython-310.pyc ADDED
Binary file (2.07 kB). View file
 
predictors/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (163 Bytes). View file
 
predictors/download_predictors.py ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import json
3
+ from huggingface_hub import hf_hub_download
4
+
5
+
6
def main():
    """
    Download every predictor file listed in predictors.json that is not already present.

    File names are read from ``../predictors/predictors.json`` and checked
    relative to the current working directory; missing ones are fetched from
    the ``danyoung/eluc-dataset`` dataset repository into ``./``. The access
    token is read from the ``HF_TOKEN`` environment variable.
    """
    # Context manager so the config file is closed once parsed
    with open("../predictors/predictors.json", encoding="utf-8") as cfg_file:
        predictor_cfg = json.load(cfg_file)
    file_name_list = [row["filename"] for row in predictor_cfg["predictors"]]

    for predictor_name in file_name_list:
        # Skip files that were already downloaded
        if not os.path.exists(predictor_name):
            hf_hub_download(
                token=os.environ.get("HF_TOKEN"),
                repo_id="danyoung/eluc-dataset",
                repo_type="dataset",
                filename=predictor_name,
                local_dir="./",
                local_dir_use_symlinks=False)
21
+
22
+
23
+ if __name__ == "__main__":
24
+ main()
predictors/predictors.json ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"predictors": [
2
+ {
3
+ "name": "Linear Regression (West Europe)",
4
+ "filename": "143_linear.joblib"
5
+ },
6
+ {
7
+ "name": "Linear Regression (USA)",
8
+ "filename": "4_linear.joblib"
9
+ },
10
+ {
11
+ "name": "Linear Regression (Brazil)",
12
+ "filename": "29_linear.joblib"
13
+ },
14
+ {
15
+ "name": "Linear Regression (General)",
16
+ "filename": "ELUC_linear.joblib"
17
+ },
18
+ {
19
+ "name": "Random Forest (West Europe)",
20
+ "filename": "ELUC_forest.joblib"
21
+ }
22
+ ]}
prescriptors/100_100.h5 ADDED
Binary file (80.7 kB). View file
 
prescriptors/100_29.h5 ADDED
Binary file (80.7 kB). View file
 
prescriptors/100_40.h5 ADDED
Binary file (80.7 kB). View file
 
prescriptors/100_54.h5 ADDED
Binary file (80.7 kB). View file
 
prescriptors/100_58.h5 ADDED
Binary file (80.7 kB). View file
 
prescriptors/100_91.h5 ADDED
Binary file (80.7 kB). View file
 
prescriptors/100_96.h5 ADDED
Binary file (80.7 kB). View file
 
prescriptors/92_70.h5 ADDED
Binary file (80.7 kB). View file
 
prescriptors/97_97.h5 ADDED
Binary file (80.7 kB). View file
 
prescriptors/99_39.h5 ADDED
Binary file (80.7 kB). View file
 
prescriptors/99_51.h5 ADDED
Binary file (80.7 kB). View file
 
prescriptors/99_65.h5 ADDED
Binary file (80.7 kB). View file
 
prescriptors/99_78.h5 ADDED
Binary file (80.7 kB). View file
 
prescriptors/fields.json ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "ELUC":{"data_type":"FLOAT","has_nan":false,"mean":0.08431588113307953,"range":[-88.90611267089844,116.95401763916016],"std_dev":0.7141819000244141,"sum":1244851,"valued":"CONTINUOUS"},
3
+ "c3ann":{"data_type":"FLOAT","has_nan":false,"mean":0.05719335377216339,"range":[0,0.9272090196609497],"std_dev":0.13004545867443085,"sum":844410.375,"valued":"CONTINUOUS"},
4
+ "c3ann_diff":{"data_type":"FLOAT","has_nan":false,"mean":0.0001039158960338682,"range":[-1,1],"std_dev":0.003100739326328039,"sum":1534.228271484375,"valued":"CONTINUOUS"},
5
+ "c3nfx":{"data_type":"FLOAT","has_nan":false,"mean":0.01243751309812069,"range":[0,0.6590129733085632],"std_dev":0.04110949859023094,"sum":183629.125,"valued":"CONTINUOUS"},
6
+ "c3nfx_diff":{"data_type":"FLOAT","has_nan":false,"mean":0.000047470879508182406,"range":[-1,1],"std_dev":0.0009231835720129311,"sum":700.866455078125,"valued":"CONTINUOUS"},
7
+ "c3per":{"data_type":"FLOAT","has_nan":false,"mean":0.0064199501648545265,"range":[0,0.6860707998275757],"std_dev":0.025114575400948524,"sum":94785.0078125,"valued":"CONTINUOUS"},
8
+ "c3per_diff":{"data_type":"FLOAT","has_nan":false,"mean":0.000037744077417301014,"range":[-1,1],"std_dev":0.0007734607788734138,"sum":557.2586669921875,"valued":"CONTINUOUS"},
9
+ "c4ann":{"data_type":"FLOAT","has_nan":false,"mean":0.01571018248796463,"range":[0,0.9358039498329163],"std_dev":0.04956522956490517,"sum":231947.265625,"valued":"CONTINUOUS"},
10
+ "c4ann_diff":{"data_type":"FLOAT","has_nan":false,"mean":0.0000682401514495723,"range":[-1,1],"std_dev":0.0016709286719560623,"sum":1007.5067749023438,"valued":"CONTINUOUS"},
11
+ "c4per":{"data_type":"FLOAT","has_nan":false,"mean":0.0009445593459531665,"range":[0,0.7032631039619446],"std_dev":0.008503105491399765,"sum":13945.6015625,"valued":"CONTINUOUS"},
12
+ "c4per_diff":{"data_type":"FLOAT","has_nan":false,"mean":0.000008784978490439244,"range":[-1,1],"std_dev":0.0002543762675486505,"sum":129.70260620117188,"valued":"CONTINUOUS"},
13
+ "cell_area":{"data_type":"FLOAT","has_nan":false,"mean":54771.609375,"range":[8915.4794921875,77276.703125],"std_dev":18437.73046875,"sum":808655454208,"valued":"CONTINUOUS"},
14
+ "change":{"data_type":"FLOAT","has_nan":false,"mean":0.5,"range":[0,1],"std_dev":0.1,"sum":7382067,"valued":"CONTINUOUS"},
15
+ "pastr":{"data_type":"FLOAT","has_nan":false,"mean":0.04077955335378647,"range":[0,1],"std_dev":0.10672948509454727,"sum":602074.8125,"valued":"CONTINUOUS"},
16
+ "pastr_diff":{"data_type":"FLOAT","has_nan":false,"mean":0.00026207335758954287,"range":[-1,1],"std_dev":0.005082341376692057,"sum":3869.28662109375,"valued":"CONTINUOUS"},
17
+ "primf":{"data_type":"FLOAT","has_nan":false,"mean":0.19610066711902618,"range":[0,1],"std_dev":0.35063520073890686,"sum":2895256.75,"valued":"CONTINUOUS"},
18
+ "primf_diff":{"data_type":"FLOAT","has_nan":false,"mean":-0.0009334315545856953,"range":[-0.850843608379364,0],"std_dev":0.004068289417773485,"sum":-13781.3095703125,"valued":"CONTINUOUS"},
19
+ "primn":{"data_type":"FLOAT","has_nan":false,"mean":0.2566087543964386,"range":[0,1],"std_dev":0.3646445870399475,"sum":3788606.5,"valued":"CONTINUOUS"},
20
+ "primn_diff":{"data_type":"FLOAT","has_nan":false,"mean":-0.001117548905313015,"range":[-0.936556875705719,0],"std_dev":0.005212769843637943,"sum":-16499.642578125,"valued":"CONTINUOUS"},
21
+ "range":{"data_type":"FLOAT","has_nan":false,"mean":0.15799088776111603,"range":[0,1],"std_dev":0.28534045815467834,"sum":2332598.75,"valued":"CONTINUOUS"},
22
+ "range_diff":{"data_type":"FLOAT","has_nan":false,"mean":0.00040018701110966504,"range":[-1,1],"std_dev":0.011220048181712627,"sum":5908.4150390625,"valued":"CONTINUOUS"},
23
+ "secdf":{"data_type":"FLOAT","has_nan":false,"mean":0.10117984563112259,"range":[0,1],"std_dev":0.2359693944454193,"sum":1493832.875,"valued":"CONTINUOUS"},
24
+ "secdf_diff":{"data_type":"FLOAT","has_nan":false,"mean":0.0006275310879573226,"range":[-1,1],"std_dev":0.004725911188870668,"sum":9264.9541015625,"valued":"CONTINUOUS"},
25
+ "secdn":{"data_type":"FLOAT","has_nan":false,"mean":0.08007288724184036,"range":[0,1],"std_dev":0.18958471715450287,"sum":1182206.875,"valued":"CONTINUOUS"},
26
+ "secdn_diff":{"data_type":"FLOAT","has_nan":false,"mean":0.0004467177495826036,"range":[-1,1],"std_dev":0.009596005082130432,"sum":6595.4013671875,"valued":"CONTINUOUS"},
27
+ "urban":{"data_type":"FLOAT","has_nan":false,"mean":0.0025856471620500088,"range":[0,1],"std_dev":0.021832581609487534,"sum":38174.84375,"valued":"CONTINUOUS"},
28
+ "urban_diff":{"data_type":"FLOAT","has_nan":false,"mean":0.00004831538171856664,"range":[-0.15093612670898438,0.1676577627658844],"std_dev":0.0006846132455393672,"sum":713.3348388671875,"valued":"CONTINUOUS"}
29
+ }
prescriptors/pareto.csv ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ id,identity,ELUC,NSGA-II_crowding_distance,NSGA-II_rank,change,is_elite
2
+ 92_70,"{'ancestor_count': 6, 'ancestor_ids': ['90_54', '84_43'], 'birth_generation': 92, 'domain_name': None, 'experiment_version': 'LinearLeafLandUseDecode', 'unique_id': '92_70', 'origin': '90_54~CUW~84_43#MGNP'}",0.0182805009466933,inf,1,0.0667467755018339,True
3
+ 99_65,"{'ancestor_count': 11, 'ancestor_ids': ['98_81', '98_81'], 'birth_generation': 99, 'domain_name': None, 'experiment_version': 'LinearLeafLandUseDecode', 'unique_id': '99_65', 'origin': '98_81~CUW~98_81#MGNP'}",-5.987312544991907,0.3791044718989205,1,0.1012967088720754,True
4
+ 97_97,"{'ancestor_count': 95, 'ancestor_ids': ['96_14', '96_88'], 'birth_generation': 97, 'domain_name': None, 'experiment_version': 'LinearLeafLandUseDecode', 'unique_id': '97_97', 'origin': '96_14~CUW~96_88#MGNP'}",-15.413083904594863,0.3229220023442851,1,0.1719288420747773,True
5
+ 99_51,"{'ancestor_count': 98, 'ancestor_ids': ['98_49', '98_66'], 'birth_generation': 99, 'domain_name': None, 'experiment_version': 'LinearLeafLandUseDecode', 'unique_id': '99_51', 'origin': '98_49~CUW~98_66#MGNP'}",-24.47153974309113,0.1251737620298189,1,0.2249340374604773,False
6
+ 100_91,"{'ancestor_count': 99, 'ancestor_ids': ['99_15', '99_13'], 'birth_generation': 100, 'domain_name': None, 'experiment_version': 'LinearLeafLandUseDecode', 'unique_id': '100_91', 'origin': '99_15~CUW~99_13#MGNP'}",-28.439043897759447,0.1606969193537109,1,0.245644698452457,True
7
+ 100_58,"{'ancestor_count': 99, 'ancestor_ids': ['99_58', '99_48'], 'birth_generation': 100, 'domain_name': None, 'experiment_version': 'LinearLeafLandUseDecode', 'unique_id': '100_58', 'origin': '99_58~CUW~99_48#MGNP'}",-33.73895940604969,0.1366667417253438,1,0.2842010471172498,False
8
+ 100_96,"{'ancestor_count': 99, 'ancestor_ids': ['99_38', '99_51'], 'birth_generation': 100, 'domain_name': None, 'experiment_version': 'LinearLeafLandUseDecode', 'unique_id': '100_96', 'origin': '99_38~CUW~99_51#MGNP'}",-35.40003462234897,0.154603585947419,1,0.2927465910239528,True
9
+ 100_40,"{'ancestor_count': 99, 'ancestor_ids': ['99_39', '99_58'], 'birth_generation': 100, 'domain_name': None, 'experiment_version': 'LinearLeafLandUseDecode', 'unique_id': '100_40', 'origin': '99_39~CUW~99_58#MGNP'}",-39.91197247797104,0.124807711912608,1,0.3302908233641747,False
10
+ 100_29,"{'ancestor_count': 99, 'ancestor_ids': ['99_13', '99_21'], 'birth_generation': 100, 'domain_name': None, 'experiment_version': 'LinearLeafLandUseDecode', 'unique_id': '100_29', 'origin': '99_13~CUW~99_21#MGNP'}",-44.0336422258556,0.1583452126379033,1,0.3635849260809409,True
11
+ 100_54,"{'ancestor_count': 99, 'ancestor_ids': ['99_78', '99_38'], 'birth_generation': 100, 'domain_name': None, 'experiment_version': 'LinearLeafLandUseDecode', 'unique_id': '100_54', 'origin': '99_78~CUW~99_38#MGNP'}",-59.63589819674576,0.1512997596627784,1,0.4870467596605408,True
12
+ 100_100,"{'ancestor_count': 99, 'ancestor_ids': ['99_85', '99_78'], 'birth_generation': 100, 'domain_name': None, 'experiment_version': 'LinearLeafLandUseDecode', 'unique_id': '100_100', 'origin': '99_85~CUW~99_78#MGNP'}",-65.90170416132273,0.149356051396792,1,0.5662291414651899,True
13
+ 99_78,"{'ancestor_count': 98, 'ancestor_ids': ['98_15', '98_78'], 'birth_generation': 99, 'domain_name': None, 'experiment_version': 'LinearLeafLandUseDecode', 'unique_id': '99_78', 'origin': '98_15~CUW~98_78#MGNP'}",-66.05238226411777,0.141290265883081,1,0.6159501550968247,True
14
+ 99_39,"{'ancestor_count': 98, 'ancestor_ids': ['98_66', '93_62'], 'birth_generation': 99, 'domain_name': None, 'experiment_version': 'LinearLeafLandUseDecode', 'unique_id': '99_39', 'origin': '98_66~CUW~93_62#MGNP'}",-68.41783872386198,inf,1,0.7514420561129526,True
prescriptors/pareto_front.png ADDED
requirements.txt ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ dash==2.10.2
2
+ dash_bootstrap_components==1.4.1
3
+ gunicorn==21.2.0
4
+ huggingface_hub==0.16.4
5
+ joblib==1.2.0
6
+ tensorflow==2.13.0
7
+ keras==2.13.1
8
+ numpy==1.23.5
9
+ pandas==1.5.3
10
+ plotly==5.14.1
11
+ regionmask==0.10.0
12
+ scikit-learn==1.2.2
13
+ xarray==2023.6.0
tests/__init__.py ADDED
File without changes
tests/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (142 Bytes). View file
 
tests/__pycache__/test_app.cpython-310.pyc ADDED
Binary file (8.58 kB). View file
 
tests/test_app.py ADDED
@@ -0,0 +1,194 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import unittest
2
+ import pandas as pd
3
+ import json
4
+
5
+ import app.app as app
6
+ import app.constants as constants
7
+ import app.utils as utils
8
+ import app.Prescriptor as Prescriptor
9
+
10
+
11
class TestUtilFunctions(unittest.TestCase):
    """
    Tests for the helper functions in ``app.utils``.

    Every test builds its own small ``pd.Series`` input, so no dataset is
    loaded as a fixture here.
    """

    def test_add_nonland(self):
        """
        Simple vanilla test case for add_nonland().
        """
        data = [0, 0.01, 0.01, 0.2, 0.4, 0.02, 0.03, 0.01, 0.01, 0.05, 0.01, 0.1]
        series = pd.Series(dict(zip(constants.LAND_USE_COLS, data)))
        full = utils.add_nonland(series)
        self.assertAlmostEqual(full["nonland"], 1 - sum(data), delta=constants.SLIDER_PRECISION)

    def test_add_nonland_sum_over_one(self):
        """
        Makes sure if the columns sum to >1, we get 0 for nonland
        """
        data = [1] * len(constants.LAND_USE_COLS)
        series = pd.Series(dict(zip(constants.LAND_USE_COLS, data)))
        full = utils.add_nonland(series)
        self.assertAlmostEqual(full["nonland"], 0, delta=constants.SLIDER_PRECISION)

    def test_create_check_options_length(self):
        """
        Checks that one option is produced per input value.
        """
        values = ["a", "b", "c"]
        options = utils.create_check_options(values)
        self.assertEqual(len(options), len(values))

    def test_create_check_options_values(self):
        """
        Checks if the values in the options are correct
        """
        values = ["a", "b", "c"]
        options = utils.create_check_options(values)
        # Guard the length first: zip() below stops at the shorter sequence,
        # so a missing or extra option would otherwise go unnoticed.
        self.assertEqual(len(options), len(values))
        for option, value in zip(options, values):
            self.assertEqual(option["value"], value)

    def test_compute_percent_change(self):
        """
        Tests compute percent change on standard example.
        """
        context_data = [0.08, 0.08, 0.08, 0.08, 0.08, 0.08, 0.08, 0.08, 0.08, 0.08, 0.08, 0.12]
        presc_data = [0.10, 0.06, 0.11, 0.05, 0.12, 0.04, 0.13, 0.03, 0.08]
        context = pd.Series(dict(zip(constants.LAND_USE_COLS, context_data)))
        presc = pd.Series(dict(zip(constants.RECO_COLS, presc_data)))

        percent_change = utils.compute_percent_change(context, presc)
        self.assertAlmostEqual(percent_change, 0.14, delta=constants.SLIDER_PRECISION)

    def test_compute_percent_change_no_change(self):
        """
        Tests compute percent change when nothing changes.
        """
        context_data = [0.08, 0.08, 0.08, 0.08, 0.08, 0.08, 0.08, 0.08, 0.08, 0.08, 0.08, 0.12]
        # Reuse the context values, skipping positions 6-7 and 11.
        # NOTE(review): presumably those are the non-recommendable columns
        # (per the urban/primf/primn test below) -- confirm against constants.
        presc_data = context_data[0:6] + context_data[8:11]
        context = pd.Series(dict(zip(constants.LAND_USE_COLS, context_data)))
        presc = pd.Series(dict(zip(constants.RECO_COLS, presc_data)))

        percent_change = utils.compute_percent_change(context, presc)
        self.assertAlmostEqual(percent_change, 0, delta=constants.SLIDER_PRECISION)

    def test_compute_percent_change_all_nonreco(self):
        """
        Tests compute change when there is only urban/primf/primn.
        """
        context_data = [0, 0, 0, 0, 0, 0, 0.33, 0.33, 0, 0, 0, 0.34]
        presc_data = context_data[0:6] + context_data[8:11]
        context = pd.Series(dict(zip(constants.LAND_USE_COLS, context_data)))
        presc = pd.Series(dict(zip(constants.RECO_COLS, presc_data)))

        percent_change = utils.compute_percent_change(context, presc)
        self.assertEqual(percent_change, 0)

    def test_compute_percent_change_not_sum_to_one(self):
        """
        Tests compute percent change on a context with some nonland.
        """
        context_data = [0.01] * len(constants.LAND_USE_COLS)
        presc_data = [0.02, 0.00, 0.02, 0.00, 0.02, 0.00, 0.02, 0.00, 0.01]
        context = pd.Series(dict(zip(constants.LAND_USE_COLS, context_data)))
        presc = pd.Series(dict(zip(constants.RECO_COLS, presc_data)))

        percent_change = utils.compute_percent_change(context, presc)
        self.assertAlmostEqual(percent_change, 0.333333, delta=constants.SLIDER_PRECISION)
95
+
96
+
97
class TestEncoder(unittest.TestCase):
    """
    Since the encoded values are somewhat arbitrary based off what the prescriptor
    is trained on, we have to test based off what is in the fields file.
    """

    def setUp(self):
        """
        Loads the dataset and the fields file, then builds the encoder under test.
        """
        self.df = pd.read_csv(constants.DATA_FILE_PATH, index_col=constants.INDEX_COLS)
        with open(constants.FIELDS_PATH, "r", encoding="utf-8") as f:
            self.fields = json.load(f)
        self.encoder = utils.Encoder(self.fields)

    def _expected_encoding(self, col, value):
        """
        Returns the min-max scaled value expected for ``value`` in column ``col``,
        using the bounds stored under ``self.fields[col]["range"]``.
        """
        bounds = self.fields[col]["range"]
        return (value - bounds[0]) / (bounds[1] - bounds[0])

    def test_easy_case(self):
        """
        Tests encoding a simple case.
        """
        row = self.df.iloc[[0]]
        row = row[constants.CONTEXT_COLUMNS]
        pred = self.encoder.encode_as_df(row)

        for col in constants.CONTEXT_COLUMNS:
            true = self._expected_encoding(col, row[col].values[0])
            self.assertAlmostEqual(pred[col].values[0], true, delta=constants.SLIDER_PRECISION)

    def test_non_field_cols(self):
        """
        Test that non-field columns are not encoded and excluded from final dataframe.
        """
        # Copy before adding a column so we write to an independent frame
        # rather than to a selection derived from self.df.
        row = self.df.iloc[[0]][constants.CONTEXT_COLUMNS].copy()
        row["test"] = 999
        enc = self.encoder.encode_as_df(row)
        # Make sure we didn't add the test column
        self.assertEqual(sorted(enc.columns), sorted(constants.CONTEXT_COLUMNS))

        # Make sure we're still encoding
        true = self._expected_encoding("primf", row["primf"].values[0])
        self.assertAlmostEqual(enc["primf"].values[0], true, delta=constants.SLIDER_PRECISION)

    def test_multiple_input(self):
        """
        Tests we can pass in a multi-row dataframe and get proper encodings.
        This isn't strictly necessary for our current use case, but it's good to test.
        """
        rows = self.df.iloc[0:2]
        rows = rows[constants.CONTEXT_COLUMNS]
        enc = self.encoder.encode_as_df(rows)

        for col in constants.CONTEXT_COLUMNS:
            for i in range(len(rows)):
                true = self._expected_encoding(col, rows.iloc[i][col])
                self.assertAlmostEqual(enc.iloc[i][col], true, delta=constants.SLIDER_PRECISION)
155
+
156
+
157
class TestPrescriptor(unittest.TestCase):
    """
    Tests loading and running the prescriptors listed in the pareto CSV.
    """

    def setUp(self):
        """
        Loads the dataset and the list of prescriptor ids from the pareto CSV.
        """
        self.df = pd.read_csv(constants.DATA_FILE_PATH, index_col=constants.INDEX_COLS)

        pareto_df = pd.read_csv(constants.PARETO_CSV_PATH)
        self.prescriptor_id_list = list(pareto_df["id"])

    def test_load_all_prescriptors(self):
        """
        Checks if all the prescriptors are loadable
        """
        for presc_id in self.prescriptor_id_list:
            # subTest reports which id failed instead of stopping at the first one.
            with self.subTest(presc_id=presc_id):
                presc = Prescriptor.Prescriptor(presc_id)
                self.assertIsNotNone(presc)

    def test_prescribe_shape(self):
        """
        Tests if the prescribe function outputs something in the right shape
        """
        presc = Prescriptor.Prescriptor(self.prescriptor_id_list[0])
        for n_rows in range(1, 10):
            with self.subTest(n_rows=n_rows):
                sample_context_df = self.df.iloc[0:n_rows][constants.CONTEXT_COLUMNS]

                prescription = presc.run_prescriptor(sample_context_df)
                self.assertEqual(set(prescription.columns), set(constants.RECO_COLS))
                self.assertEqual(len(prescription), n_rows)

    def test_scale(self):
        """
        Tests if prescriptor properly scales land use back to what it should be.
        """
        presc = Prescriptor.Prescriptor(self.prescriptor_id_list[0])
        sample_context_df = self.df.iloc[0:100][constants.CONTEXT_COLUMNS]
        # Drop the dataset index so the totals can be compared with the
        # prescription's row totals.
        # NOTE(review): this presumes run_prescriptor returns a default
        # 0..n-1 index -- confirm in Prescriptor.
        old_total = sample_context_df[constants.RECO_COLS].sum(axis=1).reset_index(drop=True)
        prescription = presc.run_prescriptor(sample_context_df)
        new_total = prescription.sum(axis=1)
        # NOTE(review): Series.equals is an exact comparison; if float rounding
        # ever makes this flaky, consider pd.testing.assert_series_equal.
        self.assertTrue(
            old_total.equals(new_total),
            "prescribed land use totals differ from the context totals",
        )