NN implementation
#1
by
akseljoonas
- opened
- app.py +19 -28
- past_pollution_data.csv +2 -14
- past_weather_data.csv +1 -14
- pollution_data.csv +1 -14
- predictions_history.csv +11 -114
- requirements.txt +0 -2
- src/data_api_calls.py +6 -23
- src/features_pipeline.py +6 -38
- src/helper_functions.py +9 -53
- src/past_data_api_calls.py +4 -20
- src/predict.py +13 -103
- test.ipynb +327 -0
- weather_data.csv +0 -13
app.py
CHANGED
@@ -2,6 +2,7 @@ import altair as alt
|
|
2 |
import pandas as pd
|
3 |
import plotly.graph_objects as go
|
4 |
import streamlit as st
|
|
|
5 |
from src.helper_functions import custom_metric_box, pollution_box
|
6 |
from src.predict import get_data_and_predictions, update_data_and_predictions
|
7 |
|
@@ -44,7 +45,8 @@ col1, col2 = st.columns((1, 3))
|
|
44 |
# Create a 3-column layout
|
45 |
with col1:
|
46 |
st.subheader("Current Weather")
|
47 |
-
|
|
|
48 |
custom_metric_box(
|
49 |
label="🥵 Temperature",
|
50 |
value=f"{round(today['mean_temp'] * 0.1)} °C",
|
@@ -74,38 +76,38 @@ with col1:
|
|
74 |
with col2:
|
75 |
st.subheader("Current Pollution Levels")
|
76 |
sub1, sub2 = st.columns((1, 1))
|
77 |
-
|
78 |
# Ozone (O₃) Pollution Box
|
79 |
with sub1:
|
80 |
pollution_box(
|
81 |
label="O<sub>3</sub>",
|
82 |
value=f"{round(today['O3'])} µg/m³",
|
83 |
delta=f"{round(int(today['O3']) - int(previous_day['O3']))} µg/m³",
|
84 |
-
threshold=120
|
85 |
)
|
86 |
with st.expander("Learn more about O3", expanded=False):
|
87 |
st.markdown(
|
88 |
"""
|
89 |
*Ozone (O<sub>3</sub>)*: A harmful gas at ground level that can irritate the respiratory system and aggravate asthma.<br>
|
90 |
**Good/Bad**: "Good" means safe levels for most people, while "Bad" suggests harmful levels, especially for sensitive groups.
|
91 |
-
""",
|
92 |
unsafe_allow_html=True,
|
93 |
)
|
94 |
-
|
95 |
# Nitrogen Dioxide (NO₂) Pollution Box
|
96 |
with sub2:
|
97 |
pollution_box(
|
98 |
label="NO<sub>2</sub>",
|
99 |
value=f"{round(today['NO2'])} µg/m³",
|
100 |
delta=f"{round(int(today['NO2']) - int(previous_day['NO2']))} µg/m³",
|
101 |
-
threshold=40
|
102 |
)
|
103 |
with st.expander("Learn more about NO2", expanded=False):
|
104 |
st.markdown(
|
105 |
"""
|
106 |
*Nitrogen Dioxide (NO<sub>2</sub>)*: A toxic gas that contributes to lung irritation and worsens asthma and other respiratory issues.<br>
|
107 |
**Good/Bad**: "Good" means safe air quality, while "Bad" indicates levels that could cause respiratory problems, especially for vulnerable individuals.
|
108 |
-
""",
|
109 |
unsafe_allow_html=True,
|
110 |
)
|
111 |
|
@@ -116,12 +118,9 @@ with col2:
|
|
116 |
def get_simple_color_scale(values, threshold):
|
117 |
"""Returns green for values below the threshold, orange for values between the threshold and 2x the threshold, and red for values above 2x the threshold."""
|
118 |
return [
|
119 |
-
"#77C124"
|
120 |
-
if v < threshold
|
121 |
-
|
122 |
-
if v < 2 * threshold
|
123 |
-
else "#E63946"
|
124 |
-
for v in values
|
125 |
]
|
126 |
|
127 |
# O3 Bar Plot (threshold: 40)
|
@@ -143,17 +142,13 @@ with col2:
|
|
143 |
)
|
144 |
|
145 |
# Add predicted values with reduced opacity
|
146 |
-
predicted_o3_colors = get_simple_color_scale(
|
147 |
-
o3_future_values, 40
|
148 |
-
) # Color for future values
|
149 |
fig_o3.add_trace(
|
150 |
go.Bar(
|
151 |
x=df["Date"][-3:], # Dates for predicted values
|
152 |
y=o3_future_values,
|
153 |
name="O3 Predicted",
|
154 |
-
marker=dict(
|
155 |
-
color=predicted_o3_colors, opacity=0.5
|
156 |
-
), # Set opacity to 0.5 for predictions
|
157 |
hovertemplate="%{x|%d-%b-%Y}<br>%{y} µg/m³<extra></extra>",
|
158 |
)
|
159 |
)
|
@@ -184,7 +179,7 @@ with col2:
|
|
184 |
tickangle=-45,
|
185 |
tickcolor="gray",
|
186 |
),
|
187 |
-
showlegend=False
|
188 |
)
|
189 |
|
190 |
st.plotly_chart(fig_o3, key="fig_o3")
|
@@ -209,17 +204,13 @@ with col2:
|
|
209 |
)
|
210 |
|
211 |
# Add predicted values with reduced opacity
|
212 |
-
predicted_no2_colors = get_simple_color_scale(
|
213 |
-
no2_future_values, 120
|
214 |
-
) # Color for future values
|
215 |
fig_no2.add_trace(
|
216 |
go.Bar(
|
217 |
x=df["Date"][-3:], # Dates for predicted values
|
218 |
y=no2_future_values,
|
219 |
name="NO2 Predicted",
|
220 |
-
marker=dict(
|
221 |
-
color=predicted_no2_colors, opacity=0.5
|
222 |
-
), # Set opacity to 0.5 for predictions
|
223 |
hovertemplate="%{x|%d-%b-%Y}<br>%{y} µg/m³<extra></extra>",
|
224 |
)
|
225 |
)
|
@@ -250,7 +241,7 @@ with col2:
|
|
250 |
tickangle=-45,
|
251 |
tickcolor="gray",
|
252 |
),
|
253 |
-
showlegend=False
|
254 |
)
|
255 |
|
256 |
-
st.plotly_chart(fig_no2, key="fig_no2")
|
|
|
2 |
import pandas as pd
|
3 |
import plotly.graph_objects as go
|
4 |
import streamlit as st
|
5 |
+
|
6 |
from src.helper_functions import custom_metric_box, pollution_box
|
7 |
from src.predict import get_data_and_predictions, update_data_and_predictions
|
8 |
|
|
|
45 |
# Create a 3-column layout
|
46 |
with col1:
|
47 |
st.subheader("Current Weather")
|
48 |
+
|
49 |
+
|
50 |
custom_metric_box(
|
51 |
label="🥵 Temperature",
|
52 |
value=f"{round(today['mean_temp'] * 0.1)} °C",
|
|
|
76 |
with col2:
|
77 |
st.subheader("Current Pollution Levels")
|
78 |
sub1, sub2 = st.columns((1, 1))
|
79 |
+
|
80 |
# Ozone (O₃) Pollution Box
|
81 |
with sub1:
|
82 |
pollution_box(
|
83 |
label="O<sub>3</sub>",
|
84 |
value=f"{round(today['O3'])} µg/m³",
|
85 |
delta=f"{round(int(today['O3']) - int(previous_day['O3']))} µg/m³",
|
86 |
+
threshold=120
|
87 |
)
|
88 |
with st.expander("Learn more about O3", expanded=False):
|
89 |
st.markdown(
|
90 |
"""
|
91 |
*Ozone (O<sub>3</sub>)*: A harmful gas at ground level that can irritate the respiratory system and aggravate asthma.<br>
|
92 |
**Good/Bad**: "Good" means safe levels for most people, while "Bad" suggests harmful levels, especially for sensitive groups.
|
93 |
+
""",
|
94 |
unsafe_allow_html=True,
|
95 |
)
|
96 |
+
|
97 |
# Nitrogen Dioxide (NO₂) Pollution Box
|
98 |
with sub2:
|
99 |
pollution_box(
|
100 |
label="NO<sub>2</sub>",
|
101 |
value=f"{round(today['NO2'])} µg/m³",
|
102 |
delta=f"{round(int(today['NO2']) - int(previous_day['NO2']))} µg/m³",
|
103 |
+
threshold=40
|
104 |
)
|
105 |
with st.expander("Learn more about NO2", expanded=False):
|
106 |
st.markdown(
|
107 |
"""
|
108 |
*Nitrogen Dioxide (NO<sub>2</sub>)*: A toxic gas that contributes to lung irritation and worsens asthma and other respiratory issues.<br>
|
109 |
**Good/Bad**: "Good" means safe air quality, while "Bad" indicates levels that could cause respiratory problems, especially for vulnerable individuals.
|
110 |
+
""",
|
111 |
unsafe_allow_html=True,
|
112 |
)
|
113 |
|
|
|
118 |
def get_simple_color_scale(values, threshold):
|
119 |
"""Returns green for values below the threshold, orange for values between the threshold and 2x the threshold, and red for values above 2x the threshold."""
|
120 |
return [
|
121 |
+
"#77C124" if v < threshold else
|
122 |
+
"#E68B0A" if v < 2 * threshold else
|
123 |
+
"#E63946" for v in values
|
|
|
|
|
|
|
124 |
]
|
125 |
|
126 |
# O3 Bar Plot (threshold: 40)
|
|
|
142 |
)
|
143 |
|
144 |
# Add predicted values with reduced opacity
|
145 |
+
predicted_o3_colors = get_simple_color_scale(o3_future_values, 40) # Color for future values
|
|
|
|
|
146 |
fig_o3.add_trace(
|
147 |
go.Bar(
|
148 |
x=df["Date"][-3:], # Dates for predicted values
|
149 |
y=o3_future_values,
|
150 |
name="O3 Predicted",
|
151 |
+
marker=dict(color=predicted_o3_colors, opacity=0.5), # Set opacity to 0.5 for predictions
|
|
|
|
|
152 |
hovertemplate="%{x|%d-%b-%Y}<br>%{y} µg/m³<extra></extra>",
|
153 |
)
|
154 |
)
|
|
|
179 |
tickangle=-45,
|
180 |
tickcolor="gray",
|
181 |
),
|
182 |
+
showlegend=False # Disable legend
|
183 |
)
|
184 |
|
185 |
st.plotly_chart(fig_o3, key="fig_o3")
|
|
|
204 |
)
|
205 |
|
206 |
# Add predicted values with reduced opacity
|
207 |
+
predicted_no2_colors = get_simple_color_scale(no2_future_values, 120) # Color for future values
|
|
|
|
|
208 |
fig_no2.add_trace(
|
209 |
go.Bar(
|
210 |
x=df["Date"][-3:], # Dates for predicted values
|
211 |
y=no2_future_values,
|
212 |
name="NO2 Predicted",
|
213 |
+
marker=dict(color=predicted_no2_colors, opacity=0.5), # Set opacity to 0.5 for predictions
|
|
|
|
|
214 |
hovertemplate="%{x|%d-%b-%Y}<br>%{y} µg/m³<extra></extra>",
|
215 |
)
|
216 |
)
|
|
|
241 |
tickangle=-45,
|
242 |
tickcolor="gray",
|
243 |
),
|
244 |
+
showlegend=False # Disable legend
|
245 |
)
|
246 |
|
247 |
+
st.plotly_chart(fig_no2, key="fig_no2")
|
past_pollution_data.csv
CHANGED
@@ -10,21 +10,9 @@ date,NO2,O3
|
|
10 |
2023-10-26,21.53175675675676,13.3216
|
11 |
2023-10-27,23.07226666666666,16.15416666666666
|
12 |
2023-10-28,24.89121621621622,24.59040816326531
|
13 |
-
2023-10-29,9.
|
14 |
2023-10-30,11.20205479452055,52.820600000000006
|
15 |
2023-10-31,17.494666666666667,44.458541666666655
|
16 |
2023-11-01,21.588095238095235,29.20631578947369
|
17 |
2023-11-02,9.745714285714286,48.39760869565216
|
18 |
-
2023-11-03,7.
|
19 |
-
2023-11-04,12.660666666666666,45.56104166666666
|
20 |
-
2023-11-05,9.990135135135136,48.3988
|
21 |
-
2023-11-06,10.36972972972973,49.495
|
22 |
-
2023-11-07,16.654864864864866,39.8696
|
23 |
-
2023-11-08,15.846986301369862,44.5525
|
24 |
-
2023-11-09,14.630533333333332,46.897551020408166
|
25 |
-
2023-11-10,16.8868,46.03939999999999
|
26 |
-
2023-11-10,16.8868,46.03939999999999
|
27 |
-
2023-11-11,20.2668,40.0496
|
28 |
-
2023-11-12,22.7086301369863,25.88854166666667
|
29 |
-
2023-11-13,20.707466666666665,14.094000000000001
|
30 |
-
2023-11-14,10.368082191780822,59.828599999999994
|
|
|
10 |
2023-10-26,21.53175675675676,13.3216
|
11 |
2023-10-27,23.07226666666666,16.15416666666666
|
12 |
2023-10-28,24.89121621621622,24.59040816326531
|
13 |
+
2023-10-29,9.724428571428573,51.525200000000005
|
14 |
2023-10-30,11.20205479452055,52.820600000000006
|
15 |
2023-10-31,17.494666666666667,44.458541666666655
|
16 |
2023-11-01,21.588095238095235,29.20631578947369
|
17 |
2023-11-02,9.745714285714286,48.39760869565216
|
18 |
+
2023-11-03,7.163243243243242,61.421599999999984
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
past_weather_data.csv
CHANGED
@@ -15,17 +15,4 @@ date,temp,humidity,precip,windspeed,sealevelpressure,visibility,solarradiation
|
|
15 |
2023-10-30,11.2,90.4,13.0,18.4,997.5,28.8,27.0
|
16 |
2023-10-31,11.0,93.7,18.6,18.0,1000.7,17.9,29.8
|
17 |
2023-11-01,12.4,88.5,4.9,25.9,997.8,32.6,31.5
|
18 |
-
2023-11-02,11
|
19 |
-
2023-11-03,9.6,83.3,7.9,32.4,981.6,31.0,40.1
|
20 |
-
2023-11-04,9.3,88.7,10.7,25.9,977.6,35.3,20.4
|
21 |
-
2023-11-05,10.5,88.4,5.6,25.2,977.1,29.0,25.6
|
22 |
-
2023-11-06,9.9,86.5,1.8,31.7,998.2,37.3,45.2
|
23 |
-
2023-11-07,9.7,85.1,0.4,22.3,1007.3,41.1,63.6
|
24 |
-
2023-11-08,9.3,91.9,17.0,28.8,1008.2,21.2,37.0
|
25 |
-
2023-11-09,9.7,89.2,8.5,22.3,999.0,24.2,37.3
|
26 |
-
2023-11-10,7.5,90.0,4.4,22.3,995.3,36.0,20.8
|
27 |
-
2023-11-11,7.5,87.1,5.7,14.8,1003.3,40.0,25.2
|
28 |
-
2023-11-12,5.7,96.8,0.1,14.8,1006.4,8.8,26.2
|
29 |
-
2023-11-13,9.5,90.3,5.2,31.7,1000.8,9.6,9.8
|
30 |
-
2023-11-14,12.2,84.4,11.1,29.5,1001.4,14.8,16.0
|
31 |
-
2023-11-15,11.1,84.6,5.3,18.4,1011.9,21.4,22.8
|
|
|
15 |
2023-10-30,11.2,90.4,13.0,18.4,997.5,28.8,27.0
|
16 |
2023-10-31,11.0,93.7,18.6,18.0,1000.7,17.9,29.8
|
17 |
2023-11-01,12.4,88.5,4.9,25.9,997.8,32.6,31.5
|
18 |
+
2023-11-02,11,80,8.7,46.4,976.4,33.6,21.5
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
pollution_data.csv
CHANGED
@@ -12,17 +12,4 @@ date,NO2,O3
|
|
12 |
2024-10-27,27.53722134983982,20.80809239842384
|
13 |
2024-10-28,23.337567567567568,26.82861788617886
|
14 |
2024-10-29,16.53533209586906,23.28254887605004
|
15 |
-
2024-10-30,22.26162162162162,18.03443548387097
|
16 |
-
2024-10-31,24.919333333333334,20.79696
|
17 |
-
2024-11-01,25.08013698630137,8.923140495867766
|
18 |
-
2024-11-02,12.71013698630137,42.33314049586777
|
19 |
-
2024-11-03,16.46013698630137,17.923140495867766
|
20 |
-
2024-11-04,17.54013698630137,15.923140495867768
|
21 |
-
2024-11-05,21.96013698630137,9.083140495867768
|
22 |
-
2024-11-06,28.70013698630137,19.313140495867767
|
23 |
-
2024-11-07,26.071060606060605,17.701454545454546
|
24 |
-
2024-11-08,17.671060606060603,14.171454545454546
|
25 |
-
2024-11-09,18.131060606060604,3.701454545454546
|
26 |
-
2024-11-10,17.83082191780822,12.727355371900826
|
27 |
-
2024-11-11,12.75082191780822,39.257355371900826
|
28 |
-
2024-11-12,8.180147058823529,23.352807017543856
|
|
|
12 |
2024-10-27,27.53722134983982,20.80809239842384
|
13 |
2024-10-28,23.337567567567568,26.82861788617886
|
14 |
2024-10-29,16.53533209586906,23.28254887605004
|
15 |
+
2024-10-30,22.26162162162162,18.03443548387097
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
predictions_history.csv
CHANGED
@@ -1,7 +1,7 @@
|
|
1 |
pollutant,date_predicted,date,prediction_value
|
2 |
O3,2024-10-14,2024-10-17,31.25335185244893
|
3 |
NO2,2024-10-14,2024-10-17,26.421736787446267
|
4 |
-
O3,2024-10-15,2024-10-17,
|
5 |
NO2,2024-10-15,2024-10-17,28.59511317503212
|
6 |
O3,2024-10-16,2024-10-17,9.657466070999735
|
7 |
NO2,2024-10-16,2024-10-17,17.065168790519902
|
@@ -13,11 +13,11 @@ O3,2024-10-17,2024-10-18,16.08841798553393
|
|
13 |
NO2,2024-10-17,2024-10-18,32.0458143607889
|
14 |
O3,2024-10-16,2024-10-19,24.031357603260783
|
15 |
NO2,2024-10-16,2024-10-19,20.08389395558791
|
16 |
-
O3,2024-10-17,2024-10-19,
|
17 |
NO2,2024-10-17,2024-10-19,27.08389395558791
|
18 |
-
O3,2024-10-17,2024-10-20,
|
19 |
NO2,2024-10-17,2024-10-20,23.84300578029378
|
20 |
-
O3,2024-10-18,2024-10-19,
|
21 |
NO2,2024-10-18,2024-10-19,20.80017116560889
|
22 |
O3,2024-10-18,2024-10-20,31.25335185244893
|
23 |
NO2,2024-10-18,2024-10-20,29.732316066240585
|
@@ -25,15 +25,15 @@ O3,2024-10-18,2024-10-21,28.67755196805434
|
|
25 |
NO2,2024-10-18,2024-10-21,35.04638743773354
|
26 |
O3,2024-10-19,2024-10-20,26.421736787446267
|
27 |
NO2,2024-10-19,2024-10-20,27.399885723190767
|
28 |
-
O3,2024-10-19,2024-10-21,
|
29 |
NO2,2024-10-19,2024-10-21,18.992352714813563
|
30 |
O3,2024-10-19,2024-10-22,17.39682962048955
|
31 |
NO2,2024-10-19,2024-10-22,22.85061675885908
|
32 |
-
O3,2024-10-20,2024-10-21,
|
33 |
NO2,2024-10-20,2024-10-21,18.27191592927812
|
34 |
O3,2024-10-20,2024-10-22,29.00940466937953
|
35 |
NO2,2024-10-20,2024-10-22,19.50739766963497
|
36 |
-
O3,2024-10-20,2024-10-23,
|
37 |
NO2,2024-10-20,2024-10-23,23.65746607099973
|
38 |
O3,2024-10-21,2024-10-22,17.497382318189132
|
39 |
NO2,2024-10-21,2024-10-22,28.59511317503212
|
@@ -41,135 +41,32 @@ O3,2024-10-21,2024-10-23,16.519952190354232
|
|
41 |
NO2,2024-10-21,2024-10-23,30.192389708351826
|
42 |
O3,2024-10-21,2024-10-24,28.19940385112904
|
43 |
NO2,2024-10-21,2024-10-24,17.9525039623211
|
44 |
-
O3,2024-10-22,2024-10-23,
|
45 |
NO2,2024-10-22,2024-10-23,25.217639978187005
|
46 |
O3,2024-10-22,2024-10-24,23.605545201596552
|
47 |
NO2,2024-10-22,2024-10-24,29.004701753536988
|
48 |
-
O3,2024-10-22,2024-10-25,27.9847340998457
|
49 |
-
NO2,2024-10-22,2024-10-25,21.09454928349811
|
50 |
O3,2024-10-23,2024-10-24,26.56486295059828
|
51 |
NO2,2024-10-23,2024-10-24,20.15373733747257
|
52 |
-
O3,2024-10-23,2024-10-25,23.7837523402934
|
53 |
-
NO2,2024-10-23,2024-10-25,17.8934409250984
|
54 |
-
O3,2024-10-23,2024-10-26,28.9427902487123
|
55 |
-
NO2,2024-10-23,2024-10-26,18.4259830043909
|
56 |
O3,2024-10-24,2024-10-25,10.33808859423279
|
57 |
NO2,2024-10-24,2024-10-25,25.68519991558237
|
58 |
-
O3,2024-10-24,2024-10-26,
|
59 |
NO2,2024-10-24,2024-10-26,25.760307451092384
|
60 |
-
O3,2024-10-24,2024-10-27,
|
61 |
NO2,2024-10-24,2024-10-27,31.210576791105115
|
62 |
O3,2024-10-25,2024-10-26,20.48055947200643
|
63 |
NO2,2024-10-25,2024-10-26,23.95723903986424
|
64 |
O3,2024-10-25,2024-10-27,11.088152958498888
|
65 |
NO2,2024-10-25,2024-10-27,32.274494671100506
|
66 |
O3,2024-10-25,2024-10-28,-0.7175631399505704
|
67 |
-
NO2,2024-10-25,2024-10-28,40.
|
68 |
-
O3,2024-10-26,2024-10-27,17.489498527754026
|
69 |
-
NO2,2024-10-26,2024-10-27,15.98010248203777
|
70 |
-
O3,2024-10-26,2024-10-28,18.2072938736443
|
71 |
-
NO2,2024-10-26,2024-10-28,20.8453424245324
|
72 |
-
O3,2024-10-26,2024-10-29,29.0932876923023
|
73 |
-
NO2,2024-10-26,2024-10-29,18.2090842283492
|
74 |
-
O3,2024-10-27,2024-10-28,23.123594856598
|
75 |
-
NO2,2024-10-27,2024-10-28,16.217932302093
|
76 |
-
O3,2024-10-27,2024-10-29,19.503484893273
|
77 |
-
NO2,2024-10-27,2024-10-29,22.023348685
|
78 |
-
O3,2024-10-27,2024-10-30,21.6890289354354
|
79 |
-
NO2,2024-10-27,2024-10-30,23.27534807834
|
80 |
O3,2024-10-28,2024-10-29,22.13652238154496
|
81 |
NO2,2024-10-28,2024-10-29,31.608886931951144
|
82 |
O3,2024-10-28,2024-10-30,15.841669224
|
83 |
NO2,2024-10-28,2024-10-30,34.564284711452984
|
84 |
O3,2024-10-28,2024-10-31,22.35944571003375
|
85 |
NO2,2024-10-28,2024-10-31,34.37482132111927
|
86 |
-
O3,2024-10-29,2024-10-30,20.53543580394545
|
87 |
-
NO2,2024-10-29,2024-10-30,18.74548598422698
|
88 |
-
O3,2024-10-29,2024-10-31,18.36287520452959
|
89 |
-
NO2,2024-10-29,2024-10-31,17.2457634958273
|
90 |
-
O3,2024-10-29,2024-11-01,16.9829845792475
|
91 |
-
NO2,2024-10-29,2024-11-01,15.984094509529
|
92 |
O3,2024-10-30,2024-10-31,15.98046542733637
|
93 |
NO2,2024-10-30,2024-10-31,29.77507241979599
|
94 |
O3,2024-10-30,2024-11-01,21.135906183680472
|
95 |
NO2,2024-10-30,2024-11-01,28.38872595850704
|
96 |
O3,2024-10-30,2024-11-02,19.67426015042635
|
97 |
-
NO2,2024-10-30,2024-11-02,26.9572340984345
|
98 |
-
O3,2024-10-31,2024-11-01,16.491393851863755
|
99 |
-
NO2,2024-10-31,2024-11-01,17.22825222459993
|
100 |
-
O3,2024-10-31,2024-11-02,16.874728806873033
|
101 |
-
NO2,2024-10-31,2024-11-02,14.771381333796963
|
102 |
-
O3,2024-10-31,2024-11-03,15.244292496093546
|
103 |
-
NO2,2024-10-31,2024-11-03,14.606430068166452
|
104 |
-
NO2,2024-11-01,2024-11-02,27.60903681874446
|
105 |
-
O3,2024-11-01,2024-11-02,34.42005311364765
|
106 |
-
NO2,2024-11-01,2024-11-03,27.60903681874446
|
107 |
-
O3,2024-11-01,2024-11-03,11.420053113647649
|
108 |
-
NO2,2024-11-01,2024-11-04,27.60903681874446
|
109 |
-
O3,2024-11-01,2024-11-04,11.420053113647649
|
110 |
-
NO2,2024-11-02,2024-11-03,28.648331465973826
|
111 |
-
O3,2024-11-02,2024-11-03,19.22638136470001
|
112 |
-
NO2,2024-11-02,2024-11-04,28.648331465973826
|
113 |
-
O3,2024-11-02,2024-11-04,19.22638136470001
|
114 |
-
NO2,2024-11-02,2024-11-05,28.648331465973826
|
115 |
-
O3,2024-11-02,2024-11-05,19.22638136470001
|
116 |
-
NO2,2024-11-03,2024-11-04,27.00186266225546
|
117 |
-
O3,2024-11-03,2024-11-04,21.27062113852946
|
118 |
-
NO2,2024-11-03,2024-11-05,27.00186266225546
|
119 |
-
O3,2024-11-03,2024-11-05,26.27062113852946
|
120 |
-
NO2,2024-11-03,2024-11-06,27.00186266225546
|
121 |
-
O3,2024-11-03,2024-11-06,26.27062113852946
|
122 |
-
NO2,2024-11-04,2024-11-05,15.639721183473046
|
123 |
-
O3,2024-11-04,2024-11-05,17.485704138409645
|
124 |
-
NO2,2024-11-04,2024-11-06,15.639721183473046
|
125 |
-
O3,2024-11-04,2024-11-06,28.485704138409645
|
126 |
-
NO2,2024-11-04,2024-11-07,15.639721183473046
|
127 |
-
O3,2024-11-04,2024-11-07,28.485704138409645
|
128 |
-
NO2,2024-11-05,2024-11-06,21.25135096675789
|
129 |
-
O3,2024-11-05,2024-11-06,14.976870014538976
|
130 |
-
NO2,2024-11-05,2024-11-07,21.25135096675789
|
131 |
-
O3,2024-11-05,2024-11-07,14.976870014538976
|
132 |
-
NO2,2024-11-05,2024-11-08,21.25135096675789
|
133 |
-
O3,2024-11-05,2024-11-08,14.976870014538976
|
134 |
-
O3,2024-11-06,2024-11-07,19.68971431379357
|
135 |
-
NO2,2024-11-06,2024-11-07,23.084000795655953
|
136 |
-
O3,2024-11-06,2024-11-08,13.40804818820844
|
137 |
-
NO2,2024-11-06,2024-11-08,14.957130244115683
|
138 |
-
O3,2024-11-06,2024-11-09,22.03985252007605
|
139 |
-
NO2,2024-11-06,2024-11-09,10.588264867150572
|
140 |
-
O3,2024-11-07,2024-11-08,10.140579432131872
|
141 |
-
NO2,2024-11-07,2024-11-08,26.943305282744824
|
142 |
-
O3,2024-11-07,2024-11-09,14.726872459118134
|
143 |
-
NO2,2024-11-07,2024-11-09,16.882629842776268
|
144 |
-
O3,2024-11-07,2024-11-10,19.411520595892956
|
145 |
-
NO2,2024-11-07,2024-11-10,11.844857190714402
|
146 |
-
O3,2024-11-8,2024-11-9,6.594300618755156
|
147 |
-
NO2,2024-11-8,2024-11-9,19.24076043419152
|
148 |
-
O3,2024-11-8,2024-11-10,13.499823957840668
|
149 |
-
NO2,2024-11-8,2024-11-10,18.48512032218451
|
150 |
-
O3,2024-11-8,2024-11-11,11.875179042709217
|
151 |
-
NO2,2024-11-8,2024-11-11,16.574502215589668
|
152 |
-
O3,2024-11-9,2024-11-10,13.499823957840668
|
153 |
-
NO2,2024-11-9,2024-11-10,18.48512032218451
|
154 |
-
O3,2024-11-9,2024-11-11,9.499823957840668
|
155 |
-
NO2,2024-11-9,2024-11-11,18.48512032218451
|
156 |
-
O3,2024-11-9,2024-11-12,11.875179042709217
|
157 |
-
NO2,2024-11-9,2024-11-12,16.574502215589668
|
158 |
-
O3,2024-11-10,2024-11-11,16.594300618755156
|
159 |
-
NO2,2024-11-10,2024-11-11,19.24076043419152
|
160 |
-
O3,2024-11-10,2024-11-12,9.499823957840668
|
161 |
-
NO2,2024-11-10,2024-11-12,18.48512032218451
|
162 |
-
O3,2024-11-10,2024-11-13,11.875179042709217
|
163 |
-
NO2,2024-11-10,2024-11-13,16.574502215589668
|
164 |
-
O3,2024-11-11,2024-11-12,16.247626282257677
|
165 |
-
NO2,2024-11-11,2024-11-12,19.094817776178502
|
166 |
-
O3,2024-11-11,2024-11-13,16.676783602296
|
167 |
-
NO2,2024-11-11,2024-11-13,16.50993839661109
|
168 |
-
O3,2024-11-11,2024-11-14,24.877041509403615
|
169 |
-
NO2,2024-11-11,2024-11-14,10.777386108715564
|
170 |
-
O3,2024-11-12,2024-11-13,16.247626282257677
|
171 |
-
NO2,2024-11-12,2024-11-13,19.094817776178502
|
172 |
-
O3,2024-11-12,2024-11-14,16.676783602296
|
173 |
-
NO2,2024-11-12,2024-11-14,16.50993839661109
|
174 |
-
O3,2024-11-12,2024-11-15,24.877041509403615
|
175 |
-
NO2,2024-11-12,2024-11-15,10.777386108715564
|
|
|
1 |
pollutant,date_predicted,date,prediction_value
|
2 |
O3,2024-10-14,2024-10-17,31.25335185244893
|
3 |
NO2,2024-10-14,2024-10-17,26.421736787446267
|
4 |
+
O3,2024-10-15,2024-10-17,22.00005767760448
|
5 |
NO2,2024-10-15,2024-10-17,28.59511317503212
|
6 |
O3,2024-10-16,2024-10-17,9.657466070999735
|
7 |
NO2,2024-10-16,2024-10-17,17.065168790519902
|
|
|
13 |
NO2,2024-10-17,2024-10-18,32.0458143607889
|
14 |
O3,2024-10-16,2024-10-19,24.031357603260783
|
15 |
NO2,2024-10-16,2024-10-19,20.08389395558791
|
16 |
+
O3,2024-10-17,2024-10-19,21.031357603260783
|
17 |
NO2,2024-10-17,2024-10-19,27.08389395558791
|
18 |
+
O3,2024-10-17,2024-10-20,20.48486247979324
|
19 |
NO2,2024-10-17,2024-10-20,23.84300578029378
|
20 |
+
O3,2024-10-18,2024-10-19,22.304547122637445
|
21 |
NO2,2024-10-18,2024-10-19,20.80017116560889
|
22 |
O3,2024-10-18,2024-10-20,31.25335185244893
|
23 |
NO2,2024-10-18,2024-10-20,29.732316066240585
|
|
|
25 |
NO2,2024-10-18,2024-10-21,35.04638743773354
|
26 |
O3,2024-10-19,2024-10-20,26.421736787446267
|
27 |
NO2,2024-10-19,2024-10-20,27.399885723190767
|
28 |
+
O3,2024-10-19,2024-10-21,17.065168790519902
|
29 |
NO2,2024-10-19,2024-10-21,18.992352714813563
|
30 |
O3,2024-10-19,2024-10-22,17.39682962048955
|
31 |
NO2,2024-10-19,2024-10-22,22.85061675885908
|
32 |
+
O3,2024-10-20,2024-10-21,22.00005767760448
|
33 |
NO2,2024-10-20,2024-10-21,18.27191592927812
|
34 |
O3,2024-10-20,2024-10-22,29.00940466937953
|
35 |
NO2,2024-10-20,2024-10-22,19.50739766963497
|
36 |
+
O3,2024-10-20,2024-10-23,20.062134354543343
|
37 |
NO2,2024-10-20,2024-10-23,23.65746607099973
|
38 |
O3,2024-10-21,2024-10-22,17.497382318189132
|
39 |
NO2,2024-10-21,2024-10-22,28.59511317503212
|
|
|
41 |
NO2,2024-10-21,2024-10-23,30.192389708351826
|
42 |
O3,2024-10-21,2024-10-24,28.19940385112904
|
43 |
NO2,2024-10-21,2024-10-24,17.9525039623211
|
44 |
+
O3,2024-10-22,2024-10-23,16.093074246425157
|
45 |
NO2,2024-10-22,2024-10-23,25.217639978187005
|
46 |
O3,2024-10-22,2024-10-24,23.605545201596552
|
47 |
NO2,2024-10-22,2024-10-24,29.004701753536988
|
|
|
|
|
48 |
O3,2024-10-23,2024-10-24,26.56486295059828
|
49 |
NO2,2024-10-23,2024-10-24,20.15373733747257
|
|
|
|
|
|
|
|
|
50 |
O3,2024-10-24,2024-10-25,10.33808859423279
|
51 |
NO2,2024-10-24,2024-10-25,25.68519991558237
|
52 |
+
O3,2024-10-24,2024-10-26,16.000984317626852
|
53 |
NO2,2024-10-24,2024-10-26,25.760307451092384
|
54 |
+
O3,2024-10-24,2024-10-27,19.64377495640328
|
55 |
NO2,2024-10-24,2024-10-27,31.210576791105115
|
56 |
O3,2024-10-25,2024-10-26,20.48055947200643
|
57 |
NO2,2024-10-25,2024-10-26,23.95723903986424
|
58 |
O3,2024-10-25,2024-10-27,11.088152958498888
|
59 |
NO2,2024-10-25,2024-10-27,32.274494671100506
|
60 |
O3,2024-10-25,2024-10-28,-0.7175631399505704
|
61 |
+
NO2,2024-10-25,2024-10-28,40.86107800019054
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
62 |
O3,2024-10-28,2024-10-29,22.13652238154496
|
63 |
NO2,2024-10-28,2024-10-29,31.608886931951144
|
64 |
O3,2024-10-28,2024-10-30,15.841669224
|
65 |
NO2,2024-10-28,2024-10-30,34.564284711452984
|
66 |
O3,2024-10-28,2024-10-31,22.35944571003375
|
67 |
NO2,2024-10-28,2024-10-31,34.37482132111927
|
|
|
|
|
|
|
|
|
|
|
|
|
68 |
O3,2024-10-30,2024-10-31,15.98046542733637
|
69 |
NO2,2024-10-30,2024-10-31,29.77507241979599
|
70 |
O3,2024-10-30,2024-11-01,21.135906183680472
|
71 |
NO2,2024-10-30,2024-11-01,28.38872595850704
|
72 |
O3,2024-10-30,2024-11-02,19.67426015042635
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
requirements.txt
CHANGED
@@ -10,5 +10,3 @@ http.client
|
|
10 |
datetime
|
11 |
huggingface-hub
|
12 |
python-dotenv
|
13 |
-
torch
|
14 |
-
safetensors
|
|
|
10 |
datetime
|
11 |
huggingface-hub
|
12 |
python-dotenv
|
|
|
|
src/data_api_calls.py
CHANGED
@@ -14,11 +14,7 @@ WEATHER_DATA_FILE = "weather_data.csv"
|
|
14 |
POLLUTION_DATA_FILE = "pollution_data.csv"
|
15 |
|
16 |
|
17 |
-
def update_weather_data()
|
18 |
-
"""
|
19 |
-
Updates weather data by fetching data.
|
20 |
-
If the data file exists, it appends new data. If not, it creates a new file.
|
21 |
-
"""
|
22 |
today = date.today().isoformat()
|
23 |
|
24 |
if os.path.exists(WEATHER_DATA_FILE):
|
@@ -54,11 +50,7 @@ def update_weather_data() -> None:
|
|
54 |
sys.exit()
|
55 |
|
56 |
|
57 |
-
def update_pollution_data()
|
58 |
-
"""
|
59 |
-
Updates pollution data for NO2 and O3.
|
60 |
-
The new data is appended to the existing pollution data file.
|
61 |
-
"""
|
62 |
O3 = []
|
63 |
NO2 = []
|
64 |
particles = ["NO2", "O3"]
|
@@ -121,21 +113,14 @@ def update_pollution_data() -> None:
|
|
121 |
updated_data.to_csv(POLLUTION_DATA_FILE, index=False)
|
122 |
|
123 |
|
124 |
-
def get_combined_data()
|
125 |
-
"""
|
126 |
-
Combines weather and pollution data for the last 7 days.
|
127 |
|
128 |
-
Returns:
|
129 |
-
pd.DataFrame: A DataFrame containing the combined weather and pollution data.
|
130 |
-
"""
|
131 |
weather_df = pd.read_csv(WEATHER_DATA_FILE)
|
132 |
-
|
133 |
today = pd.Timestamp.now().normalize()
|
134 |
seven_days_ago = today - pd.Timedelta(days=7)
|
135 |
weather_df["date"] = pd.to_datetime(weather_df["date"])
|
136 |
-
weather_df = weather_df[
|
137 |
-
(weather_df["date"] >= seven_days_ago) & (weather_df["date"] <= today)
|
138 |
-
]
|
139 |
|
140 |
weather_df.insert(1, "NO2", None)
|
141 |
weather_df.insert(2, "O3", None)
|
@@ -183,9 +168,7 @@ def get_combined_data() -> pd.DataFrame:
|
|
183 |
pollution_df = pd.read_csv(POLLUTION_DATA_FILE)
|
184 |
|
185 |
pollution_df["date"] = pd.to_datetime(pollution_df["date"])
|
186 |
-
pollution_df = pollution_df[
|
187 |
-
(pollution_df["date"] >= seven_days_ago) & (pollution_df["date"] <= today)
|
188 |
-
]
|
189 |
|
190 |
combined_df["NO2"] = pollution_df["NO2"]
|
191 |
combined_df["O3"] = pollution_df["O3"]
|
|
|
14 |
POLLUTION_DATA_FILE = "pollution_data.csv"
|
15 |
|
16 |
|
17 |
+
def update_weather_data():
|
|
|
|
|
|
|
|
|
18 |
today = date.today().isoformat()
|
19 |
|
20 |
if os.path.exists(WEATHER_DATA_FILE):
|
|
|
50 |
sys.exit()
|
51 |
|
52 |
|
53 |
+
def update_pollution_data():
|
|
|
|
|
|
|
|
|
54 |
O3 = []
|
55 |
NO2 = []
|
56 |
particles = ["NO2", "O3"]
|
|
|
113 |
updated_data.to_csv(POLLUTION_DATA_FILE, index=False)
|
114 |
|
115 |
|
116 |
+
def get_combined_data():
|
|
|
|
|
117 |
|
|
|
|
|
|
|
118 |
weather_df = pd.read_csv(WEATHER_DATA_FILE)
|
119 |
+
|
120 |
today = pd.Timestamp.now().normalize()
|
121 |
seven_days_ago = today - pd.Timedelta(days=7)
|
122 |
weather_df["date"] = pd.to_datetime(weather_df["date"])
|
123 |
+
weather_df = weather_df[(weather_df["date"] >= seven_days_ago) & (weather_df["date"] <= today)]
|
|
|
|
|
124 |
|
125 |
weather_df.insert(1, "NO2", None)
|
126 |
weather_df.insert(2, "O3", None)
|
|
|
168 |
pollution_df = pd.read_csv(POLLUTION_DATA_FILE)
|
169 |
|
170 |
pollution_df["date"] = pd.to_datetime(pollution_df["date"])
|
171 |
+
pollution_df = pollution_df[(pollution_df["date"] >= seven_days_ago) & (pollution_df["date"] <= today)]
|
|
|
|
|
172 |
|
173 |
combined_df["NO2"] = pollution_df["NO2"]
|
174 |
combined_df["O3"] = pollution_df["O3"]
|
src/features_pipeline.py
CHANGED
@@ -6,6 +6,7 @@ import numpy as np
|
|
6 |
import pandas as pd
|
7 |
from dotenv import load_dotenv
|
8 |
from huggingface_hub import hf_hub_download, login
|
|
|
9 |
from src.past_data_api_calls import get_past_combined_data
|
10 |
|
11 |
warnings.filterwarnings("ignore")
|
@@ -15,44 +16,11 @@ login(token=os.getenv("HUGGINGFACE_DOWNLOAD_TOKEN"))
|
|
15 |
|
16 |
|
17 |
def create_features(
|
18 |
-
data
|
19 |
-
target_particle
|
20 |
-
lag_days
|
21 |
-
sma_days
|
22 |
-
)
|
23 |
-
"""
|
24 |
-
Create features for predicting air quality particles (NO2 or O3) based on historical weather data.
|
25 |
-
|
26 |
-
This function performs several feature engineering tasks, including:
|
27 |
-
- Creating lagged features for specified pollutants.
|
28 |
-
- Calculating rolling mean (SMA) features.
|
29 |
-
- Adding sine and cosine transformations of the weekday and month.
|
30 |
-
- Incorporating historical data for the same date in the previous year.
|
31 |
-
|
32 |
-
Parameters:
|
33 |
-
----------
|
34 |
-
data : pd.DataFrame
|
35 |
-
A DataFrame containing historical weather and air quality data with a 'date' column.
|
36 |
-
|
37 |
-
target_particle : str
|
38 |
-
The target particle for prediction, must be either 'O3' or 'NO2'.
|
39 |
-
|
40 |
-
lag_days : int, optional
|
41 |
-
The number of days for which lagged features will be created. Default is 7.
|
42 |
-
|
43 |
-
sma_days : int, optional
|
44 |
-
The window size for calculating the simple moving average (SMA). Default is 7.
|
45 |
-
|
46 |
-
Returns:
|
47 |
-
-------
|
48 |
-
pd.DataFrame
|
49 |
-
A DataFrame containing the transformed features, ready for modeling.
|
50 |
-
|
51 |
-
Raises:
|
52 |
-
------
|
53 |
-
ValueError
|
54 |
-
If target_particle is not 'O3' or 'NO2'.
|
55 |
-
"""
|
56 |
lag_features = [
|
57 |
"NO2",
|
58 |
"O3",
|
|
|
6 |
import pandas as pd
|
7 |
from dotenv import load_dotenv
|
8 |
from huggingface_hub import hf_hub_download, login
|
9 |
+
|
10 |
from src.past_data_api_calls import get_past_combined_data
|
11 |
|
12 |
warnings.filterwarnings("ignore")
|
|
|
16 |
|
17 |
|
18 |
def create_features(
|
19 |
+
data,
|
20 |
+
target_particle, # Added this parameter
|
21 |
+
lag_days=7,
|
22 |
+
sma_days=7,
|
23 |
+
):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
24 |
lag_features = [
|
25 |
"NO2",
|
26 |
"O3",
|
src/helper_functions.py
CHANGED
@@ -1,26 +1,9 @@
|
|
1 |
import streamlit as st
|
2 |
|
3 |
|
4 |
-
|
5 |
-
|
6 |
-
|
7 |
-
|
8 |
-
This function generates a styled markdown box displaying a label and its corresponding value.
|
9 |
-
|
10 |
-
Parameters:
|
11 |
-
----------
|
12 |
-
label : str
|
13 |
-
The text label to display in the metric box.
|
14 |
-
|
15 |
-
value : str
|
16 |
-
The value to be displayed in the metric box, typically representing a metric.
|
17 |
-
|
18 |
-
Returns:
|
19 |
-
-------
|
20 |
-
None
|
21 |
-
"""
|
22 |
-
st.markdown(
|
23 |
-
f"""
|
24 |
<div style="
|
25 |
padding: 5px;
|
26 |
margin-bottom: 5px;
|
@@ -36,42 +19,17 @@ def custom_metric_box(label: str, value: str) -> None:
|
|
36 |
<p style="font-size: 18px; font-weight: bold; margin: 0;">{value}</p> <!-- Smaller metric -->
|
37 |
</div>
|
38 |
</div>
|
39 |
-
""",
|
40 |
-
unsafe_allow_html=True,
|
41 |
-
)
|
42 |
-
|
43 |
-
|
44 |
-
def pollution_box(label: str, value: str, delta: str, threshold: float) -> None:
|
45 |
-
"""
|
46 |
-
Create a pollution metric box with a side-by-side layout and fixed width.
|
47 |
-
|
48 |
-
This function generates a styled markdown box displaying pollution level status, value, and other related information.
|
49 |
-
|
50 |
-
Parameters:
|
51 |
-
----------
|
52 |
-
label : str
|
53 |
-
The text label representing the type of pollution or metric.
|
54 |
-
|
55 |
-
value : str
|
56 |
-
The value of the pollution metric, typically a string that can be converted to a float.
|
57 |
-
|
58 |
-
delta : str
|
59 |
-
A string representing the change in pollution level, though not currently used in the rendering.
|
60 |
-
|
61 |
-
threshold : float
|
62 |
-
The threshold value to determine if the pollution level is "Good" or "Bad".
|
63 |
|
64 |
-
|
65 |
-
|
66 |
-
|
67 |
-
"""
|
68 |
# Determine if the pollution level is "Good" or "Bad"
|
69 |
status = "Good" if float(value.split()[0]) < threshold else "Bad"
|
70 |
status_color = "#77C124" if status == "Good" else "#E68B0A"
|
71 |
|
72 |
# Render the pollution box
|
73 |
-
st.markdown(
|
74 |
-
f"""
|
75 |
<div style="
|
76 |
background: rgba(255, 255, 255, 0.05);
|
77 |
border-radius: 16px;
|
@@ -86,6 +44,4 @@ def pollution_box(label: str, value: str, delta: str, threshold: float) -> None:
|
|
86 |
<p style="font-size: 36px; font-weight: bold; color: {status_color}; margin: 0;">{status}</p> <!-- Good/Bad with color -->
|
87 |
<p style="font-size: 18px; margin: 0;">{value}</p> <!-- Smaller value where delta used to be -->
|
88 |
</div>
|
89 |
-
""",
|
90 |
-
unsafe_allow_html=True,
|
91 |
-
)
|
|
|
1 |
import streamlit as st
|
2 |
|
3 |
|
4 |
+
# Custom function to create styled metric boxes with compact layout
|
5 |
+
def custom_metric_box(label, value):
|
6 |
+
st.markdown(f"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
7 |
<div style="
|
8 |
padding: 5px;
|
9 |
margin-bottom: 5px;
|
|
|
19 |
<p style="font-size: 18px; font-weight: bold; margin: 0;">{value}</p> <!-- Smaller metric -->
|
20 |
</div>
|
21 |
</div>
|
22 |
+
""", unsafe_allow_html=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
23 |
|
24 |
+
# Custom function to create pollution metric boxes with side-by-side layout for label and value
|
25 |
+
# Custom function to create pollution metric boxes with side-by-side layout and fixed width
|
26 |
+
def pollution_box(label, value, delta, threshold):
|
|
|
27 |
# Determine if the pollution level is "Good" or "Bad"
|
28 |
status = "Good" if float(value.split()[0]) < threshold else "Bad"
|
29 |
status_color = "#77C124" if status == "Good" else "#E68B0A"
|
30 |
|
31 |
# Render the pollution box
|
32 |
+
st.markdown(f"""
|
|
|
33 |
<div style="
|
34 |
background: rgba(255, 255, 255, 0.05);
|
35 |
border-radius: 16px;
|
|
|
44 |
<p style="font-size: 36px; font-weight: bold; color: {status_color}; margin: 0;">{status}</p> <!-- Good/Bad with color -->
|
45 |
<p style="font-size: 18px; margin: 0;">{value}</p> <!-- Smaller value where delta used to be -->
|
46 |
</div>
|
47 |
+
""", unsafe_allow_html=True)
|
|
|
|
src/past_data_api_calls.py
CHANGED
@@ -14,11 +14,7 @@ PAST_WEATHER_DATA_FILE = "past_weather_data.csv"
|
|
14 |
PAST_POLLUTION_DATA_FILE = "past_pollution_data.csv"
|
15 |
|
16 |
|
17 |
-
def update_past_weather_data()
|
18 |
-
"""
|
19 |
-
Updates past weather data.
|
20 |
-
The data is saved to a CSV file. If the file already exists, new data is appended.
|
21 |
-
"""
|
22 |
last_year_date = date.today() - timedelta(days=365)
|
23 |
|
24 |
if os.path.exists(PAST_WEATHER_DATA_FILE):
|
@@ -55,13 +51,7 @@ def update_past_weather_data() -> None:
|
|
55 |
sys.exit()
|
56 |
|
57 |
|
58 |
-
def update_past_pollution_data()
|
59 |
-
"""
|
60 |
-
Updates past pollution data for NO2 and O3.
|
61 |
-
|
62 |
-
Returns:
|
63 |
-
tuple: A tuple containing two lists with NO2 and O3 average values.
|
64 |
-
"""
|
65 |
O3 = []
|
66 |
NO2 = []
|
67 |
particles = ["NO2", "O3"]
|
@@ -75,7 +65,7 @@ def update_past_pollution_data() -> tuple[list[float], list[float]]:
|
|
75 |
last_date = pd.to_datetime(existing_data["date"]).max()
|
76 |
if last_date >= pd.to_datetime(last_year_date):
|
77 |
print("Data is already up to date.")
|
78 |
-
return
|
79 |
else:
|
80 |
start_date = last_date.date()
|
81 |
end_date = last_year_date + timedelta(days=3)
|
@@ -139,13 +129,7 @@ def update_past_pollution_data() -> tuple[list[float], list[float]]:
|
|
139 |
return NO2, O3
|
140 |
|
141 |
|
142 |
-
def get_past_combined_data()
|
143 |
-
"""
|
144 |
-
Retrieves and combines past weather and pollution data.
|
145 |
-
|
146 |
-
Returns:
|
147 |
-
pd.DataFrame: A DataFrame containing the combined past weather and pollution data.
|
148 |
-
"""
|
149 |
update_past_weather_data()
|
150 |
update_past_pollution_data()
|
151 |
|
|
|
14 |
PAST_POLLUTION_DATA_FILE = "past_pollution_data.csv"
|
15 |
|
16 |
|
17 |
+
def update_past_weather_data():
|
|
|
|
|
|
|
|
|
18 |
last_year_date = date.today() - timedelta(days=365)
|
19 |
|
20 |
if os.path.exists(PAST_WEATHER_DATA_FILE):
|
|
|
51 |
sys.exit()
|
52 |
|
53 |
|
54 |
+
def update_past_pollution_data():
|
|
|
|
|
|
|
|
|
|
|
|
|
55 |
O3 = []
|
56 |
NO2 = []
|
57 |
particles = ["NO2", "O3"]
|
|
|
65 |
last_date = pd.to_datetime(existing_data["date"]).max()
|
66 |
if last_date >= pd.to_datetime(last_year_date):
|
67 |
print("Data is already up to date.")
|
68 |
+
return
|
69 |
else:
|
70 |
start_date = last_date.date()
|
71 |
end_date = last_year_date + timedelta(days=3)
|
|
|
129 |
return NO2, O3
|
130 |
|
131 |
|
132 |
+
def get_past_combined_data():
|
|
|
|
|
|
|
|
|
|
|
|
|
133 |
update_past_weather_data()
|
134 |
update_past_pollution_data()
|
135 |
|
src/predict.py
CHANGED
@@ -3,9 +3,9 @@ from datetime import date, datetime, timedelta
|
|
3 |
|
4 |
import joblib
|
5 |
import pandas as pd
|
6 |
-
import torch
|
7 |
from dotenv import load_dotenv
|
8 |
from huggingface_hub import hf_hub_download, login
|
|
|
9 |
from src.data_api_calls import (
|
10 |
get_combined_data,
|
11 |
update_pollution_data,
|
@@ -17,110 +17,33 @@ load_dotenv()
|
|
17 |
login(token=os.getenv("HUGGINGFACE_DOWNLOAD_TOKEN"))
|
18 |
|
19 |
|
20 |
-
def
|
21 |
-
"""
|
22 |
-
Loads the neural network model for air pollution forecasting.
|
23 |
-
|
24 |
-
Returns:
|
25 |
-
torch.nn.Module: The loaded neural network model.
|
26 |
-
"""
|
27 |
-
import torch.nn as nn
|
28 |
-
from huggingface_hub import PyTorchModelHubMixin
|
29 |
-
|
30 |
-
class AirPollutionNet(nn.Module, PyTorchModelHubMixin):
|
31 |
-
def __init__(self, input_size: int, layers: list[int], dropout_rate: float):
|
32 |
-
super(AirPollutionNet, self).__init__()
|
33 |
-
self.layers_list = nn.ModuleList()
|
34 |
-
in_features = input_size
|
35 |
-
|
36 |
-
for units in layers:
|
37 |
-
self.layers_list.append(nn.Linear(in_features, units))
|
38 |
-
self.layers_list.append(nn.ReLU())
|
39 |
-
self.layers_list.append(nn.Dropout(p=dropout_rate))
|
40 |
-
in_features = units
|
41 |
-
|
42 |
-
self.output = nn.Linear(in_features, 3) # Output size is 3 for next 3 days
|
43 |
-
|
44 |
-
def forward(self, x: torch.Tensor) -> torch.Tensor:
|
45 |
-
"""
|
46 |
-
Forward pass of the neural network.
|
47 |
-
|
48 |
-
Args:
|
49 |
-
x (torch.Tensor): Input tensor.
|
50 |
-
|
51 |
-
Returns:
|
52 |
-
torch.Tensor: Output tensor after passing through the network.
|
53 |
-
"""
|
54 |
-
for layer in self.layers_list:
|
55 |
-
x = layer(x)
|
56 |
-
x = self.output(x)
|
57 |
-
return x
|
58 |
-
|
59 |
-
model = AirPollutionNet.from_pretrained(
|
60 |
-
"akseljoonas/Utrecht_pollution_forecasting_NO2"
|
61 |
-
)
|
62 |
-
return model
|
63 |
-
|
64 |
-
|
65 |
-
def load_model(particle: str) -> object:
|
66 |
-
"""
|
67 |
-
Loads the forecasting model based on the specified particle.
|
68 |
-
|
69 |
-
Args:
|
70 |
-
particle (str): The type of particle ("O3" or "NO2").
|
71 |
-
|
72 |
-
Returns:
|
73 |
-
object: The loaded model (either a neural network or a support vector regression model).
|
74 |
-
"""
|
75 |
repo_id = f"elisaklunder/Utrecht-{particle}-Forecasting-Model"
|
76 |
if particle == "O3":
|
77 |
file_name = "O3_svr_model.pkl"
|
78 |
-
|
79 |
-
|
80 |
-
else:
|
81 |
-
model = load_nn()
|
82 |
|
|
|
|
|
83 |
return model
|
84 |
|
85 |
|
86 |
-
def run_model(particle
|
87 |
-
"""
|
88 |
-
Runs the model for the specified particle and makes predictions based on the input data.
|
89 |
-
|
90 |
-
Args:
|
91 |
-
particle (str): The type of particle ("O3" or "NO2").
|
92 |
-
data (pd.DataFrame): The input data for making predictions.
|
93 |
-
|
94 |
-
Returns:
|
95 |
-
list: The predictions for the specified particle.
|
96 |
-
"""
|
97 |
input_data = create_features(data=data, target_particle=particle)
|
98 |
model = load_model(particle)
|
|
|
99 |
|
100 |
-
|
101 |
-
|
102 |
-
|
103 |
-
repo_id = "akseljoonas/Utrecht_pollution_forecasting_NO2"
|
104 |
-
file_name = "target_scaler_NO2.joblib"
|
105 |
-
path = hf_hub_download(repo_id=repo_id, filename=file_name)
|
106 |
-
else:
|
107 |
-
prediction = model.predict(input_data)
|
108 |
-
|
109 |
-
repo_id = f"elisaklunder/Utrecht-{particle}-Forecasting-Model"
|
110 |
-
file_name = f"target_scaler_{particle}.joblib"
|
111 |
-
path = hf_hub_download(repo_id=repo_id, filename=file_name)
|
112 |
-
|
113 |
target_scaler = joblib.load(path)
|
114 |
prediction = target_scaler.inverse_transform(prediction)
|
115 |
|
116 |
return prediction
|
117 |
|
118 |
|
119 |
-
def update_data_and_predictions()
|
120 |
-
"""
|
121 |
-
Updates the weather and pollution data, makes predictions for O3 and NO2,
|
122 |
-
and stores them in a CSV file.
|
123 |
-
"""
|
124 |
update_weather_data()
|
125 |
update_pollution_data()
|
126 |
|
@@ -166,16 +89,7 @@ def update_data_and_predictions() -> None:
|
|
166 |
combined_data.to_csv(PREDICTIONS_FILE, index=False)
|
167 |
|
168 |
|
169 |
-
def get_data_and_predictions()
|
170 |
-
"""
|
171 |
-
Retrieves combined data and today's predictions for O3 and NO2.
|
172 |
-
|
173 |
-
Returns:
|
174 |
-
tuple: A tuple containing:
|
175 |
-
- week_data (pd.DataFrame): The combined data for the week.
|
176 |
-
- list: Predictions for O3.
|
177 |
-
- list: Predictions for NO2.
|
178 |
-
"""
|
179 |
week_data = get_combined_data()
|
180 |
|
181 |
PREDICTIONS_FILE = "predictions_history.csv"
|
@@ -193,7 +107,3 @@ def get_data_and_predictions() -> tuple[pd.DataFrame, list, list]:
|
|
193 |
].values
|
194 |
|
195 |
return week_data, [o3_predictions], [no2_predictions]
|
196 |
-
|
197 |
-
|
198 |
-
if __name__ == "__main__":
|
199 |
-
update_data_and_predictions()
|
|
|
3 |
|
4 |
import joblib
|
5 |
import pandas as pd
|
|
|
6 |
from dotenv import load_dotenv
|
7 |
from huggingface_hub import hf_hub_download, login
|
8 |
+
|
9 |
from src.data_api_calls import (
|
10 |
get_combined_data,
|
11 |
update_pollution_data,
|
|
|
17 |
login(token=os.getenv("HUGGINGFACE_DOWNLOAD_TOKEN"))
|
18 |
|
19 |
|
20 |
+
def load_model(particle):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
21 |
repo_id = f"elisaklunder/Utrecht-{particle}-Forecasting-Model"
|
22 |
if particle == "O3":
|
23 |
file_name = "O3_svr_model.pkl"
|
24 |
+
elif particle == "NO2":
|
25 |
+
file_name = "NO2_svr_model.pkl"
|
|
|
|
|
26 |
|
27 |
+
model_path = hf_hub_download(repo_id=repo_id, filename=file_name)
|
28 |
+
model = joblib.load(model_path)
|
29 |
return model
|
30 |
|
31 |
|
32 |
+
def run_model(particle, data):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
33 |
input_data = create_features(data=data, target_particle=particle)
|
34 |
model = load_model(particle)
|
35 |
+
prediction = model.predict(input_data)
|
36 |
|
37 |
+
repo_id = f"elisaklunder/Utrecht-{particle}-Forecasting-Model"
|
38 |
+
file_name = f"target_scaler_{particle}.joblib"
|
39 |
+
path = hf_hub_download(repo_id=repo_id, filename=file_name)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
40 |
target_scaler = joblib.load(path)
|
41 |
prediction = target_scaler.inverse_transform(prediction)
|
42 |
|
43 |
return prediction
|
44 |
|
45 |
|
46 |
+
def update_data_and_predictions():
|
|
|
|
|
|
|
|
|
47 |
update_weather_data()
|
48 |
update_pollution_data()
|
49 |
|
|
|
89 |
combined_data.to_csv(PREDICTIONS_FILE, index=False)
|
90 |
|
91 |
|
92 |
+
def get_data_and_predictions():
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
93 |
week_data = get_combined_data()
|
94 |
|
95 |
PREDICTIONS_FILE = "predictions_history.csv"
|
|
|
107 |
].values
|
108 |
|
109 |
return week_data, [o3_predictions], [no2_predictions]
|
|
|
|
|
|
|
|
test.ipynb
ADDED
@@ -0,0 +1,327 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"cells": [
|
3 |
+
{
|
4 |
+
"cell_type": "code",
|
5 |
+
"execution_count": 1,
|
6 |
+
"metadata": {},
|
7 |
+
"outputs": [
|
8 |
+
{
|
9 |
+
"name": "stderr",
|
10 |
+
"output_type": "stream",
|
11 |
+
"text": [
|
12 |
+
"c:\\Users\\elikl\\Documents\\Uni\\yr3\\ML for industry\\utrecht-pollution-prediction\\.venv\\Lib\\site-packages\\tqdm\\auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
|
13 |
+
" from .autonotebook import tqdm as notebook_tqdm\n"
|
14 |
+
]
|
15 |
+
}
|
16 |
+
],
|
17 |
+
"source": [
|
18 |
+
"from src.predict import get_data_and_predictions\n",
|
19 |
+
"from src.data_api_calls import get_combined_data\n",
|
20 |
+
"from src.past_data_api_calls import get_past_combined_data"
|
21 |
+
]
|
22 |
+
},
|
23 |
+
{
|
24 |
+
"cell_type": "code",
|
25 |
+
"execution_count": 2,
|
26 |
+
"metadata": {},
|
27 |
+
"outputs": [
|
28 |
+
{
|
29 |
+
"name": "stdout",
|
30 |
+
"output_type": "stream",
|
31 |
+
"text": [
|
32 |
+
"Data is already up to date.\n",
|
33 |
+
"Data is already up to date.\n",
|
34 |
+
"Number of rows with missing values dropped: 7\n",
|
35 |
+
"Data is already up to date.\n",
|
36 |
+
"Number of rows with missing values dropped: 7\n"
|
37 |
+
]
|
38 |
+
}
|
39 |
+
],
|
40 |
+
"source": [
|
41 |
+
"week_data, predictions_O3, predictions_NO2 = get_data_and_predictions()"
|
42 |
+
]
|
43 |
+
},
|
44 |
+
{
|
45 |
+
"cell_type": "code",
|
46 |
+
"execution_count": 3,
|
47 |
+
"metadata": {},
|
48 |
+
"outputs": [
|
49 |
+
{
|
50 |
+
"data": {
|
51 |
+
"text/html": [
|
52 |
+
"<div>\n",
|
53 |
+
"<style scoped>\n",
|
54 |
+
" .dataframe tbody tr th:only-of-type {\n",
|
55 |
+
" vertical-align: middle;\n",
|
56 |
+
" }\n",
|
57 |
+
"\n",
|
58 |
+
" .dataframe tbody tr th {\n",
|
59 |
+
" vertical-align: top;\n",
|
60 |
+
" }\n",
|
61 |
+
"\n",
|
62 |
+
" .dataframe thead th {\n",
|
63 |
+
" text-align: right;\n",
|
64 |
+
" }\n",
|
65 |
+
"</style>\n",
|
66 |
+
"<table border=\"1\" class=\"dataframe\">\n",
|
67 |
+
" <thead>\n",
|
68 |
+
" <tr style=\"text-align: right;\">\n",
|
69 |
+
" <th></th>\n",
|
70 |
+
" <th>date</th>\n",
|
71 |
+
" <th>NO2</th>\n",
|
72 |
+
" <th>O3</th>\n",
|
73 |
+
" <th>wind_speed</th>\n",
|
74 |
+
" <th>mean_temp</th>\n",
|
75 |
+
" <th>global_radiation</th>\n",
|
76 |
+
" <th>percipitation</th>\n",
|
77 |
+
" <th>pressure</th>\n",
|
78 |
+
" <th>minimum_visibility</th>\n",
|
79 |
+
" <th>humidity</th>\n",
|
80 |
+
" <th>weekday</th>\n",
|
81 |
+
" </tr>\n",
|
82 |
+
" </thead>\n",
|
83 |
+
" <tbody>\n",
|
84 |
+
" <tr>\n",
|
85 |
+
" <th>0</th>\n",
|
86 |
+
" <td>2024-10-17</td>\n",
|
87 |
+
" <td>22.804605</td>\n",
|
88 |
+
" <td>22.769160</td>\n",
|
89 |
+
" <td>51</td>\n",
|
90 |
+
" <td>169</td>\n",
|
91 |
+
" <td>43</td>\n",
|
92 |
+
" <td>6</td>\n",
|
93 |
+
" <td>10100</td>\n",
|
94 |
+
" <td>371</td>\n",
|
95 |
+
" <td>86</td>\n",
|
96 |
+
" <td>Thursday</td>\n",
|
97 |
+
" </tr>\n",
|
98 |
+
" <tr>\n",
|
99 |
+
" <th>1</th>\n",
|
100 |
+
" <td>2024-10-18</td>\n",
|
101 |
+
" <td>23.268500</td>\n",
|
102 |
+
" <td>23.307332</td>\n",
|
103 |
+
" <td>21</td>\n",
|
104 |
+
" <td>155</td>\n",
|
105 |
+
" <td>42</td>\n",
|
106 |
+
" <td>39</td>\n",
|
107 |
+
" <td>10140</td>\n",
|
108 |
+
" <td>45</td>\n",
|
109 |
+
" <td>97</td>\n",
|
110 |
+
" <td>Friday</td>\n",
|
111 |
+
" </tr>\n",
|
112 |
+
" <tr>\n",
|
113 |
+
" <th>2</th>\n",
|
114 |
+
" <td>2024-10-19</td>\n",
|
115 |
+
" <td>23.910064</td>\n",
|
116 |
+
" <td>23.171714</td>\n",
|
117 |
+
" <td>41</td>\n",
|
118 |
+
" <td>147</td>\n",
|
119 |
+
" <td>43</td>\n",
|
120 |
+
" <td>16</td>\n",
|
121 |
+
" <td>10141</td>\n",
|
122 |
+
" <td>228</td>\n",
|
123 |
+
" <td>89</td>\n",
|
124 |
+
" <td>Saturday</td>\n",
|
125 |
+
" </tr>\n",
|
126 |
+
" <tr>\n",
|
127 |
+
" <th>3</th>\n",
|
128 |
+
" <td>2024-10-20</td>\n",
|
129 |
+
" <td>22.573238</td>\n",
|
130 |
+
" <td>23.537845</td>\n",
|
131 |
+
" <td>81</td>\n",
|
132 |
+
" <td>155</td>\n",
|
133 |
+
" <td>0</td>\n",
|
134 |
+
" <td>5</td>\n",
|
135 |
+
" <td>10160</td>\n",
|
136 |
+
" <td>415</td>\n",
|
137 |
+
" <td>83</td>\n",
|
138 |
+
" <td>Sunday</td>\n",
|
139 |
+
" </tr>\n",
|
140 |
+
" <tr>\n",
|
141 |
+
" <th>4</th>\n",
|
142 |
+
" <td>2024-10-21</td>\n",
|
143 |
+
" <td>21.145700</td>\n",
|
144 |
+
" <td>24.020696</td>\n",
|
145 |
+
" <td>58</td>\n",
|
146 |
+
" <td>144</td>\n",
|
147 |
+
" <td>27</td>\n",
|
148 |
+
" <td>43</td>\n",
|
149 |
+
" <td>10206</td>\n",
|
150 |
+
" <td>220</td>\n",
|
151 |
+
" <td>92</td>\n",
|
152 |
+
" <td>Monday</td>\n",
|
153 |
+
" </tr>\n",
|
154 |
+
" <tr>\n",
|
155 |
+
" <th>5</th>\n",
|
156 |
+
" <td>2024-10-22</td>\n",
|
157 |
+
" <td>21.776580</td>\n",
|
158 |
+
" <td>23.335886</td>\n",
|
159 |
+
" <td>53</td>\n",
|
160 |
+
" <td>114</td>\n",
|
161 |
+
" <td>57</td>\n",
|
162 |
+
" <td>49</td>\n",
|
163 |
+
" <td>10269</td>\n",
|
164 |
+
" <td>226</td>\n",
|
165 |
+
" <td>92</td>\n",
|
166 |
+
" <td>Tuesday</td>\n",
|
167 |
+
" </tr>\n",
|
168 |
+
" <tr>\n",
|
169 |
+
" <th>6</th>\n",
|
170 |
+
" <td>2024-10-23</td>\n",
|
171 |
+
" <td>21.974794</td>\n",
|
172 |
+
" <td>22.214689</td>\n",
|
173 |
+
" <td>36</td>\n",
|
174 |
+
" <td>112</td>\n",
|
175 |
+
" <td>12</td>\n",
|
176 |
+
" <td>0</td>\n",
|
177 |
+
" <td>10328</td>\n",
|
178 |
+
" <td>65</td>\n",
|
179 |
+
" <td>97</td>\n",
|
180 |
+
" <td>Wednesday</td>\n",
|
181 |
+
" </tr>\n",
|
182 |
+
" <tr>\n",
|
183 |
+
" <th>7</th>\n",
|
184 |
+
" <td>2024-10-24</td>\n",
|
185 |
+
" <td>25.512568</td>\n",
|
186 |
+
" <td>20.913710</td>\n",
|
187 |
+
" <td>56</td>\n",
|
188 |
+
" <td>104</td>\n",
|
189 |
+
" <td>62</td>\n",
|
190 |
+
" <td>0</td>\n",
|
191 |
+
" <td>10247</td>\n",
|
192 |
+
" <td>130</td>\n",
|
193 |
+
" <td>94</td>\n",
|
194 |
+
" <td>Thursday</td>\n",
|
195 |
+
" </tr>\n",
|
196 |
+
" </tbody>\n",
|
197 |
+
"</table>\n",
|
198 |
+
"</div>"
|
199 |
+
],
|
200 |
+
"text/plain": [
|
201 |
+
" date NO2 O3 wind_speed mean_temp global_radiation \\\n",
|
202 |
+
"0 2024-10-17 22.804605 22.769160 51 169 43 \n",
|
203 |
+
"1 2024-10-18 23.268500 23.307332 21 155 42 \n",
|
204 |
+
"2 2024-10-19 23.910064 23.171714 41 147 43 \n",
|
205 |
+
"3 2024-10-20 22.573238 23.537845 81 155 0 \n",
|
206 |
+
"4 2024-10-21 21.145700 24.020696 58 144 27 \n",
|
207 |
+
"5 2024-10-22 21.776580 23.335886 53 114 57 \n",
|
208 |
+
"6 2024-10-23 21.974794 22.214689 36 112 12 \n",
|
209 |
+
"7 2024-10-24 25.512568 20.913710 56 104 62 \n",
|
210 |
+
"\n",
|
211 |
+
" percipitation pressure minimum_visibility humidity weekday \n",
|
212 |
+
"0 6 10100 371 86 Thursday \n",
|
213 |
+
"1 39 10140 45 97 Friday \n",
|
214 |
+
"2 16 10141 228 89 Saturday \n",
|
215 |
+
"3 5 10160 415 83 Sunday \n",
|
216 |
+
"4 43 10206 220 92 Monday \n",
|
217 |
+
"5 49 10269 226 92 Tuesday \n",
|
218 |
+
"6 0 10328 65 97 Wednesday \n",
|
219 |
+
"7 0 10247 130 94 Thursday "
|
220 |
+
]
|
221 |
+
},
|
222 |
+
"execution_count": 3,
|
223 |
+
"metadata": {},
|
224 |
+
"output_type": "execute_result"
|
225 |
+
}
|
226 |
+
],
|
227 |
+
"source": [
|
228 |
+
"week_data"
|
229 |
+
]
|
230 |
+
},
|
231 |
+
{
|
232 |
+
"cell_type": "code",
|
233 |
+
"execution_count": 4,
|
234 |
+
"metadata": {},
|
235 |
+
"outputs": [
|
236 |
+
{
|
237 |
+
"data": {
|
238 |
+
"text/plain": [
|
239 |
+
"array([[10.33808859, 16.00098432, 19.64377496]])"
|
240 |
+
]
|
241 |
+
},
|
242 |
+
"execution_count": 4,
|
243 |
+
"metadata": {},
|
244 |
+
"output_type": "execute_result"
|
245 |
+
}
|
246 |
+
],
|
247 |
+
"source": [
|
248 |
+
"predictions_O3"
|
249 |
+
]
|
250 |
+
},
|
251 |
+
{
|
252 |
+
"cell_type": "code",
|
253 |
+
"execution_count": 5,
|
254 |
+
"metadata": {},
|
255 |
+
"outputs": [
|
256 |
+
{
|
257 |
+
"data": {
|
258 |
+
"text/plain": [
|
259 |
+
"array([[25.68519992, 25.76030745, 31.21057679]])"
|
260 |
+
]
|
261 |
+
},
|
262 |
+
"execution_count": 5,
|
263 |
+
"metadata": {},
|
264 |
+
"output_type": "execute_result"
|
265 |
+
}
|
266 |
+
],
|
267 |
+
"source": [
|
268 |
+
"predictions_NO2"
|
269 |
+
]
|
270 |
+
},
|
271 |
+
{
|
272 |
+
"cell_type": "code",
|
273 |
+
"execution_count": 1,
|
274 |
+
"metadata": {},
|
275 |
+
"outputs": [],
|
276 |
+
"source": [
|
277 |
+
"from src.data_api_calls import get_combined_data"
|
278 |
+
]
|
279 |
+
},
|
280 |
+
{
|
281 |
+
"cell_type": "code",
|
282 |
+
"execution_count": 2,
|
283 |
+
"metadata": {},
|
284 |
+
"outputs": [
|
285 |
+
{
|
286 |
+
"ename": "TypeError",
|
287 |
+
"evalue": "'<' not supported between instances of 'Timestamp' and 'str'",
|
288 |
+
"output_type": "error",
|
289 |
+
"traceback": [
|
290 |
+
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
|
291 |
+
"\u001b[1;31mTypeError\u001b[0m Traceback (most recent call last)",
|
292 |
+
"Cell \u001b[1;32mIn[2], line 1\u001b[0m\n\u001b[1;32m----> 1\u001b[0m \u001b[43mget_combined_data\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43m2024-10-10\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n",
|
293 |
+
"File \u001b[1;32mc:\\Users\\elikl\\Documents\\Uni\\yr3\\ML for industry\\utrecht-pollution-prediction\\src\\data_api_calls.py:136\u001b[0m, in \u001b[0;36mget_combined_data\u001b[1;34m(input_date)\u001b[0m\n\u001b[0;32m 133\u001b[0m start_date \u001b[38;5;241m=\u001b[39m end_date \u001b[38;5;241m-\u001b[39m timedelta(\u001b[38;5;241m7\u001b[39m)\n\u001b[0;32m 135\u001b[0m update_weather_data(start_date, end_date)\n\u001b[1;32m--> 136\u001b[0m \u001b[43mupdate_pollution_data\u001b[49m\u001b[43m(\u001b[49m\u001b[43mstart_date\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mend_date\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 138\u001b[0m weather_df \u001b[38;5;241m=\u001b[39m pd\u001b[38;5;241m.\u001b[39mread_csv(WEATHER_DATA_FILE)\n\u001b[0;32m 140\u001b[0m weather_df\u001b[38;5;241m.\u001b[39minsert(\u001b[38;5;241m1\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mNO2\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;28;01mNone\u001b[39;00m)\n",
|
294 |
+
"File \u001b[1;32mc:\\Users\\elikl\\Documents\\Uni\\yr3\\ML for industry\\utrecht-pollution-prediction\\src\\data_api_calls.py:123\u001b[0m, in \u001b[0;36mupdate_pollution_data\u001b[1;34m(start_date, end_date)\u001b[0m\n\u001b[0;32m 121\u001b[0m updated_data \u001b[38;5;241m=\u001b[39m pd\u001b[38;5;241m.\u001b[39mconcat([existing_data, new_data], ignore_index\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n\u001b[0;32m 122\u001b[0m updated_data\u001b[38;5;241m.\u001b[39mdrop_duplicates(subset\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mdate\u001b[39m\u001b[38;5;124m\"\u001b[39m, keep\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mlast\u001b[39m\u001b[38;5;124m\"\u001b[39m, inplace\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n\u001b[1;32m--> 123\u001b[0m updated_data \u001b[38;5;241m=\u001b[39m \u001b[43mupdated_data\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msort_values\u001b[49m\u001b[43m(\u001b[49m\u001b[43mby\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mdate\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[0;32m 124\u001b[0m updated_data\u001b[38;5;241m.\u001b[39mto_csv(POLLUTION_DATA_FILE, index\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m)\n",
|
295 |
+
"File \u001b[1;32mc:\\Users\\elikl\\Documents\\Uni\\yr3\\ML for industry\\utrecht-pollution-prediction\\.venv\\Lib\\site-packages\\pandas\\core\\frame.py:7200\u001b[0m, in \u001b[0;36mDataFrame.sort_values\u001b[1;34m(self, by, axis, ascending, inplace, kind, na_position, ignore_index, key)\u001b[0m\n\u001b[0;32m 7197\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(ascending, (\u001b[38;5;28mtuple\u001b[39m, \u001b[38;5;28mlist\u001b[39m)):\n\u001b[0;32m 7198\u001b[0m ascending \u001b[38;5;241m=\u001b[39m ascending[\u001b[38;5;241m0\u001b[39m]\n\u001b[1;32m-> 7200\u001b[0m indexer \u001b[38;5;241m=\u001b[39m \u001b[43mnargsort\u001b[49m\u001b[43m(\u001b[49m\n\u001b[0;32m 7201\u001b[0m \u001b[43m \u001b[49m\u001b[43mk\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mkind\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mkind\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mascending\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mascending\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mna_position\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mna_position\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mkey\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mkey\u001b[49m\n\u001b[0;32m 7202\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 7203\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m 7204\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m inplace:\n",
|
296 |
+
"File \u001b[1;32mc:\\Users\\elikl\\Documents\\Uni\\yr3\\ML for industry\\utrecht-pollution-prediction\\.venv\\Lib\\site-packages\\pandas\\core\\sorting.py:439\u001b[0m, in \u001b[0;36mnargsort\u001b[1;34m(items, kind, ascending, na_position, key, mask)\u001b[0m\n\u001b[0;32m 437\u001b[0m non_nans \u001b[38;5;241m=\u001b[39m non_nans[::\u001b[38;5;241m-\u001b[39m\u001b[38;5;241m1\u001b[39m]\n\u001b[0;32m 438\u001b[0m non_nan_idx \u001b[38;5;241m=\u001b[39m non_nan_idx[::\u001b[38;5;241m-\u001b[39m\u001b[38;5;241m1\u001b[39m]\n\u001b[1;32m--> 439\u001b[0m indexer \u001b[38;5;241m=\u001b[39m non_nan_idx[\u001b[43mnon_nans\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43margsort\u001b[49m\u001b[43m(\u001b[49m\u001b[43mkind\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mkind\u001b[49m\u001b[43m)\u001b[49m]\n\u001b[0;32m 440\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m ascending:\n\u001b[0;32m 441\u001b[0m indexer \u001b[38;5;241m=\u001b[39m indexer[::\u001b[38;5;241m-\u001b[39m\u001b[38;5;241m1\u001b[39m]\n",
|
297 |
+
"\u001b[1;31mTypeError\u001b[0m: '<' not supported between instances of 'Timestamp' and 'str'"
|
298 |
+
]
|
299 |
+
}
|
300 |
+
],
|
301 |
+
"source": [
|
302 |
+
"get_combined_data(\"2024-10-10\")"
|
303 |
+
]
|
304 |
+
}
|
305 |
+
],
|
306 |
+
"metadata": {
|
307 |
+
"kernelspec": {
|
308 |
+
"display_name": ".venv",
|
309 |
+
"language": "python",
|
310 |
+
"name": "python3"
|
311 |
+
},
|
312 |
+
"language_info": {
|
313 |
+
"codemirror_mode": {
|
314 |
+
"name": "ipython",
|
315 |
+
"version": 3
|
316 |
+
},
|
317 |
+
"file_extension": ".py",
|
318 |
+
"mimetype": "text/x-python",
|
319 |
+
"name": "python",
|
320 |
+
"nbconvert_exporter": "python",
|
321 |
+
"pygments_lexer": "ipython3",
|
322 |
+
"version": "3.11.8"
|
323 |
+
}
|
324 |
+
},
|
325 |
+
"nbformat": 4,
|
326 |
+
"nbformat_minor": 2
|
327 |
+
}
|
weather_data.csv
CHANGED
@@ -13,16 +13,3 @@ date,temp,humidity,precip,windspeed,sealevelpressure,visibility,solarradiation
|
|
13 |
2024-10-28,12.4,91.8,1.1,31.7,1021.8,12.8,27.3
|
14 |
2024-10-29,13.8,95.9,0.2,20.5,1023.1,8.1,16.0
|
15 |
2024-10-30,12.7,92.9,0.6,9.4,1027.5,12.5,32.8
|
16 |
-
2024-10-31,12.5,89.9,0.0,11.2,1027.1,17.1,70.6
|
17 |
-
2024-11-01,9.9,96.9,0.0,14.4,1024.3,3.2,10.0
|
18 |
-
2024-11-02,10.0,87.6,0.5,14.4,1030.9,17.9,36.1
|
19 |
-
2024-11-03,5.7,91.3,0.0,7.2,1030.3,14.6,37.7
|
20 |
-
2024-11-04,7.3,90.5,0.0,11.9,1027.6,24.1,38.4
|
21 |
-
2024-11-05,7.0,89.9,0.0,13.0,1023.4,9.0,33.1
|
22 |
-
2024-11-06,8.9,94.4,0.0,9.4,1029.7,12.6,54.7
|
23 |
-
2024-11-07,7.2,90.8,0.0,13.7,1032.8,16.9,76.6
|
24 |
-
2024-11-08,4.9,89.5,0.0,14.4,1028.3,21.5,10.8
|
25 |
-
2024-11-09,6.3,93.0,0.0,11.2,1024.9,6.3,14.5
|
26 |
-
2024-11-10,8.9,94.2,0.0,12.6,1026.6,14.3,32.7
|
27 |
-
2024-11-11,10.6,84.7,2.1,27.7,1028.4,9.9,22.3
|
28 |
-
2024-11-12,8.6,86.2,0.8,22.3,1032.3,18.8,33.2
|
|
|
13 |
2024-10-28,12.4,91.8,1.1,31.7,1021.8,12.8,27.3
|
14 |
2024-10-29,13.8,95.9,0.2,20.5,1023.1,8.1,16.0
|
15 |
2024-10-30,12.7,92.9,0.6,9.4,1027.5,12.5,32.8
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|