Spaces:
Sleeping
Sleeping
Shafeek Saleem
committed on
Commit
•
b3a91c7
1
Parent(s):
086dd3b
ss
Browse files
.idea/sonarlint/issuestore/6/0/603fc2a4019aac2f96f36d343c1617f2e625b0f6
DELETED
File without changes
|
.idea/sonarlint/issuestore/index.pb
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c493d713dc16abecbcf2979862f605e2882a86c4027cec59edadf806b82f7144
|
3 |
+
size 130
|
pages/3_Training the Model.py
CHANGED
@@ -24,9 +24,10 @@ LEVEL = 3
|
|
24 |
File_PATH = 'datasets/Building_forcasting.csv'
|
25 |
|
26 |
def process_file(csv_file):
|
27 |
-
data = pd.read_csv(csv_file
|
28 |
-
data
|
29 |
-
data =
|
|
|
30 |
return data
|
31 |
|
32 |
|
@@ -41,38 +42,46 @@ def model_train(train_X, train_y, model_choice, train_size):
|
|
41 |
return model, X_test, y_test
|
42 |
|
43 |
|
44 |
-
def create_model_inputs(data, lag, mean_period):
|
45 |
df_processed = data.copy()
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
56 |
st.subheader("Model Performance")
|
57 |
st.write(f"Test R2 score: {r2_score(y_test, y_pred):.2f}")
|
58 |
|
59 |
fig, axs = plt.subplots(3, figsize=(12, 18))
|
60 |
-
axs[0].plot(y_test.index, y_pred
|
61 |
-
axs[0].plot(y_test.index, y_test[
|
62 |
axs[0].legend()
|
63 |
-
axs[0].set_title('Prediction vs Actual (
|
64 |
-
axs[0].set_xlabel('Date')
|
65 |
-
axs[0].set_ylabel('
|
66 |
|
67 |
-
axs[1].plot(y_test.index, y_pred
|
68 |
-
axs[1].set_title('Predicted
|
69 |
-
axs[1].set_xlabel('Date')
|
70 |
-
axs[1].set_ylabel('
|
71 |
|
72 |
-
axs[2].plot(y_test.index, y_test[
|
73 |
-
axs[2].set_title('Actual
|
74 |
-
axs[2].set_xlabel('Date')
|
75 |
-
axs[2].set_ylabel('
|
76 |
|
77 |
fig.tight_layout()
|
78 |
with _lock:
|
@@ -151,8 +160,10 @@ def step3_page():
|
|
151 |
if state == "preprocessing":
|
152 |
st.subheader("Step 2: Data Preprocessing and Feature Engineering")
|
153 |
st.write("Now let's preprocess our dataset to handle missing values, outliers and inconsistencies and then perform feature engineering tasks to extract meaningful features from the raw data. Finally we need to separate training variables (X) and target variable (y).")
|
154 |
-
|
155 |
-
|
|
|
|
|
156 |
cols = st.columns(2)
|
157 |
state = "splitting"
|
158 |
with cols[0]:
|
@@ -199,7 +210,7 @@ def step3_page():
|
|
199 |
st.subheader("Step 5: Model Evaluation")
|
200 |
st.write("Now, let's evaluate our weather forecasting model's performance against the test data set.")
|
201 |
y_pred = model.predict(X_test)
|
202 |
-
fig = show_output(y_test, y_pred)
|
203 |
# download_link(y_test, y_pred)
|
204 |
#
|
205 |
# download_plot(fig)
|
|
|
24 |
File_PATH = 'datasets/Building_forcasting.csv'
|
25 |
|
26 |
def process_file(csv_file):
    """Load the CSV and index its rows by a combined "<Date>_<Location>" label.

    Args:
        csv_file: path or file-like object accepted by ``pd.read_csv``; the
            file must provide ``Date`` and ``Location`` columns.

    Returns:
        DataFrame whose index (named ``index``) is the raw ``Date`` text joined
        to ``Location`` with an underscore, and whose ``Date`` column has been
        converted to datetimes.
    """
    frame = pd.read_csv(csv_file)
    # Build the labels from the raw text before Date is converted to datetimes.
    row_labels = frame['Date'].str.cat(frame['Location'], sep='_')
    frame = frame.assign(index=row_labels).set_index('index')
    frame['Date'] = pd.to_datetime(frame['Date'])
    return frame
|
32 |
|
33 |
|
|
|
42 |
return model, X_test, y_test
|
43 |
|
44 |
|
45 |
+
def create_model_inputs(data, lag, mean_period, target_variable):
    """Build the feature matrix and the shifted forecasting target.

    Args:
        data: DataFrame of weather observations. It must contain every column
            named in this function (numeric measurements, the categorical
            direction/location columns, ``RainToday`` and the four columns
            that get dropped). The input is not modified.
        lag: number of rows ahead from which the target value is taken.
        mean_period: window length of the rolling-mean feature.
        target_variable: name of the column to forecast. The
            ``"<column>Tomorrow"`` spelling is accepted as well and resolved
            to ``<column>`` when only the latter exists in ``data``.

    Returns:
        Tuple ``(X, y, target_name)``: the one-hot encoded features, the
        target series aligned row-for-row with ``X``, and the name of the
        target (``<column>Tomorrow``).

    Notes:
        The last ``lag`` rows of ``y`` and the first ``mean_period - 1`` rows
        of the rolling-mean feature are NaN (consequence of ``shift`` and
        ``rolling``); they are returned as-is.
        NOTE(review): confirm the training step drops or imputes those rows.
    """
    df_processed = data.copy()

    # Callers pick from names that already end in "Tomorrow"; map such a name
    # back to its source column instead of failing on a missing column.
    suffix = "Tomorrow"
    if (target_variable not in df_processed.columns
            and target_variable.endswith(suffix)
            and target_variable[:-len(suffix)] in df_processed.columns):
        target_variable = target_variable[:-len(suffix)]
    target_name = target_variable + suffix

    selected_columns = ["MinTemp", "MaxTemp", "Rainfall", "WindGustSpeed", "WindSpeed9am", "WindSpeed3pm",
                        "Humidity9am", "Humidity3pm", "Temp9am", "Temp3pm", "Pressure9am", "Pressure3pm"]
    for col in selected_columns:
        # Assign the result back: in-place fillna on a selected column is
        # chained assignment and does not reliably update the frame.
        df_processed[col] = df_processed[col].fillna(df_processed[col].mean())

    df_processed[target_name] = df_processed[target_variable].shift(-1 * lag)
    df_processed[target_variable + "_mean"] = df_processed[target_variable].rolling(window=mean_period).mean()
    df_processed.drop(columns=['Evaporation', 'Sunshine', 'Cloud9am', 'Cloud3pm'], inplace=True)

    X = df_processed[
        ["Location", "MinTemp", "MaxTemp", "Rainfall", "WindGustDir", "WindGustSpeed", "WindDir9am", "WindDir3pm",
         "WindSpeed9am", "WindSpeed3pm", "Humidity9am", "Humidity3pm", "Pressure9am", "Pressure3pm", "Temp9am",
         "Temp3pm", "RainToday", target_variable + "_mean"]]
    X = pd.get_dummies(X, columns=['Location', 'WindGustDir', 'WindDir9am', 'WindDir3pm'])

    # X keeps df_processed's rows and order, so the target column is already
    # aligned; re-selecting by label would duplicate rows on repeated labels.
    y = df_processed[target_name]

    return X, y, target_name
|
62 |
+
|
63 |
+
|
64 |
+
def show_output(y_test, y_pred, target_variable_name):
|
65 |
st.subheader("Model Performance")
|
66 |
st.write(f"Test R2 score: {r2_score(y_test, y_pred):.2f}")
|
67 |
|
68 |
fig, axs = plt.subplots(3, figsize=(12, 18))
|
69 |
+
axs[0].plot(y_test.index, y_pred, label='Predicted')
|
70 |
+
axs[0].plot(y_test.index, y_test[target_variable_name], label='Actual')
|
71 |
axs[0].legend()
|
72 |
+
axs[0].set_title(f'Prediction vs Actual ({target_variable_name})')
|
73 |
+
axs[0].set_xlabel('Date and Location')
|
74 |
+
axs[0].set_ylabel(f'{target_variable_name}')
|
75 |
|
76 |
+
axs[1].plot(y_test.index, y_pred, label='Predicted')
|
77 |
+
axs[1].set_title(f'Predicted {target_variable_name}')
|
78 |
+
axs[1].set_xlabel('Date and Location')
|
79 |
+
axs[1].set_ylabel(f'{target_variable_name}')
|
80 |
|
81 |
+
axs[2].plot(y_test.index, y_test[target_variable_name], label='Actual')
|
82 |
+
axs[2].set_title(f'Actual {target_variable_name}')
|
83 |
+
axs[2].set_xlabel('Date and Location')
|
84 |
+
axs[2].set_ylabel(f'{target_variable_name}')
|
85 |
|
86 |
fig.tight_layout()
|
87 |
with _lock:
|
|
|
160 |
if state == "preprocessing":
|
161 |
st.subheader("Step 2: Data Preprocessing and Feature Engineering")
|
162 |
st.write("Now let's preprocess our dataset to handle missing values, outliers and inconsistencies and then perform feature engineering tasks to extract meaningful features from the raw data. Finally we need to separate training variables (X) and target variable (y).")
|
163 |
+
st.info("You can select the weather attribute that you want to forecast (WindSpeed/ Humidity/ Pressure/ Temperature) and the time of the forecast (9am tomorrow/ 3pm tomorrow)")
|
164 |
+
target_variables = ['WindSpeed9amTomorrow', 'WindSpeed3pmTomorrow', 'Humidity9amTomorrow', 'Humidity3pmTomorrow', 'Pressure9amTomorrow', 'Pressure3pmTomorrow', 'Temp9amTomorrow', 'Temp3pmTomorrow']
|
165 |
+
target_variable = st.selectbox('Select Target Variable', target_variables)
|
166 |
+
X, y, target_variable_name = create_model_inputs(data, 1, 30, target_variable)
|
167 |
cols = st.columns(2)
|
168 |
state = "splitting"
|
169 |
with cols[0]:
|
|
|
210 |
st.subheader("Step 5: Model Evaluation")
|
211 |
st.write("Now, let's evaluate our weather forecasting model's performance against the test data set.")
|
212 |
y_pred = model.predict(X_test)
|
213 |
+
fig = show_output(y_test, y_pred, target_variable_name)
|
214 |
# download_link(y_test, y_pred)
|
215 |
#
|
216 |
# download_plot(fig)
|
pages/{4_Congratulations.py → 5_Congratulations.py}
RENAMED
File without changes
|