Minh Van Pham
committed on
Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,176 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import numpy as np
|
2 |
+
import pandas as pd
|
3 |
+
import matplotlib.pyplot as plt
|
4 |
+
from mpl_toolkits.mplot3d import Axes3D
|
5 |
+
from sklearn.model_selection import train_test_split
|
6 |
+
import gradio as gr
|
7 |
+
import plotly.graph_objects as go
|
8 |
+
|
9 |
+
class LinearRegression:
    """Linear regression on min-max scaled features, fitted by gradient descent.

    The optimizer keeps exponential moving averages of the gradient
    (factor 0.9) and of the squared gradient (factor 0.99) and divides the
    first by the square root of the second, without bias correction. The
    learning rate is reduced in steps of 50 iterations.

    Attributes:
        eta: Base learning rate.
        n_iter: Number of gradient-descent iterations run by ``fit``.
        w: Weight column vector of shape (n_features, 1); None before ``fit``.
        b: Bias term; None before ``fit``.
        Lambda: Stored constant (0.7). It is forwarded to ``Grad_de`` but is
            not applied to the cost or to the gradient anywhere in this class.
        Min, Max: Per-feature minimum / maximum learned by ``standardize``.
        cost_history: Cost after every iteration of the last ``fit`` call.
    """

    def __init__(self, eta = 0.01, n_iter = 1000):
        self.eta = eta
        self.n_iter = n_iter
        self.w = None
        self.b = None
        self.Lambda = 0.7
        self.Min = None
        self.Max = None
        self.cost_history = None

    def _feature_span(self):
        """Return ``Max - Min`` with zero spans replaced by 1 to avoid 0/0."""
        span = self.Max - self.Min
        return np.where(span == 0, 1.0, span)

    def ScaleData(self, X):
        """Min-max scale ``X`` with the ``Min``/``Max`` learned by ``standardize``."""
        return (X - self.Min) / self._feature_span()

    def compute_cost(self, X, y, w, b):
        """Return the halved mean squared error of ``X @ w + b`` against ``y``."""
        m = len(y)
        return np.sum(((np.dot(X, w) + b) - y) ** 2) / (2 * m)

    @staticmethod
    def SubGrad(X, y, w, b):
        """Return the gradient of the cost with respect to ``w``.

        Args:
            X: Feature matrix of shape (m, n).
            y: Targets of shape (m, 1).
            w: Weights of shape (n, 1).
            b: Bias scalar.

        Returns:
            Array of shape (n, 1).
        """
        m = X.shape[0]
        residual = ((np.dot(X, w) + b) - y).reshape(m, 1)
        return (np.dot(X.T, residual) / m).reshape((X.shape[1], 1))

    def Grad_de(self, X, y, X_test, y_test, w, eta, b, lamda, decay_rate = 0.045, n_iter = 1000):
        """Run the gradient-descent loop and return the fitted parameters.

        Args:
            X, y: Data used to compute the gradients; ``y`` has shape (m, 1).
            X_test, y_test: Data used only to record the cost per iteration.
            w: Initial weights of shape (n, 1). Not modified in place.
            eta: Base learning rate.
            b: Initial bias.
            lamda: Accepted for interface compatibility; not used by the
                update rule.
            decay_rate: Learning-rate decay applied once per 50 iterations.
            n_iter: Number of iterations (default 1000, the value that was
                previously hard-coded).

        Returns:
            Tuple ``(w, b, cost_history)``.
        """
        n_features = X.shape[1]
        m = X.shape[0]
        avg_grad_w = np.zeros((n_features, 1))
        avg_sq_grad_w = np.zeros((n_features, 1))
        avg_grad_b = 0.0
        avg_sq_grad_b = 0.0
        step_size = eta
        cost_history = []

        for step in range(1, n_iter + 1):
            grad_w = self.SubGrad(X, y, w, b)
            grad_b = np.sum(((np.dot(X, w) + b) - y) / m)

            # Moving averages of the gradient and of its square.
            avg_grad_w = 0.9 * avg_grad_w + 0.1 * grad_w
            avg_grad_b = 0.9 * avg_grad_b + 0.1 * grad_b
            avg_sq_grad_w = 0.99 * avg_sq_grad_w + 0.01 * (grad_w ** 2)
            avg_sq_grad_b = 0.99 * avg_sq_grad_b + 0.01 * (grad_b ** 2)

            # 1e-8 keeps the division finite when a squared-gradient average is 0.
            w = w - (step_size / (np.sqrt(avg_sq_grad_w) + 1e-8)) * avg_grad_w
            b = b - (step_size / (np.sqrt(avg_sq_grad_b) + 1e-8)) * avg_grad_b

            cost_history.append(self.compute_cost(X_test, y_test, w, b))
            # Step decay: the rate drops every 50 completed iterations.
            step_size = eta / (1 + np.floor(step / 50) * decay_rate)

        return (w, b, cost_history)

    def standardize(self, X):
        """Learn per-feature ``Min``/``Max`` from ``X`` and return ``X`` scaled.

        Raises:
            ValueError: If ``X`` has no rows (raised by the NumPy reduction).
        """
        X = np.asarray(X, dtype=float)
        self.Min = X.min(axis=0)
        self.Max = X.max(axis=0)
        return self.ScaleData(X)

    def fit(self, X, y):
        """Fit the model on ``X`` (m, n) and ``y`` (m,) or (m, 1); return ``self``.

        The cost history is measured on the (scaled) training data because no
        separate evaluation set is available here.
        """
        X = np.asarray(X, dtype=float)
        # A 1-D y would broadcast against the (m, 1) predictions into (m, m).
        y = np.asarray(y, dtype=float).reshape(-1, 1)
        self.w = (np.random.rand(X.shape[1]) * 0.01).reshape(X.shape[1], 1)
        self.b = 0
        self.cost = []
        X = self.standardize(X)
        (self.w, self.b, self.cost_history) = self.Grad_de(
            X, y, X, y, self.w, self.eta, self.b, self.Lambda,
            n_iter=self.n_iter,
        )
        return self

    def predict(self, X):
        """Return the scalar prediction for the first row of ``X``."""
        XPr = self.ScaleData(np.atleast_2d(np.asarray(X, dtype=float)))
        Xans = np.dot(XPr, self.w) + self.b
        return Xans[0][0]
|
87 |
+
|
88 |
+
# Load the dataset once. `df` keeps the raw rows (string neighborhoods) for the
# plot; `Data` is an independent copy used to build the training matrices.
# The raw string has the same value as before but avoids the invalid escape
# sequences (\P, \K, \C, \h) of the non-raw literal.
# NOTE(review): this is an absolute Windows path; it only resolves on a
# machine with that exact directory layout.
df = pd.read_csv(r"C:\Project\Kaggle\Cali_housing_Price\housing_price_dataset.csv")
Data = df.copy()
y = Data['Price']
X = Data.drop(columns=['Price'])

# Encode the neighborhood as a number: Suburb -> 2, Urban -> 3, anything
# else -> 1. Vectorized, so it does not depend on the index being 0..n-1.
X['Neighborhood'] = X['Neighborhood'].map({'Suburb': 2, 'Urban': 3}).fillna(1)

Dx = X.to_numpy(dtype=np.float64)
Dy = y.to_numpy(dtype=np.float64)
X_train, X_test, y_train, y_test = train_test_split(Dx, Dy, test_size=0.02, random_state=42)
# Kept from the original script; the model draws its own initial weights in fit().
w = (np.random.rand(X.shape[1]) * 0.01).reshape(X.shape[1], 1)
# Targets as column vectors so they line up with the (m, 1) predictions.
y_train = y_train.reshape(len(y_train), 1)
y_test = y_test.reshape(len(y_test), 1)
HousePriceModel = LinearRegression()
HousePriceModel.fit(X_train, y_train)
|
107 |
+
def HousePrice(SquareFeet, Bedrooms, Bathrooms, Neighborhood, YearBuilt):
    """Predict a house price and plot it next to similar houses from `df`.

    Args:
        SquareFeet: Living area; must be a non-negative number.
        Bedrooms: Bedroom count; must be a non-negative number.
        Bathrooms: Bathroom count; must be a non-negative number.
        Neighborhood: One of 'Rural', 'Suburb' or 'Urban'.
        YearBuilt: Construction year, between 1900 and 2024 inclusive.

    Returns:
        Tuple ``(predicted_price, figure)`` where ``figure`` is a Plotly
        scatter of matching rows from `df` plus the predicted point.

    Raises:
        gr.Error: If any input is missing or outside its accepted range.
    """
    neighborhood_codes = {'Rural': 1, 'Suburb': 2, 'Urban': 3}
    if Neighborhood not in neighborhood_codes:
        raise gr.Error("Invalid Neighborhood")
    NumNeighborhood = neighborhood_codes[Neighborhood]

    # A cleared numeric field arrives as None; report it as invalid input
    # instead of failing on the comparison with a TypeError.
    if YearBuilt is None or YearBuilt > 2024 or YearBuilt < 1900:
        raise gr.Error("Invalid Year Built")
    if SquareFeet is None or SquareFeet < 0:
        raise gr.Error("Invalid Square Feet")
    if Bedrooms is None or Bedrooms < 0:
        raise gr.Error("Invalid Bedrooms")
    if Bathrooms is None or Bathrooms < 0:
        raise gr.Error("Invalid Bathrooms")

    features = np.array([SquareFeet, Bedrooms, Bathrooms, NumNeighborhood, YearBuilt]).reshape(1, 5)
    YPredict = HousePriceModel.predict(features)

    # Similar houses: area within +/-20, same bedroom and bathroom counts,
    # built within +/-5 years. Neighborhood is not part of the filter.
    similar = (
        df['SquareFeet'].between(SquareFeet - 20, SquareFeet + 20)
        & (df['Bedrooms'] == Bedrooms)
        & (df['Bathrooms'] == Bathrooms)
        & df['YearBuilt'].between(YearBuilt - 5, YearBuilt + 5)
    )
    filter_df = df[similar]
    df_list = filter_df.values.tolist()

    fig = go.Figure()
    # NOTE(review): the customdata[1..4] indices in the hover template assume
    # the CSV columns are ordered SquareFeet, Bedrooms, Bathrooms,
    # Neighborhood, YearBuilt - confirm against the dataset.
    fig.add_trace(go.Scatter(
        customdata=df_list,
        x=filter_df['SquareFeet'].tolist(),
        y=filter_df['Price'].tolist(),
        mode='markers',
        marker=dict(color='blue'),
        hoverinfo="text",
        hovertemplate='<b>Square Feet</b>: %{x}<br><b>Bedrooms</b>: %{customdata[1]}<br><b>Bathrooms</b>: %{customdata[2]}<br><b>Neighborhood</b>: %{customdata[3]}<br><b>Year Built</b>: %{customdata[4]}<br><b>Price</b>: %{y}<extra></extra>',
        name='House Price Actual',
    ))
    fig.add_trace(go.Scatter(
        x=[SquareFeet],
        y=[YPredict],
        mode='markers',
        marker=dict(color='red'),
        hovertext='Predicted Price',
        name='House Price Prediction',
    ))

    return YPredict, fig
|
154 |
+
|
155 |
+
# Build the Gradio page: intro text, the five input fields, and a button that
# sends them to HousePrice and shows the returned price and chart.
with gr.Blocks() as demo:
    gr.Markdown(
        "\n"
        "# House Price Prediction\n"
        "This is a simple model to predict the price of a house based on its features. "
        "Square footage in the dataset ranges from about 1000 to about 3000.\n"
    )
    gr.Markdown(
        "\n"
        "Enter the features of the house and click 'Predict Price' to see the predicted price.\n"
        "The 'Similar Houses' chart plots the actual prices of houses with similar features "
        "alongside the prediction.\n"
    )
    with gr.Column():
        with gr.Row():
            # NOTE(review): the default of 250 is below the ~1000-3000 range
            # mentioned in the intro text above.
            SquareFeet = gr.Number(value=250, label="Square Feet")
            Bedrooms = gr.Number(value=3, label="Bedrooms")
            Bathrooms = gr.Number(value=1, label="Bathrooms")
            # No default selection: HousePrice rejects the request until one is chosen.
            Neighborhood = gr.Radio(["Suburb", "Urban", "Rural"], label="Neighborhood")
            YearBuilt = gr.Number(value=2020, label="Year Built")
        gr.Button("Predict Price").click(
            fn=HousePrice,
            inputs=[SquareFeet, Bedrooms, Bathrooms, Neighborhood, YearBuilt],
            outputs=[gr.Textbox(label="Predicted Price"), gr.Plot(label="Similar Houses")],
        )
demo.launch()
|