Fah-d committed on
Commit
04e24ee
·
1 Parent(s): 5eec01e

Upload 10 files

Browse files
Nigerian Car Price EDA.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
Nigerian_Car_Price_Prediction.ipynb ADDED
@@ -0,0 +1,1072 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 3,
6
+ "id": "c0b8d60a",
7
+ "metadata": {
8
+ "id": "c0b8d60a"
9
+ },
10
+ "outputs": [],
11
+ "source": [
12
+ "import pandas as pd\n",
13
+ "import numpy as np\n",
14
+ "import seaborn as sns\n",
15
+ "import matplotlib.pyplot as plt\n",
16
+ "import warnings\n",
17
+ "warnings.filterwarnings(\"ignore\")\n",
18
+ "sns.set_style(\"darkgrid\")\n",
19
+ "sns.set_palette('RdYlGn')\n",
20
+ "\n",
21
+ "#model\n",
22
+ "from sklearn.preprocessing import LabelEncoder,StandardScaler,MinMaxScaler\n",
23
+ "from sklearn.model_selection import train_test_split\n",
24
+ "from sklearn.metrics import mean_squared_error, r2_score\n",
25
+ "from sklearn.ensemble import RandomForestRegressor\n",
26
+ "from xgboost import XGBRegressor\n",
27
+ "from sklearn.linear_model import LinearRegression\n",
28
+ "\n",
29
+ "import gradio as gr\n",
30
+ "import joblib"
31
+ ]
32
+ },
33
+ {
34
+ "cell_type": "code",
35
+ "execution_count": 4,
36
+ "id": "11273e4d",
37
+ "metadata": {
38
+ "id": "11273e4d"
39
+ },
40
+ "outputs": [],
41
+ "source": [
42
+ "df = pd.read_csv(\"/content/Nigerian_Car_Prices.csv\")"
43
+ ]
44
+ },
45
+ {
46
+ "cell_type": "code",
47
+ "execution_count": 5,
48
+ "id": "dffa0dba",
49
+ "metadata": {
50
+ "colab": {
51
+ "base_uri": "https://localhost:8080/",
52
+ "height": 340
53
+ },
54
+ "id": "dffa0dba",
55
+ "outputId": "eb17a45d-8e91-41b5-ddae-0be82f2fe1f6"
56
+ },
57
+ "outputs": [
58
+ {
59
+ "output_type": "execute_result",
60
+ "data": {
61
+ "text/plain": [
62
+ " Unnamed: 0 Make Year of manufacture Condition Mileage \\\n",
63
+ "0 0 Toyota 2007.0 Nigerian Used 166418.0 \n",
64
+ "1 1 Lexus NaN NaN 138024.0 \n",
65
+ "2 2 Mercedes-Benz 2008.0 Nigerian Used 376807.0 \n",
66
+ "3 3 Lexus NaN NaN 213362.0 \n",
67
+ "4 4 Mercedes-Benz NaN NaN 106199.0 \n",
68
+ "\n",
69
+ " Engine Size Fuel Transmission Price Build \n",
70
+ "0 2400.0 Petrol Automatic 3,120,000 NaN \n",
71
+ "1 NaN NaN Automatic 5,834,000 NaN \n",
72
+ "2 3000.0 Petrol Automatic 3,640,000 NaN \n",
73
+ "3 NaN NaN Automatic 3,594,000 NaN \n",
74
+ "4 NaN NaN Automatic 8,410,000 NaN "
75
+ ],
76
+ "text/html": [
77
+ "\n",
78
+ " <div id=\"df-e7f12378-3a0c-4bd1-b3da-c57aedf9443c\">\n",
79
+ " <div class=\"colab-df-container\">\n",
80
+ " <div>\n",
81
+ "<style scoped>\n",
82
+ " .dataframe tbody tr th:only-of-type {\n",
83
+ " vertical-align: middle;\n",
84
+ " }\n",
85
+ "\n",
86
+ " .dataframe tbody tr th {\n",
87
+ " vertical-align: top;\n",
88
+ " }\n",
89
+ "\n",
90
+ " .dataframe thead th {\n",
91
+ " text-align: right;\n",
92
+ " }\n",
93
+ "</style>\n",
94
+ "<table border=\"1\" class=\"dataframe\">\n",
95
+ " <thead>\n",
96
+ " <tr style=\"text-align: right;\">\n",
97
+ " <th></th>\n",
98
+ " <th>Unnamed: 0</th>\n",
99
+ " <th>Make</th>\n",
100
+ " <th>Year of manufacture</th>\n",
101
+ " <th>Condition</th>\n",
102
+ " <th>Mileage</th>\n",
103
+ " <th>Engine Size</th>\n",
104
+ " <th>Fuel</th>\n",
105
+ " <th>Transmission</th>\n",
106
+ " <th>Price</th>\n",
107
+ " <th>Build</th>\n",
108
+ " </tr>\n",
109
+ " </thead>\n",
110
+ " <tbody>\n",
111
+ " <tr>\n",
112
+ " <th>0</th>\n",
113
+ " <td>0</td>\n",
114
+ " <td>Toyota</td>\n",
115
+ " <td>2007.0</td>\n",
116
+ " <td>Nigerian Used</td>\n",
117
+ " <td>166418.0</td>\n",
118
+ " <td>2400.0</td>\n",
119
+ " <td>Petrol</td>\n",
120
+ " <td>Automatic</td>\n",
121
+ " <td>3,120,000</td>\n",
122
+ " <td>NaN</td>\n",
123
+ " </tr>\n",
124
+ " <tr>\n",
125
+ " <th>1</th>\n",
126
+ " <td>1</td>\n",
127
+ " <td>Lexus</td>\n",
128
+ " <td>NaN</td>\n",
129
+ " <td>NaN</td>\n",
130
+ " <td>138024.0</td>\n",
131
+ " <td>NaN</td>\n",
132
+ " <td>NaN</td>\n",
133
+ " <td>Automatic</td>\n",
134
+ " <td>5,834,000</td>\n",
135
+ " <td>NaN</td>\n",
136
+ " </tr>\n",
137
+ " <tr>\n",
138
+ " <th>2</th>\n",
139
+ " <td>2</td>\n",
140
+ " <td>Mercedes-Benz</td>\n",
141
+ " <td>2008.0</td>\n",
142
+ " <td>Nigerian Used</td>\n",
143
+ " <td>376807.0</td>\n",
144
+ " <td>3000.0</td>\n",
145
+ " <td>Petrol</td>\n",
146
+ " <td>Automatic</td>\n",
147
+ " <td>3,640,000</td>\n",
148
+ " <td>NaN</td>\n",
149
+ " </tr>\n",
150
+ " <tr>\n",
151
+ " <th>3</th>\n",
152
+ " <td>3</td>\n",
153
+ " <td>Lexus</td>\n",
154
+ " <td>NaN</td>\n",
155
+ " <td>NaN</td>\n",
156
+ " <td>213362.0</td>\n",
157
+ " <td>NaN</td>\n",
158
+ " <td>NaN</td>\n",
159
+ " <td>Automatic</td>\n",
160
+ " <td>3,594,000</td>\n",
161
+ " <td>NaN</td>\n",
162
+ " </tr>\n",
163
+ " <tr>\n",
164
+ " <th>4</th>\n",
165
+ " <td>4</td>\n",
166
+ " <td>Mercedes-Benz</td>\n",
167
+ " <td>NaN</td>\n",
168
+ " <td>NaN</td>\n",
169
+ " <td>106199.0</td>\n",
170
+ " <td>NaN</td>\n",
171
+ " <td>NaN</td>\n",
172
+ " <td>Automatic</td>\n",
173
+ " <td>8,410,000</td>\n",
174
+ " <td>NaN</td>\n",
175
+ " </tr>\n",
176
+ " </tbody>\n",
177
+ "</table>\n",
178
+ "</div>\n",
179
+ " <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-e7f12378-3a0c-4bd1-b3da-c57aedf9443c')\"\n",
180
+ " title=\"Convert this dataframe to an interactive table.\"\n",
181
+ " style=\"display:none;\">\n",
182
+ " \n",
183
+ " <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
184
+ " width=\"24px\">\n",
185
+ " <path d=\"M0 0h24v24H0V0z\" fill=\"none\"/>\n",
186
+ " <path d=\"M18.56 5.44l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94zm-11 1L8.5 8.5l.94-2.06 2.06-.94-2.06-.94L8.5 2.5l-.94 2.06-2.06.94zm10 10l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94z\"/><path d=\"M17.41 7.96l-1.37-1.37c-.4-.4-.92-.59-1.43-.59-.52 0-1.04.2-1.43.59L10.3 9.45l-7.72 7.72c-.78.78-.78 2.05 0 2.83L4 21.41c.39.39.9.59 1.41.59.51 0 1.02-.2 1.41-.59l7.78-7.78 2.81-2.81c.8-.78.8-2.07 0-2.86zM5.41 20L4 18.59l7.72-7.72 1.47 1.35L5.41 20z\"/>\n",
187
+ " </svg>\n",
188
+ " </button>\n",
189
+ " \n",
190
+ " <style>\n",
191
+ " .colab-df-container {\n",
192
+ " display:flex;\n",
193
+ " flex-wrap:wrap;\n",
194
+ " gap: 12px;\n",
195
+ " }\n",
196
+ "\n",
197
+ " .colab-df-convert {\n",
198
+ " background-color: #E8F0FE;\n",
199
+ " border: none;\n",
200
+ " border-radius: 50%;\n",
201
+ " cursor: pointer;\n",
202
+ " display: none;\n",
203
+ " fill: #1967D2;\n",
204
+ " height: 32px;\n",
205
+ " padding: 0 0 0 0;\n",
206
+ " width: 32px;\n",
207
+ " }\n",
208
+ "\n",
209
+ " .colab-df-convert:hover {\n",
210
+ " background-color: #E2EBFA;\n",
211
+ " box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
212
+ " fill: #174EA6;\n",
213
+ " }\n",
214
+ "\n",
215
+ " [theme=dark] .colab-df-convert {\n",
216
+ " background-color: #3B4455;\n",
217
+ " fill: #D2E3FC;\n",
218
+ " }\n",
219
+ "\n",
220
+ " [theme=dark] .colab-df-convert:hover {\n",
221
+ " background-color: #434B5C;\n",
222
+ " box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
223
+ " filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
224
+ " fill: #FFFFFF;\n",
225
+ " }\n",
226
+ " </style>\n",
227
+ "\n",
228
+ " <script>\n",
229
+ " const buttonEl =\n",
230
+ " document.querySelector('#df-e7f12378-3a0c-4bd1-b3da-c57aedf9443c button.colab-df-convert');\n",
231
+ " buttonEl.style.display =\n",
232
+ " google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
233
+ "\n",
234
+ " async function convertToInteractive(key) {\n",
235
+ " const element = document.querySelector('#df-e7f12378-3a0c-4bd1-b3da-c57aedf9443c');\n",
236
+ " const dataTable =\n",
237
+ " await google.colab.kernel.invokeFunction('convertToInteractive',\n",
238
+ " [key], {});\n",
239
+ " if (!dataTable) return;\n",
240
+ "\n",
241
+ " const docLinkHtml = 'Like what you see? Visit the ' +\n",
242
+ " '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
243
+ " + ' to learn more about interactive tables.';\n",
244
+ " element.innerHTML = '';\n",
245
+ " dataTable['output_type'] = 'display_data';\n",
246
+ " await google.colab.output.renderOutput(dataTable, element);\n",
247
+ " const docLink = document.createElement('div');\n",
248
+ " docLink.innerHTML = docLinkHtml;\n",
249
+ " element.appendChild(docLink);\n",
250
+ " }\n",
251
+ " </script>\n",
252
+ " </div>\n",
253
+ " </div>\n",
254
+ " "
255
+ ]
256
+ },
257
+ "metadata": {},
258
+ "execution_count": 5
259
+ }
260
+ ],
261
+ "source": [
262
+ "df.head()"
263
+ ]
264
+ },
265
+ {
266
+ "cell_type": "code",
267
+ "execution_count": 6,
268
+ "id": "30f57450",
269
+ "metadata": {
270
+ "colab": {
271
+ "base_uri": "https://localhost:8080/"
272
+ },
273
+ "id": "30f57450",
274
+ "outputId": "462327ca-b494-4cc7-d8d1-aa765e166650"
275
+ },
276
+ "outputs": [
277
+ {
278
+ "output_type": "stream",
279
+ "name": "stdout",
280
+ "text": [
281
+ "<class 'pandas.core.frame.DataFrame'>\n",
282
+ "RangeIndex: 4095 entries, 0 to 4094\n",
283
+ "Data columns (total 10 columns):\n",
284
+ " # Column Non-Null Count Dtype \n",
285
+ "--- ------ -------------- ----- \n",
286
+ " 0 Unnamed: 0 4095 non-null int64 \n",
287
+ " 1 Make 4095 non-null object \n",
288
+ " 2 Year of manufacture 3617 non-null float64\n",
289
+ " 3 Condition 3616 non-null object \n",
290
+ " 4 Mileage 4024 non-null float64\n",
291
+ " 5 Engine Size 3584 non-null float64\n",
292
+ " 6 Fuel 3607 non-null object \n",
293
+ " 7 Transmission 4075 non-null object \n",
294
+ " 8 Price 4095 non-null object \n",
295
+ " 9 Build 1127 non-null object \n",
296
+ "dtypes: float64(3), int64(1), object(6)\n",
297
+ "memory usage: 320.0+ KB\n"
298
+ ]
299
+ }
300
+ ],
301
+ "source": [
302
+ "df.info()"
303
+ ]
304
+ },
305
+ {
306
+ "cell_type": "markdown",
307
+ "id": "2b138a73",
308
+ "metadata": {
309
+ "id": "2b138a73"
310
+ },
311
+ "source": [
312
+ "### Data Cleaning"
313
+ ]
314
+ },
315
+ {
316
+ "cell_type": "code",
317
+ "execution_count": 7,
318
+ "id": "fd78bcc0",
319
+ "metadata": {
320
+ "id": "fd78bcc0"
321
+ },
322
+ "outputs": [],
323
+ "source": [
324
+ "df = df.drop('Build', axis = 1)"
325
+ ]
326
+ },
327
+ {
328
+ "cell_type": "code",
329
+ "execution_count": 8,
330
+ "id": "60013f82",
331
+ "metadata": {
332
+ "id": "60013f82"
333
+ },
334
+ "outputs": [],
335
+ "source": [
336
+ "df = df.dropna()"
337
+ ]
338
+ },
339
+ {
340
+ "cell_type": "code",
341
+ "execution_count": 9,
342
+ "id": "62b833d4",
343
+ "metadata": {
344
+ "colab": {
345
+ "base_uri": "https://localhost:8080/"
346
+ },
347
+ "id": "62b833d4",
348
+ "outputId": "05f88dbc-c2db-45be-c1c1-0f8553706eae"
349
+ },
350
+ "outputs": [
351
+ {
352
+ "output_type": "execute_result",
353
+ "data": {
354
+ "text/plain": [
355
+ "(3523, 9)"
356
+ ]
357
+ },
358
+ "metadata": {},
359
+ "execution_count": 9
360
+ }
361
+ ],
362
+ "source": [
363
+ "df.shape"
364
+ ]
365
+ },
366
+ {
367
+ "cell_type": "code",
368
+ "execution_count": 10,
369
+ "id": "e04b4172",
370
+ "metadata": {
371
+ "id": "e04b4172"
372
+ },
373
+ "outputs": [],
374
+ "source": [
375
+ "df['Price'] = df['Price'].str.replace(',', '') \n",
376
+ "df['Price'] = df['Price'].astype(float) \n",
377
+ "\n",
378
+ "df['Year of manufacture'] = df['Year of manufacture'].astype(int) "
379
+ ]
380
+ },
381
+ {
382
+ "cell_type": "code",
383
+ "execution_count": 11,
384
+ "id": "c62daca5",
385
+ "metadata": {
386
+ "colab": {
387
+ "base_uri": "https://localhost:8080/",
388
+ "height": 300
389
+ },
390
+ "id": "c62daca5",
391
+ "outputId": "6639a400-6ded-4f42-cbe5-4469c7fa27f2"
392
+ },
393
+ "outputs": [
394
+ {
395
+ "output_type": "execute_result",
396
+ "data": {
397
+ "text/plain": [
398
+ " Unnamed: 0 Year of manufacture Mileage Engine Size \\\n",
399
+ "count 3523.000000 3523.000000 3.523000e+03 3523.000000 \n",
400
+ "mean 2089.276753 2007.921090 1.901794e+05 3170.591541 \n",
401
+ "std 1187.608368 4.303771 2.215162e+05 4641.379934 \n",
402
+ "min 0.000000 1992.000000 1.000000e+00 3.000000 \n",
403
+ "25% 1066.500000 2005.000000 1.070360e+05 2000.000000 \n",
404
+ "50% 2085.000000 2008.000000 1.670060e+05 2500.000000 \n",
405
+ "75% 3136.500000 2011.000000 2.397715e+05 3500.000000 \n",
406
+ "max 4094.000000 2021.000000 9.976050e+06 184421.000000 \n",
407
+ "\n",
408
+ " Price \n",
409
+ "count 3.523000e+03 \n",
410
+ "mean 4.060590e+06 \n",
411
+ "std 4.520306e+06 \n",
412
+ "min 4.725000e+05 \n",
413
+ "25% 1.800000e+06 \n",
414
+ "50% 2.835000e+06 \n",
415
+ "75% 4.500000e+06 \n",
416
+ "max 5.880000e+07 "
417
+ ],
418
+ "text/html": [
419
+ "\n",
420
+ " <div id=\"df-c6ed75da-f06e-4c10-914e-eb4a7cf570d1\">\n",
421
+ " <div class=\"colab-df-container\">\n",
422
+ " <div>\n",
423
+ "<style scoped>\n",
424
+ " .dataframe tbody tr th:only-of-type {\n",
425
+ " vertical-align: middle;\n",
426
+ " }\n",
427
+ "\n",
428
+ " .dataframe tbody tr th {\n",
429
+ " vertical-align: top;\n",
430
+ " }\n",
431
+ "\n",
432
+ " .dataframe thead th {\n",
433
+ " text-align: right;\n",
434
+ " }\n",
435
+ "</style>\n",
436
+ "<table border=\"1\" class=\"dataframe\">\n",
437
+ " <thead>\n",
438
+ " <tr style=\"text-align: right;\">\n",
439
+ " <th></th>\n",
440
+ " <th>Unnamed: 0</th>\n",
441
+ " <th>Year of manufacture</th>\n",
442
+ " <th>Mileage</th>\n",
443
+ " <th>Engine Size</th>\n",
444
+ " <th>Price</th>\n",
445
+ " </tr>\n",
446
+ " </thead>\n",
447
+ " <tbody>\n",
448
+ " <tr>\n",
449
+ " <th>count</th>\n",
450
+ " <td>3523.000000</td>\n",
451
+ " <td>3523.000000</td>\n",
452
+ " <td>3.523000e+03</td>\n",
453
+ " <td>3523.000000</td>\n",
454
+ " <td>3.523000e+03</td>\n",
455
+ " </tr>\n",
456
+ " <tr>\n",
457
+ " <th>mean</th>\n",
458
+ " <td>2089.276753</td>\n",
459
+ " <td>2007.921090</td>\n",
460
+ " <td>1.901794e+05</td>\n",
461
+ " <td>3170.591541</td>\n",
462
+ " <td>4.060590e+06</td>\n",
463
+ " </tr>\n",
464
+ " <tr>\n",
465
+ " <th>std</th>\n",
466
+ " <td>1187.608368</td>\n",
467
+ " <td>4.303771</td>\n",
468
+ " <td>2.215162e+05</td>\n",
469
+ " <td>4641.379934</td>\n",
470
+ " <td>4.520306e+06</td>\n",
471
+ " </tr>\n",
472
+ " <tr>\n",
473
+ " <th>min</th>\n",
474
+ " <td>0.000000</td>\n",
475
+ " <td>1992.000000</td>\n",
476
+ " <td>1.000000e+00</td>\n",
477
+ " <td>3.000000</td>\n",
478
+ " <td>4.725000e+05</td>\n",
479
+ " </tr>\n",
480
+ " <tr>\n",
481
+ " <th>25%</th>\n",
482
+ " <td>1066.500000</td>\n",
483
+ " <td>2005.000000</td>\n",
484
+ " <td>1.070360e+05</td>\n",
485
+ " <td>2000.000000</td>\n",
486
+ " <td>1.800000e+06</td>\n",
487
+ " </tr>\n",
488
+ " <tr>\n",
489
+ " <th>50%</th>\n",
490
+ " <td>2085.000000</td>\n",
491
+ " <td>2008.000000</td>\n",
492
+ " <td>1.670060e+05</td>\n",
493
+ " <td>2500.000000</td>\n",
494
+ " <td>2.835000e+06</td>\n",
495
+ " </tr>\n",
496
+ " <tr>\n",
497
+ " <th>75%</th>\n",
498
+ " <td>3136.500000</td>\n",
499
+ " <td>2011.000000</td>\n",
500
+ " <td>2.397715e+05</td>\n",
501
+ " <td>3500.000000</td>\n",
502
+ " <td>4.500000e+06</td>\n",
503
+ " </tr>\n",
504
+ " <tr>\n",
505
+ " <th>max</th>\n",
506
+ " <td>4094.000000</td>\n",
507
+ " <td>2021.000000</td>\n",
508
+ " <td>9.976050e+06</td>\n",
509
+ " <td>184421.000000</td>\n",
510
+ " <td>5.880000e+07</td>\n",
511
+ " </tr>\n",
512
+ " </tbody>\n",
513
+ "</table>\n",
514
+ "</div>\n",
515
+ " <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-c6ed75da-f06e-4c10-914e-eb4a7cf570d1')\"\n",
516
+ " title=\"Convert this dataframe to an interactive table.\"\n",
517
+ " style=\"display:none;\">\n",
518
+ " \n",
519
+ " <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
520
+ " width=\"24px\">\n",
521
+ " <path d=\"M0 0h24v24H0V0z\" fill=\"none\"/>\n",
522
+ " <path d=\"M18.56 5.44l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94zm-11 1L8.5 8.5l.94-2.06 2.06-.94-2.06-.94L8.5 2.5l-.94 2.06-2.06.94zm10 10l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94z\"/><path d=\"M17.41 7.96l-1.37-1.37c-.4-.4-.92-.59-1.43-.59-.52 0-1.04.2-1.43.59L10.3 9.45l-7.72 7.72c-.78.78-.78 2.05 0 2.83L4 21.41c.39.39.9.59 1.41.59.51 0 1.02-.2 1.41-.59l7.78-7.78 2.81-2.81c.8-.78.8-2.07 0-2.86zM5.41 20L4 18.59l7.72-7.72 1.47 1.35L5.41 20z\"/>\n",
523
+ " </svg>\n",
524
+ " </button>\n",
525
+ " \n",
526
+ " <style>\n",
527
+ " .colab-df-container {\n",
528
+ " display:flex;\n",
529
+ " flex-wrap:wrap;\n",
530
+ " gap: 12px;\n",
531
+ " }\n",
532
+ "\n",
533
+ " .colab-df-convert {\n",
534
+ " background-color: #E8F0FE;\n",
535
+ " border: none;\n",
536
+ " border-radius: 50%;\n",
537
+ " cursor: pointer;\n",
538
+ " display: none;\n",
539
+ " fill: #1967D2;\n",
540
+ " height: 32px;\n",
541
+ " padding: 0 0 0 0;\n",
542
+ " width: 32px;\n",
543
+ " }\n",
544
+ "\n",
545
+ " .colab-df-convert:hover {\n",
546
+ " background-color: #E2EBFA;\n",
547
+ " box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
548
+ " fill: #174EA6;\n",
549
+ " }\n",
550
+ "\n",
551
+ " [theme=dark] .colab-df-convert {\n",
552
+ " background-color: #3B4455;\n",
553
+ " fill: #D2E3FC;\n",
554
+ " }\n",
555
+ "\n",
556
+ " [theme=dark] .colab-df-convert:hover {\n",
557
+ " background-color: #434B5C;\n",
558
+ " box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
559
+ " filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
560
+ " fill: #FFFFFF;\n",
561
+ " }\n",
562
+ " </style>\n",
563
+ "\n",
564
+ " <script>\n",
565
+ " const buttonEl =\n",
566
+ " document.querySelector('#df-c6ed75da-f06e-4c10-914e-eb4a7cf570d1 button.colab-df-convert');\n",
567
+ " buttonEl.style.display =\n",
568
+ " google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
569
+ "\n",
570
+ " async function convertToInteractive(key) {\n",
571
+ " const element = document.querySelector('#df-c6ed75da-f06e-4c10-914e-eb4a7cf570d1');\n",
572
+ " const dataTable =\n",
573
+ " await google.colab.kernel.invokeFunction('convertToInteractive',\n",
574
+ " [key], {});\n",
575
+ " if (!dataTable) return;\n",
576
+ "\n",
577
+ " const docLinkHtml = 'Like what you see? Visit the ' +\n",
578
+ " '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
579
+ " + ' to learn more about interactive tables.';\n",
580
+ " element.innerHTML = '';\n",
581
+ " dataTable['output_type'] = 'display_data';\n",
582
+ " await google.colab.output.renderOutput(dataTable, element);\n",
583
+ " const docLink = document.createElement('div');\n",
584
+ " docLink.innerHTML = docLinkHtml;\n",
585
+ " element.appendChild(docLink);\n",
586
+ " }\n",
587
+ " </script>\n",
588
+ " </div>\n",
589
+ " </div>\n",
590
+ " "
591
+ ]
592
+ },
593
+ "metadata": {},
594
+ "execution_count": 11
595
+ }
596
+ ],
597
+ "source": [
598
+ "df.describe()"
599
+ ]
600
+ },
601
+ {
602
+ "cell_type": "markdown",
603
+ "id": "910be70f",
604
+ "metadata": {
605
+ "id": "910be70f"
606
+ },
607
+ "source": [
608
+ "### EDA"
609
+ ]
610
+ },
611
+ {
612
+ "cell_type": "markdown",
613
+ "id": "90e49305",
614
+ "metadata": {
615
+ "id": "90e49305"
616
+ },
617
+ "source": [
618
+ "### Feature Engineering"
619
+ ]
620
+ },
621
+ {
622
+ "cell_type": "code",
623
+ "source": [
624
+ "# The 'Brand New' category has only 5 entries, so it will be dropped\n",
625
+ "# Dropping the 'Brand New' category\n",
626
+ "df = df[df['Condition'] != 'Brand New']"
627
+ ],
628
+ "metadata": {
629
+ "id": "PkF02_5ah3bB"
630
+ },
631
+ "id": "PkF02_5ah3bB",
632
+ "execution_count": 35,
633
+ "outputs": []
634
+ },
635
+ {
636
+ "cell_type": "code",
637
+ "execution_count": 38,
638
+ "id": "544f2b81",
639
+ "metadata": {
640
+ "colab": {
641
+ "base_uri": "https://localhost:8080/"
642
+ },
643
+ "id": "544f2b81",
644
+ "outputId": "efdf1889-b1b6-445c-901a-acab17d1cda1"
645
+ },
646
+ "outputs": [
647
+ {
648
+ "output_type": "execute_result",
649
+ "data": {
650
+ "text/plain": [
651
+ "['scaler.joblib']"
652
+ ]
653
+ },
654
+ "metadata": {},
655
+ "execution_count": 38
656
+ }
657
+ ],
658
+ "source": [
659
+ "X = df.drop(['Unnamed: 0', 'Price'], axis = 1)\n",
660
+ "y = df.Price\n",
661
+ "\n",
662
+ "make_counts = X['Make'].value_counts()\n",
663
+ "\n",
664
+ "\n",
665
+ "# Get the values to replace with 'Others'\n",
666
+ "make_others = make_counts[make_counts < 14].index.tolist()\n",
667
+ "\n",
668
+ "# Replace values with 'Others'\n",
669
+ "X['Make'] = X['Make'].apply(lambda x: 'Others' if x in make_others else x)\n",
670
+ "\n",
671
+ "X_train,X_test, y_train,y_test = train_test_split(X,y, test_size = 0.2, random_state=10)\n",
672
+ "\n",
673
+ "\n",
674
+ "# Initializing the encoders and scaler for each column\n",
675
+ "make_encoder = LabelEncoder()\n",
676
+ "fuel_encoder = LabelEncoder()\n",
677
+ "transmission_encoder = LabelEncoder()\n",
678
+ "condition_encoder = LabelEncoder()\n",
679
+ "scaler = MinMaxScaler()\n",
680
+ "\n",
681
+ "# Encoding and scaling each column individually\n",
682
+ "X_train['Make'] = make_encoder.fit_transform(X_train['Make'])\n",
683
+ "X_test['Make'] = make_encoder.transform(X_test['Make'])\n",
684
+ "\n",
685
+ "X_train['Fuel'] = fuel_encoder.fit_transform(X_train['Fuel'])\n",
686
+ "X_test['Fuel'] = fuel_encoder.transform(X_test['Fuel'])\n",
687
+ "\n",
688
+ "X_train['Transmission'] = transmission_encoder.fit_transform(X_train['Transmission'])\n",
689
+ "X_test['Transmission'] = transmission_encoder.transform(X_test['Transmission'])\n",
690
+ "\n",
691
+ "X_train['Condition'] = condition_encoder.fit_transform(X_train['Condition'])\n",
692
+ "X_test['Condition'] = condition_encoder.transform(X_test['Condition'])\n",
693
+ "\n",
694
+ "X_train[['Year of manufacture', 'Mileage', 'Engine Size']] = scaler.fit_transform(X_train[['Year of manufacture', 'Mileage', 'Engine Size']])\n",
695
+ "X_test[['Year of manufacture', 'Mileage', 'Engine Size']] = scaler.transform(X_test[['Year of manufacture', 'Mileage', 'Engine Size']])\n",
696
+ "\n",
697
+ "# Save the encoders and scaler\n",
698
+ "joblib.dump(make_encoder, \"make_encoder.joblib\",compress=3)\n",
699
+ "joblib.dump(fuel_encoder, \"fuel_encoder.joblib\",compress=3)\n",
700
+ "joblib.dump(transmission_encoder, \"transmission_encoder.joblib\",compress=3)\n",
701
+ "joblib.dump(condition_encoder, \"condition_encoder.joblib\",compress=3)\n",
702
+ "joblib.dump(scaler, \"scaler.joblib\",compress=3)"
703
+ ]
704
+ },
705
+ {
706
+ "cell_type": "markdown",
707
+ "id": "307eab41",
708
+ "metadata": {
709
+ "id": "307eab41"
710
+ },
711
+ "source": [
712
+ "#### Needed Model"
713
+ ]
714
+ },
715
+ {
716
+ "cell_type": "code",
717
+ "execution_count": 39,
718
+ "id": "23aaa0f7",
719
+ "metadata": {
720
+ "colab": {
721
+ "base_uri": "https://localhost:8080/"
722
+ },
723
+ "id": "23aaa0f7",
724
+ "outputId": "7ac3f946-76f2-4e32-bda3-84106fcec209"
725
+ },
726
+ "outputs": [
727
+ {
728
+ "output_type": "stream",
729
+ "name": "stdout",
730
+ "text": [
731
+ "Random Forest RMSE: 1900923.15\n",
732
+ "XGBoost RMSE: 1881430.11\n",
733
+ "Linear Regression RMSE: 3227815.24\n"
734
+ ]
735
+ }
736
+ ],
737
+ "source": [
738
+ "# Initialize the models\n",
739
+ "rf_model = RandomForestRegressor(random_state=42)\n",
740
+ "xgb_model = XGBRegressor(random_state=42)\n",
741
+ "lr_model = LinearRegression()\n",
742
+ "\n",
743
+ "# Fit the models on the training data\n",
744
+ "rf_model.fit(X_train, y_train)\n",
745
+ "xgb_model.fit(X_train, y_train)\n",
746
+ "lr_model.fit(X_train, y_train)\n",
747
+ "\n",
748
+ "# Make predictions on the testing data\n",
749
+ "rf_preds = rf_model.predict(X_test)\n",
750
+ "xgb_preds = xgb_model.predict(X_test)\n",
751
+ "lr_preds = lr_model.predict(X_test)\n",
752
+ "\n",
753
+ "# Evaluate the models using root mean squared error (RMSE)\n",
754
+ "rf_rmse = mean_squared_error(y_test, rf_preds, squared=False)\n",
755
+ "xgb_rmse = mean_squared_error(y_test, xgb_preds, squared=False)\n",
756
+ "lr_rmse = mean_squared_error(y_test, lr_preds, squared=False)\n",
757
+ "\n",
758
+ "# Print the RMSE scores\n",
759
+ "print(f\"Random Forest RMSE: {rf_rmse:.2f}\")\n",
760
+ "print(f\"XGBoost RMSE: {xgb_rmse:.2f}\")\n",
761
+ "print(f\"Linear Regression RMSE: {lr_rmse:.2f}\")"
762
+ ]
763
+ },
764
+ {
765
+ "cell_type": "code",
766
+ "source": [
767
+ "# R2 score\n",
768
+ "rf_r2 = r2_score(y_test, rf_preds)\n",
769
+ "print(\"Random Forest R2 Score:\", rf_r2)\n",
770
+ "\n",
771
+ "\n",
772
+ "xgb_r2 = r2_score(y_test, xgb_preds)\n",
773
+ "print(\"XGBoost R2 Score:\", xgb_r2)\n",
774
+ "\n",
775
+ "\n",
776
+ "lr_r2 = r2_score(y_test, lr_preds)\n",
777
+ "print(\"Linear Regression R2 Score:\", lr_r2)\n"
778
+ ],
779
+ "metadata": {
780
+ "colab": {
781
+ "base_uri": "https://localhost:8080/"
782
+ },
783
+ "id": "HAij8ecNkQf4",
784
+ "outputId": "cfeb36b4-201b-413a-8b4f-ce722b9d7ef3"
785
+ },
786
+ "id": "HAij8ecNkQf4",
787
+ "execution_count": 40,
788
+ "outputs": [
789
+ {
790
+ "output_type": "stream",
791
+ "name": "stdout",
792
+ "text": [
793
+ "Random Forest R2 Score: 0.7692007346747749\n",
794
+ "XGBoost R2 Score: 0.7739099336774033\n",
795
+ "Linear Regression R2 Score: 0.33453895627915986\n"
796
+ ]
797
+ }
798
+ ]
799
+ },
800
+ {
801
+ "cell_type": "code",
802
+ "execution_count": 41,
803
+ "id": "f9dfda36",
804
+ "metadata": {
805
+ "colab": {
806
+ "base_uri": "https://localhost:8080/"
807
+ },
808
+ "id": "f9dfda36",
809
+ "outputId": "69882d26-6915-4f06-c5af-d38ce97417cd"
810
+ },
811
+ "outputs": [
812
+ {
813
+ "output_type": "execute_result",
814
+ "data": {
815
+ "text/plain": [
816
+ "['car_model.joblib']"
817
+ ]
818
+ },
819
+ "metadata": {},
820
+ "execution_count": 41
821
+ }
822
+ ],
823
+ "source": [
824
+ "joblib.dump(xgb_model, \"car_model.joblib\", compress=3)"
825
+ ]
826
+ },
827
+ {
828
+ "cell_type": "markdown",
829
+ "id": "faeff4c7",
830
+ "metadata": {
831
+ "id": "faeff4c7"
832
+ },
833
+ "source": [
834
+ "**Note: Many models were built, but only the needed ones were kept.**"
835
+ ]
836
+ },
837
+ {
838
+ "cell_type": "code",
839
+ "execution_count": 42,
840
+ "id": "1b6ca9be",
841
+ "metadata": {
842
+ "colab": {
843
+ "base_uri": "https://localhost:8080/",
844
+ "height": 472
845
+ },
846
+ "id": "1b6ca9be",
847
+ "outputId": "a049c64e-ea4f-44d3-9bfb-4a03cc01a7cf"
848
+ },
849
+ "outputs": [
850
+ {
851
+ "output_type": "display_data",
852
+ "data": {
853
+ "text/plain": [
854
+ "<Figure size 640x480 with 1 Axes>"
855
+ ],
856
+ "image/png": "\n"
857
+ },
858
+ "metadata": {}
859
+ }
860
+ ],
861
+ "source": [
862
+ "sns.histplot(xgb_preds, label='prediction',color='red')\n",
863
+ "sns.histplot(y_test, label='actual price', color = 'blue')\n",
864
+ "plt.title('Prediction Vs Actual')\n",
865
+ "plt.legend()\n",
866
+ "plt.show()"
867
+ ]
868
+ },
869
+ {
870
+ "cell_type": "markdown",
871
+ "id": "e921f047",
872
+ "metadata": {
873
+ "id": "e921f047"
874
+ },
875
+ "source": [
876
+ "### Prediction"
877
+ ]
878
+ },
879
+ {
880
+ "cell_type": "code",
881
+ "execution_count": 43,
882
+ "id": "e23ac604",
883
+ "metadata": {
884
+ "id": "e23ac604"
885
+ },
886
+ "outputs": [],
887
+ "source": [
888
+ "import joblib\n",
889
+ "def predict_car_price(make, year, condition, mileage, engine_size, fuel, transmission):\n",
890
+ " # Load the encoders and scaler\n",
891
+ " make_encoder = joblib.load(\"make_encoder.joblib\")\n",
892
+ " fuel_encoder = joblib.load(\"fuel_encoder.joblib\")\n",
893
+ " transmission_encoder = joblib.load(\"transmission_encoder.joblib\")\n",
894
+ " condition_encoder = joblib.load(\"condition_encoder.joblib\")\n",
895
+ " scaler = joblib.load(\"scaler.joblib\")\n",
896
+ "\n",
897
+ " # Preprocess the input\n",
898
+ " make_encoded = make_encoder.transform([make])[0]\n",
899
+ " numerical_value = scaler.transform([[year,mileage, engine_size]])\n",
900
+ " year_scaled = numerical_value[0][0]\n",
901
+ " mileage_scaled = numerical_value[0][1]\n",
902
+ " engine_size_scaled = numerical_value[0][2]\n",
903
+ " fuel_encoded = fuel_encoder.transform([fuel])[0]\n",
904
+ " condition_encoded = condition_encoder.transform([condition])[0]\n",
905
+ " transmission_encoded = transmission_encoder.transform([transmission])[0]\n",
906
+ "\n",
907
+ " input_data = [[make_encoded, year_scaled, condition_encoded, mileage_scaled, engine_size_scaled, fuel_encoded, transmission_encoded]]\n",
908
+ " input_df = pd.DataFrame(input_data, columns=['Make', 'Year of manufacture', 'Condition', 'Mileage', 'Engine Size', 'Fuel', 'Transmission'])\n",
909
+ "\n",
910
+ " # Make predictions\n",
911
+ " predicted_price = xgb_model.predict(input_df)\n",
912
+ " return round(predicted_price[0], 2)"
913
+ ]
914
+ },
915
+ {
916
+ "cell_type": "code",
917
+ "execution_count": 44,
918
+ "id": "07692f2e",
919
+ "metadata": {
920
+ "colab": {
921
+ "base_uri": "https://localhost:8080/"
922
+ },
923
+ "id": "07692f2e",
924
+ "outputId": "c70a6f63-72db-4129-e38a-2f319e506f35"
925
+ },
926
+ "outputs": [
927
+ {
928
+ "output_type": "execute_result",
929
+ "data": {
930
+ "text/plain": [
931
+ "4970118.0"
932
+ ]
933
+ },
934
+ "metadata": {},
935
+ "execution_count": 44
936
+ }
937
+ ],
938
+ "source": [
939
+ "predict_car_price('Toyota', 2010,'Nigerian Used', 3000, 2300, 'Petrol', 'Automatic')"
940
+ ]
941
+ },
942
+ {
943
+ "cell_type": "markdown",
944
+ "id": "fce6ae74",
945
+ "metadata": {
946
+ "id": "fce6ae74"
947
+ },
948
+ "source": [
949
+ "### Gradio Interface"
950
+ ]
951
+ },
952
+ {
953
+ "cell_type": "code",
954
+ "source": [
955
+ "import gradio as gr\n",
956
+ "import joblib\n",
957
+ "def predict_car_price(make, year, condition, mileage, engine_size, fuel, transmission):\n",
958
+ " # Load the encoders and scaler\n",
959
+ " make_encoder = joblib.load(\"make_encoder.joblib\")\n",
960
+ " fuel_encoder = joblib.load(\"fuel_encoder.joblib\")\n",
961
+ " transmission_encoder = joblib.load(\"transmission_encoder.joblib\")\n",
962
+ " condition_encoder = joblib.load(\"condition_encoder.joblib\")\n",
963
+ " scaler = joblib.load(\"scaler.joblib\")\n",
964
+ "\n",
965
+ " make_encoded = make_encoder.transform([make])[0]\n",
966
+ " numerical_value = scaler.transform([[year,mileage, engine_size]])\n",
967
+ " year_scaled = numerical_value[0][0]\n",
968
+ " mileage_scaled = numerical_value[0][1]\n",
969
+ " engine_size_scaled = numerical_value[0][2]\n",
970
+ " fuel_encoded = fuel_encoder.transform([fuel])[0]\n",
971
+ " condition_encoded = condition_encoder.transform([condition])[0]\n",
972
+ " transmission_encoded = transmission_encoder.transform([transmission])[0]\n",
973
+ " input_data = [[make_encoded, year_scaled, condition_encoded, mileage_scaled, engine_size_scaled, fuel_encoded, transmission_encoded]]\n",
974
+ " input_df = pd.DataFrame(input_data, columns=['Make', 'Year of manufacture', 'Condition', 'Mileage', 'Engine Size', 'Fuel', 'Transmission'])\n",
975
+ "\n",
976
+ " # Make predictions\n",
977
+ " predicted_price = xgb_model.predict(input_df)\n",
978
+ " return round(predicted_price[0], 2)\n",
979
+ "make_dropdown = gr.inputs.Dropdown(['Acura', 'Audi', 'BMW', 'Chevrolet', 'Dodge', 'Ford', 'Honda',\n",
980
+ " 'Hyundai', 'Infiniti', 'Kia', 'Land Rover', 'Lexus', 'Mazda',\n",
981
+ " 'Mercedes-Benz', 'Mitsubishi', 'Nissan', 'Peugeot',\n",
982
+ " 'Pontiac', 'Toyota', 'Volkswagen', 'Volvo'], label=\"Make\")\n",
983
+ "condition_dropdown = gr.inputs.Dropdown(['Foreign Used', 'Nigerian Used'], label=\"Condition\")\n",
984
+ "fuel_dropdown = gr.inputs.Dropdown([\"Petrol\", \"Diesel\", \"Electric\"], label=\"Fuel\")\n",
985
+ "transmission_dropdown = gr.inputs.Dropdown([\"Manual\", \"Automatic\", \"AMT\"], label=\"Transmission\")\n",
986
+ "year_slider = gr.inputs.Slider(minimum=1992, maximum=2021, step=1, default=2010, label=\"Year\")\n",
987
+ "mileage_slider = gr.inputs.Slider(minimum=1, maximum=300000, step=10, default=80000, label=\"Mileage\")\n",
988
+ "engine_size_slider = gr.inputs.Slider(minimum=1, maximum=20000, step=1, default=100, label=\"Engine Size\")\n",
989
+ "\n",
990
+ "iface = gr.Interface(\n",
991
+ "fn=predict_car_price,\n",
992
+ "inputs=[make_dropdown, year_slider, condition_dropdown, mileage_slider, engine_size_slider, fuel_dropdown, transmission_dropdown],\n",
993
+ "outputs=\"number\",\n",
994
+ "title=\"Car Price Prediction\",\n",
995
+ " description=\"Predict the price of a car based on its details, in Naira.\",\n",
996
+ " examples=[\n",
997
+ " [\"Toyota\", 2010, \"Nigerian Used\", 80000, 2.0, \"Petrol\", \"Automatic\"],\n",
998
+ " [\"Mercedes-Benz\", 2015, \"Foreign Used\", 50000, 1000, \"Diesel\", \"AMT\"],\n",
999
+ " ],css=\".gradio-container {background-color: lightgreen}\"\n",
1000
+ ")\n",
1001
+ "\n",
1002
+ "iface.launch(share = True)\n"
1003
+ ],
1004
+ "metadata": {
1005
+ "colab": {
1006
+ "base_uri": "https://localhost:8080/",
1007
+ "height": 611
1008
+ },
1009
+ "id": "0ZNR9WJ5m5dA",
1010
+ "outputId": "b4292dcc-3397-46db-d5b2-3932ff51c657"
1011
+ },
1012
+ "id": "0ZNR9WJ5m5dA",
1013
+ "execution_count": 46,
1014
+ "outputs": [
1015
+ {
1016
+ "output_type": "stream",
1017
+ "name": "stdout",
1018
+ "text": [
1019
+ "Colab notebook detected. To show errors in colab notebook, set debug=True in launch()\n",
1020
+ "Running on public URL: https://99918e8c858d7db896.gradio.live\n",
1021
+ "\n",
1022
+ "This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)\n"
1023
+ ]
1024
+ },
1025
+ {
1026
+ "output_type": "display_data",
1027
+ "data": {
1028
+ "text/plain": [
1029
+ "<IPython.core.display.HTML object>"
1030
+ ],
1031
+ "text/html": [
1032
+ "<div><iframe src=\"https://99918e8c858d7db896.gradio.live\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
1033
+ ]
1034
+ },
1035
+ "metadata": {}
1036
+ },
1037
+ {
1038
+ "output_type": "execute_result",
1039
+ "data": {
1040
+ "text/plain": []
1041
+ },
1042
+ "metadata": {},
1043
+ "execution_count": 46
1044
+ }
1045
+ ]
1046
+ }
1047
+ ],
1048
+ "metadata": {
1049
+ "kernelspec": {
1050
+ "display_name": "Python 3",
1051
+ "language": "python",
1052
+ "name": "python3"
1053
+ },
1054
+ "language_info": {
1055
+ "codemirror_mode": {
1056
+ "name": "ipython",
1057
+ "version": 3
1058
+ },
1059
+ "file_extension": ".py",
1060
+ "mimetype": "text/x-python",
1061
+ "name": "python",
1062
+ "nbconvert_exporter": "python",
1063
+ "pygments_lexer": "ipython3",
1064
+ "version": "3.8.8"
1065
+ },
1066
+ "colab": {
1067
+ "provenance": []
1068
+ }
1069
+ },
1070
+ "nbformat": 4,
1071
+ "nbformat_minor": 5
1072
+ }
Nigerian_Car_Prices.csv ADDED
The diff for this file is too large to render. See raw diff
 
car_model.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d20f0396afd8e05a8906e37f66a873df78d94cd159458041c2506c2e47b39255
3
+ size 118321
condition_encoder.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:881d6be40f74805c0b6b7f2440dc83509af80dbb58b357d3a5e1a6d20860f1b8
3
+ size 407
fuel_encoder.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d928d31f16f22cd262183f89b49b4c7d6f78757becc7355d1368e7c49914cd19
3
+ size 415
make_encoder.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:521edbe598a7059ded42554fec5ca3aa693e5fa2099a33c5892b7de76958a6a5
3
+ size 576
nigerian_car_price_model.py ADDED
@@ -0,0 +1,233 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ """Nigerian Car Price Model.ipynb
3
+
4
+ Automatically generated by Colaboratory.
5
+
6
+ Original file is located at
7
+ https://colab.research.google.com/drive/1RtrEB_oX2Q9llgG2KysiBNuIg-EEtpdv
8
+ """
9
+
10
+ import pandas as pd
11
+ import numpy as np
12
+ import seaborn as sns
13
+ import matplotlib.pyplot as plt
14
+ import warnings
15
+ warnings.filterwarnings("ignore")
16
+ sns.set_style("darkgrid")
17
+ sns.set_palette('RdYlGn')
18
+
19
+ #model
20
+ from sklearn.preprocessing import LabelEncoder,StandardScaler,MinMaxScaler
21
+ from sklearn.model_selection import train_test_split
22
+ from sklearn.metrics import mean_squared_error, r2_score
23
+ from sklearn.ensemble import RandomForestRegressor
24
+ from xgboost import XGBRegressor
25
+ from sklearn.linear_model import LinearRegression
26
+
27
+ import gradio as gr
28
+ import joblib
29
+
30
# Load the raw listings from the Colab working directory.
df = pd.read_csv("/content/Nigerian_Car_Prices.csv")

# Notebook-style inspection: first rows, then column dtypes / non-null counts.
df.head()
df.info()

"""### Data Cleaning"""

# Remove the 'Build' column, then discard every row that still has a missing value.
df = df.drop(columns='Build').dropna()

df.shape

# 'Price' is read as text containing commas; strip them and convert to float.
df['Price'] = df['Price'].str.replace(',', '').astype(float)

# Store the manufacture year as an integer.
df['Year of manufacture'] = df['Year of manufacture'].astype(int)

df.describe()
50
+
51
+ """### EDA
52
+
53
+ ### Feature Engineering
54
+ """
55
+
56
# Per the original author's note there are only 5 'Brand New' listings,
# so that condition is excluded before modelling.
df = df.loc[df['Condition'] != 'Brand New']

# Features are every column except 'Unnamed: 0' and the target; the target is 'Price'.
X = df.drop(columns=['Unnamed: 0', 'Price'])
y = df['Price']

# Makes with fewer than 14 listings are collapsed into a single 'Others' bucket.
make_counts = X['Make'].value_counts()
make_others = make_counts[make_counts < 14].index.tolist()
X['Make'] = X['Make'].mask(X['Make'].isin(make_others), 'Others')

# 80/20 train/test split with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=10
)

# One label encoder per categorical column, one min-max scaler for the numeric ones.
make_encoder = LabelEncoder()
fuel_encoder = LabelEncoder()
transmission_encoder = LabelEncoder()
condition_encoder = LabelEncoder()
scaler = MinMaxScaler()

# Fit every transformer on the training split only, then apply it to the test split.
categorical_encoders = {
    'Make': make_encoder,
    'Fuel': fuel_encoder,
    'Transmission': transmission_encoder,
    'Condition': condition_encoder,
}
for column, encoder in categorical_encoders.items():
    X_train[column] = encoder.fit_transform(X_train[column])
    X_test[column] = encoder.transform(X_test[column])

numeric_columns = ['Year of manufacture', 'Mileage', 'Engine Size']
X_train[numeric_columns] = scaler.fit_transform(X_train[numeric_columns])
X_test[numeric_columns] = scaler.transform(X_test[numeric_columns])

# Persist the fitted transformers so predict_car_price can reload them later.
fitted_artifacts = {
    "make_encoder.joblib": make_encoder,
    "fuel_encoder.joblib": fuel_encoder,
    "transmission_encoder.joblib": transmission_encoder,
    "condition_encoder.joblib": condition_encoder,
    "scaler.joblib": scaler,
}
for filename, artifact in fitted_artifacts.items():
    joblib.dump(artifact, filename, compress=3)
104
+
105
+ """#### Needed Model"""
106
+
107
# Initialize the three candidate regressors (seeded where the estimator accepts a seed).
rf_model = RandomForestRegressor(random_state=42)
xgb_model = XGBRegressor(random_state=42)
lr_model = LinearRegression()

# Fit the models on the training data
rf_model.fit(X_train, y_train)
xgb_model.fit(X_train, y_train)
lr_model.fit(X_train, y_train)

# Make predictions on the testing data
rf_preds = rf_model.predict(X_test)
xgb_preds = xgb_model.predict(X_test)
lr_preds = lr_model.predict(X_test)

# Evaluate the models using root mean squared error (RMSE).
# RMSE is computed explicitly as sqrt(MSE): the `squared=False` shortcut of
# mean_squared_error was deprecated in scikit-learn 1.4 and removed in 1.6,
# whereas this form gives the same number on every scikit-learn version.
rf_rmse = np.sqrt(mean_squared_error(y_test, rf_preds))
xgb_rmse = np.sqrt(mean_squared_error(y_test, xgb_preds))
lr_rmse = np.sqrt(mean_squared_error(y_test, lr_preds))

# Print the RMSE scores
print(f"Random Forest RMSE: {rf_rmse:.2f}")
print(f"XGBoost RMSE: {xgb_rmse:.2f}")
print(f"Linear Regression RMSE: {lr_rmse:.2f}")

# R2 score (coefficient of determination) for each model on the test split
rf_r2 = r2_score(y_test, rf_preds)
print("Random Forest R2 Score:", rf_r2)

xgb_r2 = r2_score(y_test, xgb_preds)
print("XGBoost R2 Score:", xgb_r2)

lr_r2 = r2_score(y_test, lr_preds)
print("Linear Regression R2 Score:", lr_r2)

# Only the XGBoost model is persisted; predict_car_price uses `xgb_model`.
joblib.dump(xgb_model, "car_model.joblib", compress=3)
145
+
146
+ """**Note: Many Models have been built, but only the needed ones were kept**"""
147
+
148
# Overlay the histogram of XGBoost predictions on the histogram of the actual
# test-set prices, drawing both on the same axes.
ax = sns.histplot(xgb_preds, label='prediction', color='red')
sns.histplot(y_test, label='actual price', color='blue', ax=ax)
ax.set_title('Prediction Vs Actual')
ax.legend()
plt.show()
153
+
154
+ """### Prediction"""
155
+
156
+ import joblib
157
def predict_car_price(make, year, condition, mileage, engine_size, fuel, transmission):
    """Predict the price of one car with the fitted XGBoost model.

    The raw inputs are passed through the label encoders and the scaler that
    were saved to disk with joblib, assembled into a one-row DataFrame, and
    handed to the module-level ``xgb_model``.

    Args:
        make: Manufacturer name. A make the encoder was not fitted on is
            mapped to 'Others' when the encoder knows that bucket, mirroring
            the folding of rare makes applied before the encoder was fitted.
        year: Year of manufacture (unscaled).
        condition: Condition label, e.g. 'Nigerian Used'.
        mileage: Mileage (unscaled).
        engine_size: Engine size (unscaled).
        fuel: Fuel label, e.g. 'Petrol'.
        transmission: Transmission label, e.g. 'Automatic'.

    Returns:
        The predicted price as a built-in ``float`` rounded to two decimals.

    Raises:
        ValueError: Raised by ``LabelEncoder.transform`` when ``condition``,
            ``fuel`` or ``transmission`` (or ``make``, if no 'Others' bucket
            exists) was not seen when the encoder was fitted.
    """
    # Load the encoders and scaler
    make_encoder = joblib.load("make_encoder.joblib")
    fuel_encoder = joblib.load("fuel_encoder.joblib")
    transmission_encoder = joblib.load("transmission_encoder.joblib")
    condition_encoder = joblib.load("condition_encoder.joblib")
    scaler = joblib.load("scaler.joblib")

    # Rare makes were replaced by 'Others' before the encoder was fitted, so
    # apply the same folding here instead of failing on an unlisted make.
    known_makes = set(make_encoder.classes_)
    if make not in known_makes and 'Others' in known_makes:
        make = 'Others'

    # Preprocess the input
    make_encoded = make_encoder.transform([make])[0]
    fuel_encoded = fuel_encoder.transform([fuel])[0]
    condition_encoded = condition_encoder.transform([condition])[0]
    transmission_encoded = transmission_encoder.transform([transmission])[0]

    # The scaler was fitted on a DataFrame with these column names; passing a
    # frame with the same names (rather than a bare list) keeps the feature
    # names consistent between fit and transform.
    numeric_columns = ['Year of manufacture', 'Mileage', 'Engine Size']
    numeric_frame = pd.DataFrame([[year, mileage, engine_size]], columns=numeric_columns)
    year_scaled, mileage_scaled, engine_size_scaled = scaler.transform(numeric_frame)[0]

    input_df = pd.DataFrame(
        [[make_encoded, year_scaled, condition_encoded, mileage_scaled,
          engine_size_scaled, fuel_encoded, transmission_encoded]],
        columns=['Make', 'Year of manufacture', 'Condition', 'Mileage',
                 'Engine Size', 'Fuel', 'Transmission'],
    )

    # Make predictions; convert the NumPy scalar to a plain float before rounding
    predicted_price = xgb_model.predict(input_df)
    return round(float(predicted_price[0]), 2)


predict_car_price('Toyota', 2010,'Nigerian Used', 3000, 2300, 'Petrol', 'Automatic')
183
+
184
+ """### Gradio Interface"""
185
+
186
+ import gradio as gr
187
+ import joblib
188
# `predict_car_price` is defined once in the Prediction section of this file;
# the copy that was re-declared here duplicated that definition and has been
# dropped so there is a single implementation to maintain.

# Input widgets. The legacy `gr.inputs.*` namespace (with `default=`) was
# deprecated in Gradio 3 and removed in Gradio 4; the top-level components
# below take `value=` for the initial value and work on both.
# NOTE(review): the choice lists are hard-coded; confirm each option is a class
# the corresponding saved LabelEncoder was fitted on, otherwise transform raises.
make_dropdown = gr.Dropdown(
    ['Acura', 'Audi', 'BMW', 'Chevrolet', 'Dodge', 'Ford', 'Honda',
     'Hyundai', 'Infiniti', 'Kia', 'Land Rover', 'Lexus', 'Mazda',
     'Mercedes-Benz', 'Mitsubishi', 'Nissan', 'Peugeot',
     'Pontiac', 'Toyota', 'Volkswagen', 'Volvo'],
    label="Make",
)
condition_dropdown = gr.Dropdown(['Foreign Used', 'Nigerian Used'], label="Condition")
fuel_dropdown = gr.Dropdown(["Petrol", "Diesel", "Electric"], label="Fuel")
transmission_dropdown = gr.Dropdown(["Manual", "Automatic", "AMT"], label="Transmission")
year_slider = gr.Slider(minimum=1992, maximum=2021, step=1, value=2010, label="Year")
mileage_slider = gr.Slider(minimum=1, maximum=300000, step=10, value=80000, label="Mileage")
# NOTE(review): the engine-size values used in this file are on different
# scales (initial value 100, examples 2.0 and 1000, smoke call 2300) — confirm
# the unit the model was trained on.
engine_size_slider = gr.Slider(minimum=1, maximum=20000, step=1, value=100, label="Engine Size")

# Widget order must match the positional parameters of predict_car_price.
iface = gr.Interface(
    fn=predict_car_price,
    inputs=[make_dropdown, year_slider, condition_dropdown, mileage_slider,
            engine_size_slider, fuel_dropdown, transmission_dropdown],
    outputs="number",
    title="Car Price Prediction",
    description="Predict the price of a car based on its details, in Naira.",
    examples=[
        ["Toyota", 2010, "Nigerian Used", 80000, 2.0, "Petrol", "Automatic"],
        ["Mercedes-Benz", 2015, "Foreign Used", 50000, 1000, "Diesel", "AMT"],
    ],
    css=".gradio-container {background-color: lightgreen}",
)

# share=True asks Gradio for a temporary public URL in addition to the local one.
iface.launch(share = True)
scaler.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:39d03859e20ae59c1f30c3d6ee5c4661a01bedc5ae295626ee1291b11c2e3cc1
3
+ size 702
transmission_encoder.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2d08905131ecfee80b1be35959acb37a048c2f5d1f3d9ee06530c59012cddd19
3
+ size 416