Spaces:
No application file
No application file
# -*- coding: utf-8 -*-
"""Nigerian Car Price Model.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/1RtrEB_oX2Q9llgG2KysiBNuIg-EEtpdv
"""
import pandas as pd | |
import numpy as np | |
import seaborn as sns | |
import matplotlib.pyplot as plt | |
import warnings | |
warnings.filterwarnings("ignore") | |
sns.set_style("darkgrid") | |
sns.set_palette('RdYlGn') | |
#model | |
from sklearn.preprocessing import LabelEncoder,StandardScaler,MinMaxScaler | |
from sklearn.model_selection import train_test_split | |
from sklearn.metrics import mean_squared_error, r2_score | |
from sklearn.ensemble import RandomForestRegressor | |
from xgboost import XGBRegressor | |
from sklearn.linear_model import LinearRegression | |
import gradio as gr | |
import joblib | |
# Read the raw listings export and take a first look at its structure.
df = pd.read_csv("/content/Nigerian_Car_Prices.csv")
df.head()
df.info()

"""### Data Cleaning"""
# The 'Build' column is discarded outright; after that, every row that still
# has a missing value is removed.
df = df.drop(columns='Build').dropna()
df.shape

# 'Price' is stored as text containing commas: strip them, then parse to float.
df['Price'] = df['Price'].str.replace(',', '').astype(float)
df['Year of manufacture'] = df['Year of manufacture'].astype(int)
df.describe()
"""### EDA | |
### Feature Engineering | |
""" | |
# Per the original analysis note, only 5 listings are 'Brand New', so that
# category is removed entirely.
df = df.loc[df['Condition'] != 'Brand New']

# Features are every remaining column except 'Unnamed: 0' (presumably an
# exported row index -- confirm against the CSV) and the target, 'Price'.
X = df.drop(columns=['Unnamed: 0', 'Price'])
y = df['Price']

# Fold every make with fewer than 14 listings into a single 'Others' bucket.
make_counts = X['Make'].value_counts()
make_others = make_counts[make_counts < 14].index.tolist()
X['Make'] = X['Make'].where(~X['Make'].isin(make_others), 'Others')
# Hold out 20% of the rows for evaluation; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=10)

# One label encoder per categorical column, plus one scaler shared by the
# numeric columns.
make_encoder = LabelEncoder()
fuel_encoder = LabelEncoder()
transmission_encoder = LabelEncoder()
condition_encoder = LabelEncoder()
scaler = MinMaxScaler()

# Each encoder is fitted on the training split only and then applied, as
# fitted, to the test split.
for _column, _encoder in (
    ('Make', make_encoder),
    ('Fuel', fuel_encoder),
    ('Transmission', transmission_encoder),
    ('Condition', condition_encoder),
):
    X_train[_column] = _encoder.fit_transform(X_train[_column])
    X_test[_column] = _encoder.transform(X_test[_column])

# Same fit-on-train / apply-to-test pattern for the numeric columns.
_numeric_columns = ['Year of manufacture', 'Mileage', 'Engine Size']
X_train[_numeric_columns] = scaler.fit_transform(X_train[_numeric_columns])
X_test[_numeric_columns] = scaler.transform(X_test[_numeric_columns])

# Persist the fitted preprocessors; predict_car_price() loads them back from
# these file names.
for _artifact, _filename in (
    (make_encoder, "make_encoder.joblib"),
    (fuel_encoder, "fuel_encoder.joblib"),
    (transmission_encoder, "transmission_encoder.joblib"),
    (condition_encoder, "condition_encoder.joblib"),
    (scaler, "scaler.joblib"),
):
    joblib.dump(_artifact, _filename, compress=3)
"""#### Needed Model""" | |
# Initialize the three candidate regressors (seeded where the estimator
# accepts a seed).
rf_model = RandomForestRegressor(random_state=42)
xgb_model = XGBRegressor(random_state=42)
lr_model = LinearRegression()

# Fit the models on the training data
rf_model.fit(X_train, y_train)
xgb_model.fit(X_train, y_train)
lr_model.fit(X_train, y_train)

# Make predictions on the held-out test data
rf_preds = rf_model.predict(X_test)
xgb_preds = xgb_model.predict(X_test)
lr_preds = lr_model.predict(X_test)

# Evaluate the models using root mean squared error (RMSE).
# The square root is taken explicitly instead of passing `squared=False`:
# that keyword was deprecated in scikit-learn 1.4 and is scheduled for removal
# in 1.6, whereas sqrt(MSE) gives the same value on every version.
rf_rmse = np.sqrt(mean_squared_error(y_test, rf_preds))
xgb_rmse = np.sqrt(mean_squared_error(y_test, xgb_preds))
lr_rmse = np.sqrt(mean_squared_error(y_test, lr_preds))

# Print the RMSE scores
print(f"Random Forest RMSE: {rf_rmse:.2f}")
print(f"XGBoost RMSE: {xgb_rmse:.2f}")
print(f"Linear Regression RMSE: {lr_rmse:.2f}")

# R2 score
rf_r2 = r2_score(y_test, rf_preds)
print("Random Forest R2 Score:", rf_r2)
xgb_r2 = r2_score(y_test, xgb_preds)
print("XGBoost R2 Score:", xgb_r2)
lr_r2 = r2_score(y_test, lr_preds)
print("Linear Regression R2 Score:", lr_r2)

# Only the XGBoost model is persisted; it is also the one predict_car_price()
# uses for scoring.
joblib.dump(xgb_model, "car_model.joblib", compress=3)
"""**Note: Many Models have been built, but only the needed ones were kept**""" | |
# Overlay the distribution of XGBoost predictions (red) on the distribution of
# the true test prices (blue), both drawn on the current axes.
_ax = plt.gca()
sns.histplot(xgb_preds, label='prediction', color='red', ax=_ax)
sns.histplot(y_test, label='actual price', color='blue', ax=_ax)
_ax.set_title('Prediction Vs Actual')
_ax.legend()
plt.show()
"""### Prediction""" | |
import joblib | |
def predict_car_price(make, year, condition, mileage, engine_size, fuel, transmission):
    """Predict the price of a car from its listing details.

    The categorical inputs are label-encoded and the numeric inputs are
    min-max scaled with the preprocessors saved during training; the
    resulting one-row frame is scored by the module-level ``xgb_model``.

    Args:
        make: Manufacturer name. A make the saved encoder was not fitted on
            is mapped to 'Others' when that bucket exists, mirroring the
            rare-make grouping applied to the training data.
        year: Year of manufacture.
        condition: Condition label known to the saved condition encoder.
        mileage: Mileage, in the same unit as the training data.
        engine_size: Engine size, in the same unit as the training data.
        fuel: Fuel label known to the saved fuel encoder.
        transmission: Transmission label known to the saved transmission
            encoder.

    Returns:
        The predicted price as a plain Python float rounded to 2 decimals.

    Raises:
        ValueError: If a categorical value was never seen by its encoder
            (raised by ``LabelEncoder.transform``).
    """
    # Load the encoders and scaler
    make_encoder = joblib.load("make_encoder.joblib")
    fuel_encoder = joblib.load("fuel_encoder.joblib")
    transmission_encoder = joblib.load("transmission_encoder.joblib")
    condition_encoder = joblib.load("condition_encoder.joblib")
    scaler = joblib.load("scaler.joblib")

    # Training collapsed infrequent makes into 'Others'; apply the same rule
    # here so such a make is scored instead of rejected by the encoder.
    known_makes = set(make_encoder.classes_)
    if make not in known_makes and 'Others' in known_makes:
        make = 'Others'

    # Preprocess the input
    make_encoded = make_encoder.transform([make])[0]
    fuel_encoded = fuel_encoder.transform([fuel])[0]
    condition_encoded = condition_encoder.transform([condition])[0]
    transmission_encoded = transmission_encoder.transform([transmission])[0]

    # The scaler was fitted on these three named columns, in this order, so it
    # is given a frame with the same names rather than a bare list.
    numeric_columns = ['Year of manufacture', 'Mileage', 'Engine Size']
    numeric_frame = pd.DataFrame([[year, mileage, engine_size]], columns=numeric_columns)
    year_scaled, mileage_scaled, engine_size_scaled = scaler.transform(numeric_frame)[0]

    input_data = [[make_encoded, year_scaled, condition_encoded, mileage_scaled,
                   engine_size_scaled, fuel_encoded, transmission_encoded]]
    input_df = pd.DataFrame(
        input_data,
        columns=['Make', 'Year of manufacture', 'Condition', 'Mileage',
                 'Engine Size', 'Fuel', 'Transmission'],
    )

    # Make predictions
    predicted_price = xgb_model.predict(input_df)
    # Convert the NumPy scalar to a built-in float so callers get a plain,
    # JSON-serialisable number.
    return round(float(predicted_price[0]), 2)
# Try the predictor on one sample listing.
predict_car_price(
    make='Toyota',
    year=2010,
    condition='Nigerian Used',
    mileage=3000,
    engine_size=2300,
    fuel='Petrol',
    transmission='Automatic',
)
"""### Gradio Interface""" | |
import gradio as gr | |
import joblib | |
def predict_car_price(make, year, condition, mileage, engine_size, fuel, transmission):
    """Predict the price of a car from its listing details (Gradio callback).

    The categorical inputs are label-encoded and the numeric inputs are
    min-max scaled with the preprocessors saved during training; the
    resulting one-row frame is scored by the module-level ``xgb_model``.

    Args:
        make: Manufacturer name. A make the saved encoder was not fitted on
            is mapped to 'Others' when that bucket exists, mirroring the
            rare-make grouping applied to the training data.
        year: Year of manufacture.
        condition: Condition label known to the saved condition encoder.
        mileage: Mileage, in the same unit as the training data.
        engine_size: Engine size, in the same unit as the training data.
        fuel: Fuel label known to the saved fuel encoder.
        transmission: Transmission label known to the saved transmission
            encoder.

    Returns:
        The predicted price as a plain Python float rounded to 2 decimals.

    Raises:
        ValueError: If a categorical value was never seen by its encoder
            (raised by ``LabelEncoder.transform``).
    """
    # Load the encoders and scaler
    make_encoder = joblib.load("make_encoder.joblib")
    fuel_encoder = joblib.load("fuel_encoder.joblib")
    transmission_encoder = joblib.load("transmission_encoder.joblib")
    condition_encoder = joblib.load("condition_encoder.joblib")
    scaler = joblib.load("scaler.joblib")

    # Training collapsed infrequent makes into 'Others'; apply the same rule
    # here so such a make is scored instead of rejected by the encoder.
    known_makes = set(make_encoder.classes_)
    if make not in known_makes and 'Others' in known_makes:
        make = 'Others'

    make_encoded = make_encoder.transform([make])[0]
    fuel_encoded = fuel_encoder.transform([fuel])[0]
    condition_encoded = condition_encoder.transform([condition])[0]
    transmission_encoded = transmission_encoder.transform([transmission])[0]

    # The scaler was fitted on these three named columns, in this order, so it
    # is given a frame with the same names rather than a bare list.
    numeric_columns = ['Year of manufacture', 'Mileage', 'Engine Size']
    numeric_frame = pd.DataFrame([[year, mileage, engine_size]], columns=numeric_columns)
    year_scaled, mileage_scaled, engine_size_scaled = scaler.transform(numeric_frame)[0]

    input_data = [[make_encoded, year_scaled, condition_encoded, mileage_scaled,
                   engine_size_scaled, fuel_encoded, transmission_encoded]]
    input_df = pd.DataFrame(
        input_data,
        columns=['Make', 'Year of manufacture', 'Condition', 'Mileage',
                 'Engine Size', 'Fuel', 'Transmission'],
    )

    # Make predictions
    predicted_price = xgb_model.predict(input_df)
    # Convert the NumPy scalar to a built-in float so the Gradio "number"
    # output receives a plain, JSON-serialisable value.
    return round(float(predicted_price[0]), 2)
# Input widgets, declared in the same order as predict_car_price's parameters.
# NOTE(review): the choice lists below are hard-coded; confirm each value is a
# class of the corresponding fitted encoder (e.g. "Electric", "AMT"), since an
# unseen label makes the encoder's transform() raise.
_make_choices = [
    'Acura', 'Audi', 'BMW', 'Chevrolet', 'Dodge', 'Ford', 'Honda',
    'Hyundai', 'Infiniti', 'Kia', 'Land Rover', 'Lexus', 'Mazda',
    'Mercedes-Benz', 'Mitsubishi', 'Nissan', 'Peugeot',
    'Pontiac', 'Toyota', 'Volkswagen', 'Volvo',
]
_condition_choices = ['Foreign Used', 'Nigerian Used']
_fuel_choices = ["Petrol", "Diesel", "Electric"]
_transmission_choices = ["Manual", "Automatic", "AMT"]

make_dropdown = gr.inputs.Dropdown(_make_choices, label="Make")
condition_dropdown = gr.inputs.Dropdown(_condition_choices, label="Condition")
fuel_dropdown = gr.inputs.Dropdown(_fuel_choices, label="Fuel")
transmission_dropdown = gr.inputs.Dropdown(_transmission_choices, label="Transmission")

year_slider = gr.inputs.Slider(
    minimum=1992,
    maximum=2021,
    step=1,
    default=2010,
    label="Year",
)
mileage_slider = gr.inputs.Slider(
    minimum=1,
    maximum=300000,
    step=10,
    default=80000,
    label="Mileage",
)
engine_size_slider = gr.inputs.Slider(
    minimum=1,
    maximum=20000,
    step=1,
    default=100,
    label="Engine Size",
)

# Pre-filled example rows shown under the form.
# NOTE(review): the first row's engine size (2.0) is on a very different scale
# from the other engine sizes used in this file (2300, 1000) -- confirm the
# intended unit.
_examples = [
    ["Toyota", 2010, "Nigerian Used", 80000, 2.0, "Petrol", "Automatic"],
    ["Mercedes-Benz", 2015, "Foreign Used", 50000, 1000, "Diesel", "AMT"],
]

iface = gr.Interface(
    fn=predict_car_price,
    inputs=[
        make_dropdown,
        year_slider,
        condition_dropdown,
        mileage_slider,
        engine_size_slider,
        fuel_dropdown,
        transmission_dropdown,
    ],
    outputs="number",
    title="Car Price Prediction",
    description="Predict the price of a car based on its details, in Naira.",
    examples=_examples,
    css=".gradio-container {background-color: lightgreen}",
)

# share=True additionally requests a public share link for the app.
iface.launch(share=True)