matsammut's picture
Update app.py
bfbd8d4 verified
raw
history blame
3.97 kB
import gradio as gr
import joblib
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder, StandardScaler, OneHotEncoder
from sklearn.impute import KNNImputer
# Load the trained model artifact once at import time; predict() reads this
# module-level object on every request.
# NOTE: joblib.load unpickles the file, and unpickling can execute arbitrary
# code, so "ann_model.joblib" must only ever come from a trusted source.
model = joblib.load("ann_model.joblib")
# Ordinal code for each education level, following the `educational-num`
# column of the Adult census dataset (cleaning_features scales that column as
# a numeric feature, but the UI only collects the education label).
# NOTE(review): confirm this matches the encoding the model was trained with.
_EDUCATION_NUM = {
    "Preschool": 1,
    "1st-4th": 2,
    "5th-6th": 3,
    "7th-8th": 4,
    "9th": 5,
    "10th": 6,
    "11th": 7,
    "12th": 8,
    "HS-grad": 9,
    "Some-college": 10,
    "Assoc-voc": 11,
    "Assoc-acdm": 12,
    "Bachelors": 13,
    "Masters": 14,
    "Prof-school": 15,
    "Doctorate": 16,
}


# Define the prediction function
def predict(age, workclass, education, marital_status, occupation, relationship, race, gender, capital_gain, capital_loss, hours_per_week, native_country):
    """Predict the income bracket for one person described by the form inputs.

    The raw values are assembled into a one-row DataFrame whose column names
    are the ones cleaning_features looks up, cleaned, and then passed to the
    module-level ``model``.

    Args:
        age: Age in years.
        workclass: Employment class label.
        education: Education level label (also converted to an ordinal code).
        marital_status: Marital status label.
        occupation: Occupation label.
        relationship: Household relationship label.
        race: Race label.
        gender: "Male" or "Female".
        capital_gain: Capital gain amount.
        capital_loss: Capital loss amount.
        hours_per_week: Hours worked per week.
        native_country: "United-States" or any other value.

    Returns:
        "Income >50K" when the model predicts the positive class, otherwise
        "Income <=50K".
    """
    # cleaning_features indexes columns by name, so it needs a DataFrame rather
    # than a bare list of values.
    frame = pd.DataFrame([{
        'age': age,
        'workclass': workclass,
        'education': education,
        'educational-num': _EDUCATION_NUM.get(education, np.nan),
        'marital-status': marital_status,
        'occupation': occupation,
        'relationship': relationship,
        'race': race,
        'gender': gender,
        'capital-gain': capital_gain,
        'capital-loss': capital_loss,
        'hours-per-week': hours_per_week,
        'native-country': native_country,
    }])

    cleaned = cleaning_features(frame)
    # cleaning_features returns (data, encoder, scaler); only the frame is a
    # model input.
    fixed_features = cleaned[0] if isinstance(cleaned, tuple) else cleaned

    # NOTE(review): cleaning_features fits its scaler/encoders on whatever data
    # it receives unless fitted ones are supplied; for faithful predictions the
    # transformers fitted at training time should be persisted and reused, and
    # the resulting columns must match what the model was trained on.
    prediction = model.predict(fixed_features)

    # Use the model's actual output (previously overwritten with a constant 1).
    # A 0.5 threshold agrees with `== 1` for hard 0/1 labels and also handles a
    # single probability output. NOTE(review): confirm the model's output format.
    score = float(np.asarray(prediction).ravel()[0])
    return "Income >50K" if score >= 0.5 else "Income <=50K"
def cleaning_features(data, encoder=None, scaler=None):
    """Clean and encode a frame of Adult-census style records.

    Steps: replace the '?' / 99999 placeholders with NaN, standard-scale the
    numeric columns, label-encode ``gender``, one-hot encode ``race``,
    ``marital-status`` and ``relationship``, and binarize ``native-country``
    (1 for 'United-States', 0 otherwise).

    Args:
        data: DataFrame containing at least the columns 'age',
            'educational-num', 'hours-per-week', 'gender', 'race',
            'marital-status', 'relationship' and 'native-country'. The
            caller's frame is not modified.
        encoder: Optional already-fitted OneHotEncoder covering the three
            one-hot columns. When omitted, a new encoder is fitted on ``data``.
        scaler: Optional already-fitted StandardScaler covering the numeric
            columns. When omitted, a new scaler is fitted on ``data``.

    Returns:
        Tuple ``(data, encoder, scaler)``: the transformed DataFrame and the
        encoder and scaler that were used, so they can be reused later.
    """
    numeric_cols = ['age', 'educational-num', 'hours-per-week']
    columns_to_encode = ['race', 'marital-status', 'relationship']
    known_genders = ['Female', 'Male']

    # Non-inplace replace returns a new frame, so the caller's data is left
    # untouched by everything below.
    # NOTE(review): missing values become NaN here but are never imputed in
    # this function (KNNImputer is imported by the file but not used here).
    data = data.replace({'?': np.nan, 99999: np.nan})

    # 1. Scale numerical features (fit only when no fitted scaler was given)
    if scaler is None:
        scaler = StandardScaler()
        data[numeric_cols] = scaler.fit_transform(data[numeric_cols])
    else:
        data[numeric_cols] = scaler.transform(data[numeric_cols])

    # 2. Label encode gender. Fitting on the fixed, sorted class list keeps the
    # codes stable (Female=0, Male=1) even when the frame holds a single row or
    # a single gender; any other values fall back to fitting on the data.
    le = LabelEncoder()
    if data['gender'].isin(known_genders).all():
        le.fit(known_genders)
    else:
        le.fit(data['gender'])
    data['gender'] = le.transform(data['gender'])

    # 3. One-hot encode race, marital status and relationship in one pass, so
    # the returned encoder knows all three columns instead of only the last.
    if encoder is None:
        encoder = OneHotEncoder(sparse_output=False)
        encoded = encoder.fit_transform(data[columns_to_encode])
    else:
        encoded = encoder.transform(data[columns_to_encode])
    encoded_cols = encoder.get_feature_names_out(columns_to_encode)
    encoded_df = pd.DataFrame(encoded, columns=encoded_cols, index=data.index)

    # Combine the encoded data with the remaining original columns
    data = pd.concat([data.drop(columns=columns_to_encode), encoded_df], axis=1)

    # Binarize native country
    data['native-country'] = (data['native-country'] == 'United-States').astype(int)

    print(data.head(10))
    return data, encoder, scaler
# Create the Gradio interface.
# Gradio passes the input values to `fn` positionally, so the components below
# must appear in exactly the order of predict()'s parameters:
# age, workclass, education, marital_status, occupation, relationship, race,
# gender, capital_gain, capital_loss, hours_per_week, native_country.
interface = gr.Interface(
    fn=predict,
    inputs=[
        gr.Slider(18, 90, step=1, label="Age"),
        gr.Dropdown(
            ["Private", "Self-emp-not-inc", "Self-emp-inc", "Federal-gov",
             "Local-gov", "State-gov", "Without-pay", "Never-worked"],
            label="Workclass"
        ),
        gr.Dropdown(
            ["Bachelors", "Some-college", "11th", "HS-grad", "Prof-school",
             "Assoc-acdm", "Assoc-voc", "9th", "7th-8th", "12th", "Masters",
             "1st-4th", "10th", "Doctorate", "5th-6th", "Preschool"],
            label="Education"
        ),
        gr.Dropdown(
            ["Married-civ-spouse", "Divorced", "Never-married", "Separated",
             "Widowed", "Married-spouse-absent", "Married-AF-spouse"],
            label="Marital Status"
        ),
        gr.Dropdown(
            ["Tech-support", "Craft-repair", "Other-service", "Sales",
             "Exec-managerial", "Prof-specialty", "Handlers-cleaners",
             "Machine-op-inspct", "Adm-clerical", "Farming-fishing",
             "Transport-moving", "Priv-house-serv", "Protective-serv",
             "Armed-Forces"],
            label="Occupation"
        ),
        gr.Dropdown(
            ["Wife", "Husband", "Own-child", "Unmarried", "Other-relative", "Not-in-family"],
            label="Relationship"
        ),
        gr.Dropdown(
            ["White", "Black", "Asian-Pac-Islander", "Amer-Indian-Eskimo", "Other"],
            label="Race"
        ),
        gr.Dropdown(
            ["Male", "Female"],
            label="Gender"
        ),
        # Capital gain/loss come before hours per week to match predict();
        # previously the hours slider fed capital_gain and so on down the line.
        gr.Slider(0, 100000, step=100, label="Capital Gain"),
        gr.Slider(0, 5000, step=50, label="Capital Loss"),
        gr.Slider(1, 90, step=1, label="Hours Per Week"),
        gr.Dropdown(
            ["United-States", "Other"],
            label="Native Country"
        )
    ],
    outputs="text",
    title="Adult Income Predictor"
)
# Launch the app
interface.launch()