Spaces:
Sleeping
Sleeping
File size: 3,973 Bytes
8876cd2 3c78fe7 8876cd2 62e17b3 8876cd2 a506979 62e17b3 bf88d79 bfbd8d4 62e17b3 4dbe0a9 8876cd2 3c78fe7 8876cd2 197ffb2 8876cd2 197ffb2 8876cd2 197ffb2 8876cd2 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 |
import gradio as gr
import joblib
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder, StandardScaler, OneHotEncoder
from sklearn.impute import KNNImputer
# Deserialize the saved model once at import time; predict() reuses this object.
MODEL_PATH = "ann_model.joblib"
model = joblib.load(MODEL_PATH)
# Ordinal code for each education level, following the education /
# education-num pairing of the Adult (census income) dataset.
# NOTE(review): confirm the model was trained with this same encoding.
_EDUCATION_NUM = {
    "Preschool": 1,
    "1st-4th": 2,
    "5th-6th": 3,
    "7th-8th": 4,
    "9th": 5,
    "10th": 6,
    "11th": 7,
    "12th": 8,
    "HS-grad": 9,
    "Some-college": 10,
    "Assoc-voc": 11,
    "Assoc-acdm": 12,
    "Bachelors": 13,
    "Masters": 14,
    "Prof-school": 15,
    "Doctorate": 16,
}


def predict(age, workclass, education, marital_status, occupation, relationship, race, gender, capital_gain, capital_loss, hours_per_week, native_country):
    """Return the income bracket predicted for one person.

    The twelve arguments are the raw values entered in the UI. They are
    assembled into a single-row DataFrame (using the hyphenated column names
    that ``cleaning_features`` looks up), cleaned, and passed to the
    module-level ``model``.

    Returns:
        ``"Income >50K"`` when the model's prediction equals 1, otherwise
        ``"Income <=50K"``.
    """
    row = {
        "age": age,
        "workclass": workclass,
        "education": education,
        # cleaning_features scales this column, but the UI only collects the
        # education label, so derive the numeric code from it.
        "educational-num": _EDUCATION_NUM.get(education, np.nan),
        "marital-status": marital_status,
        "occupation": occupation,
        "relationship": relationship,
        "race": race,
        "gender": gender,
        "capital-gain": capital_gain,
        "capital-loss": capital_loss,
        "hours-per-week": hours_per_week,
        "native-country": native_country,
    }
    frame = pd.DataFrame([row])

    # cleaning_features returns (frame, encoder, scaler); only the frame is
    # needed here.
    cleaned, _encoder, _scaler = cleaning_features(frame)

    # NOTE(review): cleaning_features leaves workclass/education/occupation as
    # raw strings and fits its encoders/scaler on this single row. Verify that
    # the resulting columns match what the model was trained on.
    raw_prediction = model.predict(cleaned)

    # model.predict returns one entry per input row; take the only one so the
    # comparison below is against a scalar, not an array.
    label = np.asarray(raw_prediction).ravel()[0]
    return "Income >50K" if label == 1 else "Income <=50K"
def cleaning_features(data):
    """Scale and encode a frame of Adult-income features.

    Steps, in order:
      1. Replace the placeholders ``'?'`` and ``99999`` with NaN.
      2. Standard-scale ``age``, ``educational-num`` and ``hours-per-week``.
      3. Label-encode ``gender``.
      4. One-hot encode ``race``, ``marital-status`` and ``relationship``,
         replacing each source column with its indicator columns.
      5. Binarize ``native-country`` to 1 for ``'United-States'``, else 0.

    The input frame is not modified; all work happens on a copy.

    Args:
        data: pandas DataFrame containing at least the columns named above.

    Returns:
        Tuple ``(cleaned, encoder, scaler)``: the transformed DataFrame, the
        OneHotEncoder (as fitted on the last encoded column,
        ``relationship``), and the fitted StandardScaler.

    NOTE(review): every transformer is fitted on ``data`` itself. If this is
    called on inference rows instead of the training set, the scaling and the
    set of one-hot columns will not match training -- confirm intended usage.
    """
    numeric_cols = ['age', 'educational-num', 'hours-per-week']
    columns_to_encode = ['race', 'marital-status', 'relationship']

    scaler = StandardScaler()
    encoder = OneHotEncoder(sparse_output=False)

    # Work on a copy so the column assignments below never write through to
    # the caller's frame.
    data = data.copy()
    data = data.replace({'?': np.nan, 99999: np.nan})

    # 1. Scale numerical features
    data[numeric_cols] = scaler.fit_transform(data[numeric_cols])

    # 2. Label encode gender
    data['gender'] = LabelEncoder().fit_transform(data['gender'])

    # 3. One-hot encode each categorical column, swapping the original column
    #    for its indicator columns.
    for column in columns_to_encode:
        encoded = encoder.fit_transform(data[[column]])
        encoded_df = pd.DataFrame(
            encoded,
            columns=encoder.get_feature_names_out([column]),
            index=data.index,
        )
        data = pd.concat([data.drop(column, axis=1), encoded_df], axis=1)

    # 4. Binarize native country (missing values compare unequal, giving 0)
    data['native-country'] = (data['native-country'] == 'United-States').astype(int)

    print(data.head(10))
    return data, encoder, scaler
# Create the Gradio interface.
# Gradio passes component values to `fn` positionally, so the components below
# must appear in exactly the order of predict()'s parameters:
# age, workclass, education, marital_status, occupation, relationship, race,
# gender, capital_gain, capital_loss, hours_per_week, native_country.
interface = gr.Interface(
    fn=predict,
    inputs=[
        gr.Slider(18, 90, step=1, label="Age"),
        gr.Dropdown(
            ["Private", "Self-emp-not-inc", "Self-emp-inc", "Federal-gov",
             "Local-gov", "State-gov", "Without-pay", "Never-worked"],
            label="Workclass",
        ),
        gr.Dropdown(
            ["Bachelors", "Some-college", "11th", "HS-grad", "Prof-school",
             "Assoc-acdm", "Assoc-voc", "9th", "7th-8th", "12th", "Masters",
             "1st-4th", "10th", "Doctorate", "5th-6th", "Preschool"],
            label="Education",
        ),
        gr.Dropdown(
            ["Married-civ-spouse", "Divorced", "Never-married", "Separated",
             "Widowed", "Married-spouse-absent", "Married-AF-spouse"],
            label="Marital Status",
        ),
        gr.Dropdown(
            ["Tech-support", "Craft-repair", "Other-service", "Sales",
             "Exec-managerial", "Prof-specialty", "Handlers-cleaners",
             "Machine-op-inspct", "Adm-clerical", "Farming-fishing",
             "Transport-moving", "Priv-house-serv", "Protective-serv",
             "Armed-Forces"],
            label="Occupation",
        ),
        gr.Dropdown(
            ["Wife", "Husband", "Own-child", "Unmarried", "Other-relative", "Not-in-family"],
            label="Relationship",
        ),
        gr.Dropdown(
            ["White", "Black", "Asian-Pac-Islander", "Amer-Indian-Eskimo", "Other"],
            label="Race",
        ),
        gr.Dropdown(
            ["Male", "Female"],
            label="Gender",
        ),
        # Capital gain/loss come before hours-per-week to match predict().
        gr.Slider(0, 100000, step=100, label="Capital Gain"),
        gr.Slider(0, 5000, step=50, label="Capital Loss"),
        gr.Slider(1, 90, step=1, label="Hours Per Week"),
        gr.Dropdown(
            ["United-States", "Other"],
            label="Native Country",
        ),
    ],
    outputs="text",
    title="Adult Income Predictor",
)

# Launch the app
interface.launch()
|