# Gradio app that serves insurance-charge predictions from a saved model and
# logs each request to a Hugging Face dataset repository.

from warnings import filterwarnings
filterwarnings('ignore')
import os
import uuid
import joblib
import json
import gradio as gr
import pandas as pd
from huggingface_hub import CommitScheduler
from pathlib import Path

# Logging setup: every run of the app writes to its own UUID-named file
# inside the logs/ folder.
log_folder = Path("logs/")
log_file = log_folder / f"data_{uuid.uuid4()}.json"

# Dataset repository that receives the uploaded logs
repo_id = "eric-green-insurance-charge-predictor-logs"

# Scheduler that periodically commits the contents of log_folder to the
# "data" directory of the dataset repo (`every` is the commit interval).
scheduler = CommitScheduler(
    repo_id=repo_id, repo_type="dataset",
    folder_path=log_folder, path_in_repo="data",
    every=2,
)

# Restore the trained model from disk
insurance_charge_predictor = joblib.load("model.joblib")

# Input widgets, one per feature.
# Feature names noted for the model:
#   numeric:     age, bmi, children
#   categorical: sex, smoker, region

# Numeric inputs
age_input = gr.Number(label="Age")
bmi_input = gr.Number(label="BMI")
children_input = gr.Number(label="Children")

# Categorical inputs, restricted to the values listed for each feature
sex_input = gr.Dropdown(choices=["female", "male"], label="Sex")
smoker_input = gr.Dropdown(choices=["yes", "no"], label="Smoker")
region_input = gr.Dropdown(
    choices=["southwest", "southeast", "northwest", "northeast"],
    label="Region",
)

# Output widget that displays the predicted charges
model_output = gr.Label(label="charges")

# Prediction handler: takes the feature values, converts them to a DataFrame
# and scores them with the saved model. It runs when 'Submit' is clicked or
# when an API request is made.

def predict_insurance_charges(age, bmi, children, sex, smoker, region):
    """Predict the insurance charge for one set of inputs and log the request.

    The six values are packed into a single-row DataFrame, scored with the
    loaded model, and appended (together with the prediction) to the log file
    that the commit scheduler uploads.

    Args:
        age: Value of the "Age" input.
        bmi: Value of the "BMI" input.
        children: Value of the "Children" input.
        sex: Selected "Sex" option.
        smoker: Selected "Smoker" option.
        region: Selected "Region" option.

    Returns:
        The first element of the model's prediction for the single row.
    """
    # NOTE(review): these column names are capitalized, while the feature
    # names commented elsewhere in this file are lowercase ('age', 'bmi',
    # ...). Confirm they match the columns the model was trained on.
    sample = {
        'Age': age,
        'BMI': bmi,
        'Children': children,
        'Sex': sex,
        'Smoker': smoker,
        'Region': region
    }

    data_point = pd.DataFrame([sample])

    prediction = insurance_charge_predictor.predict(data_point).tolist()
    charge = prediction[0]

    # Serialize before taking the lock so the lock is held only for the write.
    # Each record ends with a newline: the file is opened in append mode, and
    # without a separator successive records would run together on one line
    # and the file could no longer be parsed record by record.
    record = json.dumps({**sample, 'prediction': charge}) + "\n"

    # Hold the scheduler's lock while appending to the log file.
    with scheduler.lock:
        with log_file.open("a") as f:
            f.write(record)

    return charge

# Build the UI/API around the prediction handler. The inputs are listed in the
# same order as the handler's positional parameters.
gr_interface = gr.Interface(
    fn=predict_insurance_charges,
    inputs=[
        age_input, bmi_input, children_input,
        sex_input, smoker_input, region_input,
    ],
    outputs=model_output,
    title="HealthyLife Insurance Charge Prediction",
    description="This API allows you to predict insurance charges based on the input features.",
    allow_flagging="auto",
    concurrency_limit=8,
)

# Enable request queuing, then start the app without a public share link
gr_interface.queue()
gr_interface.launch(share=False)

import subprocess
import sys

# NOTE(review): this step sits after gr_interface.launch(); if launch() blocks
# when run as a script, training only starts once the server stops, and the
# model was already loaded from model.joblib earlier. Confirm this ordering is
# intended.
print('*** Running train.py ***')

# Run the training script with the same interpreter that runs this app, so it
# sees the same installed packages (a bare "python" is resolved via PATH and
# may be a different interpreter).
training_run = subprocess.run([sys.executable, "train.py"])

# Report the real outcome instead of unconditionally claiming success.
if training_run.returncode != 0:
    print(f'*** train.py failed with exit code {training_run.returncode} ***')
else:
    print('*** done! ***')