File size: 3,004 Bytes
9906f45
e2e3c61
 
9906f45
 
 
 
 
 
 
 
 
00e1ce2
 
63eb0c0
00e1ce2
 
 
63eb0c0
9906f45
 
 
 
73d14c6
9906f45
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
be3cac1
9906f45
 
 
 
 
 
 
 
46b487e
73d14c6
00e1ce2
 
9906f45
062d250
00e1ce2
062d250
 
 
 
9906f45
 
 
 
63eb0c0
9906f45
 
 
 
 
00e1ce2
 
 
 
 
 
9906f45
 
 
3f13561
00e1ce2
9906f45
 
 
46b487e
9906f45
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111

from warnings import filterwarnings
filterwarnings('ignore')
import os
import uuid
import joblib
import json
import gradio as gr
import pandas as pd
from huggingface_hub import CommitScheduler
from pathlib import Path

# print('*** Running train.py ***')
# import subprocess

# # Run the training script
# subprocess.run(["python", "train.py"])
# print('*** done! ***')

# Configure the logging functionality: the app writes to a uniquely named
# file (data_<uuid4>.json) inside the local "logs" folder. The UUID is
# generated once, when this module is loaded.
log_folder = Path("logs/")
log_file = log_folder / f"data_{uuid.uuid4()}.json"

repo_id = "eric-green-insurance-charge-predictor-logs"

# Create a commit scheduler that pushes the contents of the log folder to the
# "data" directory of the dataset repo. Per the huggingface_hub documentation,
# `every` is the number of minutes between commits.
scheduler = CommitScheduler(
    folder_path=log_folder,
    path_in_repo="data",
    repo_id=repo_id,
    repo_type="dataset",
    every=2,
)

# Load the saved model
insurance_charge_predictor = joblib.load('model.joblib')

# Define the input features.
# Numeric features: 'age', 'bmi', 'children'
age_input = gr.Number(label="Age")
bmi_input = gr.Number(label="BMI")
children_input = gr.Number(label="Children")

# Categorical features and the values offered for each:
#   sex:    'female', 'male'
#   smoker: 'yes', 'no'
#   region: 'southwest', 'southeast', 'northwest', 'northeast'
sex_input = gr.Dropdown(choices=['female', 'male'], label='Sex')
smoker_input = gr.Dropdown(choices=['yes', 'no'], label='Smoker')
region_input = gr.Dropdown(
    choices=['southwest', 'southeast', 'northwest', 'northeast'],
    label='Region',
)

# Output component that displays the predicted charges.
model_output = gr.Label(label="charges")

# Define the predict function, which takes the features, converts them to a DataFrame, and makes a prediction using the saved model.
# The function runs when 'Submit' is clicked or when an API request is made.

def predict_insurance_charges(age, bmi, children, sex, smoker, region):
    """Predict the insurance charge for one applicant and log the request.

    The six feature values are packed into a single-row DataFrame and passed
    to the loaded model. The inputs and the first predicted value are then
    appended to the log file as one JSON object followed by a newline, while
    holding ``scheduler.lock``.

    Args:
        age: Value of the 'age' feature.
        bmi: Value of the 'bmi' feature.
        children: Value of the 'children' feature.
        sex: Value of the 'sex' feature.
        smoker: Value of the 'smoker' feature.
        region: Value of the 'region' feature.

    Returns:
        The first element of the model's prediction, converted via ``tolist()``.
    """
    # Column order follows the reference index:
    # ['age', 'sex', 'bmi', 'children', 'smoker', 'region']
    features = dict(
        age=age,
        sex=sex,
        bmi=bmi,
        children=children,
        smoker=smoker,
        region=region,
    )
    frame = pd.DataFrame([features])

    predictions = insurance_charge_predictor.predict(frame).tolist()

    # Append the record under the scheduler's lock; the log entry carries the
    # same keys as the model input plus the prediction, in that order.
    with scheduler.lock, log_file.open("a") as log_handle:
        record = {**features, 'prediction': predictions[0]}
        log_handle.write(json.dumps(record) + "\n")

    return predictions[0]

# Build the Gradio UI around the prediction function. The components in
# `inputs` are listed in the same order as the parameters of
# predict_insurance_charges (age, bmi, children, sex, smoker, region).
gr_interface = gr.Interface(
    fn=predict_insurance_charges,
    inputs=[age_input,
            bmi_input,
            children_input,
            sex_input,
            smoker_input,
            region_input],
    outputs=model_output,
    title="HealthyLife Insurance Charge Prediction",
    description="This API allows you to predict insurance charges based on the input features.",
    # NOTE(review): per the Gradio docs, "auto" flags every submission without
    # showing a flag button -- confirm against the installed Gradio version.
    allow_flagging="auto",
    # NOTE(review): presumably caps simultaneous runs of this event at 8 -- confirm.
    concurrency_limit=8
)

# Enable the request queue, then start the app with share=False.
gr_interface.queue()
gr_interface.launch(share=False)