Spaces:
Sleeping
Sleeping
Update main.py
Browse files
main.py
CHANGED
@@ -26,14 +26,32 @@ app.add_middleware(
|
|
26 |
)
|
27 |
|
28 |
|
29 |
-
|
30 |
-
def train_the_model():
|
31 |
-
|
32 |
-
data = pd.read_csv("model/trainer_data.csv")
|
33 |
-
print(data["customer_name"].count())
|
34 |
-
|
35 |
-
data = pd.read_csv("model/trainer_data_balanced.csv")
|
36 |
print(data["customer_name"].count())
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
37 |
|
38 |
|
39 |
# Select columns
|
@@ -70,7 +88,7 @@ def train_the_model():
|
|
70 |
'colsample_bytree': 0.9,
|
71 |
'learning_rate': 0.1,
|
72 |
'max_depth': 30,
|
73 |
-
'n_estimators':
|
74 |
'subsample': 0.9,
|
75 |
'use_label_encoder': False,
|
76 |
'eval_metric': 'logloss'
|
@@ -92,14 +110,15 @@ def train_the_model():
|
|
92 |
classification_rep = classification_report(y_test, y_pred)
|
93 |
|
94 |
# Save the model
|
95 |
-
model_filename = 'model/
|
96 |
dump(xgb, model_filename)
|
97 |
|
98 |
# Save the encoders
|
99 |
-
encoders_filename = 'model/
|
100 |
dump(encoders, encoders_filename)
|
101 |
|
102 |
-
return accuracy,classification_rep,"Model trained with new data"
|
|
|
103 |
|
104 |
@app.get("/trigger_the_data_fecher")
|
105 |
async def your_continuous_function(page: int,paginate: int,Tenant: str):
|
@@ -131,56 +150,29 @@ async def your_continuous_function(page: int,paginate: int,Tenant: str):
|
|
131 |
#data.to_csv("new.csv")
|
132 |
|
133 |
try:
|
134 |
-
file_path = 'model/trainer_data.csv' # Replace with your file path
|
135 |
source_csv = pd.read_csv(file_path)
|
136 |
new_data = df
|
137 |
combined_df_final = pd.concat([source_csv,new_data], ignore_index=True)
|
138 |
|
139 |
-
combined_df_final.to_csv("model/trainer_data.csv")
|
140 |
print("data added")
|
141 |
except:
|
142 |
|
143 |
-
df.to_csv("model/trainer_data.csv")
|
144 |
print("data created")
|
145 |
-
|
146 |
-
# Load the dataset
|
147 |
-
file_path = 'model/trainer_data.csv' # Update to the correct file path
|
148 |
-
data = pd.read_csv(file_path)
|
149 |
-
# Analyze class distribution
|
150 |
-
class_distribution = data['status.name'].value_counts()
|
151 |
-
print("Class Distribution before balancing:\n", class_distribution)
|
152 |
-
|
153 |
-
# Get the size of the largest class to match other classes' sizes
|
154 |
-
max_class_size = class_distribution.max()
|
155 |
-
|
156 |
-
# Oversampling
|
157 |
-
oversampled_data = pd.DataFrame()
|
158 |
-
for class_name, group in data.groupby('status.name'):
|
159 |
-
oversampled_group = resample(group,
|
160 |
-
replace=True, # Sample with replacement
|
161 |
-
n_samples=max_class_size, # to match majority class
|
162 |
-
random_state=123) # for reproducibility
|
163 |
-
oversampled_data = pd.concat([oversampled_data, oversampled_group], axis=0)
|
164 |
-
|
165 |
-
# Verify new class distribution
|
166 |
-
print("Class Distribution after oversampling:\n", oversampled_data['status.name'].value_counts())
|
167 |
-
|
168 |
-
# Save the balanced dataset if needed
|
169 |
-
oversampled_data.to_csv('model/trainer_data_balanced.csv', index=False)
|
170 |
-
|
171 |
-
accuracy,classification_rep,message = train_the_model()
|
172 |
-
|
173 |
-
return {"message":message,"page_number":page,"data_count":data_count,"accuracy":accuracy,"classification_rep":classification_rep}
|
174 |
|
175 |
|
176 |
|
177 |
|
178 |
@app.get("/get_latest_model_updated_time")
|
179 |
-
async def model_updated_time():
|
180 |
try:
|
181 |
-
m_time_encoder = os.path.getmtime('model/
|
182 |
-
m_time_model = os.path.getmtime('model/
|
183 |
-
return {"
|
|
|
184 |
"last model updated time":datetime.datetime.fromtimestamp(m_time_model)}
|
185 |
except:
|
186 |
return {"no model found so first trained the model using data fecther"}
|
@@ -191,7 +183,9 @@ async def model_updated_time():
|
|
191 |
|
192 |
# Endpoint for making predictions
|
193 |
@app.post("/predict")
|
194 |
-
def predict(customer_name: str,
|
|
|
|
|
195 |
customer_address: str,
|
196 |
customer_phone: str,
|
197 |
customer_email: str,
|
@@ -204,8 +198,8 @@ def predict(customer_name: str,
|
|
204 |
|
205 |
try:
|
206 |
# Load your trained model and encoders
|
207 |
-
xgb_model = load('model/
|
208 |
-
encoders = load('model/
|
209 |
except:
|
210 |
return {"no model found so first trained the model using data fecther"}
|
211 |
|
@@ -250,4 +244,4 @@ def predict(customer_name: str,
|
|
250 |
if predicted_status == "RETURN TO CLIENT":
|
251 |
probability = 100 - probability
|
252 |
|
253 |
-
return {"Probability": round(probability,2)}
|
|
|
26 |
)
|
27 |
|
28 |
|
29 |
+
@app.get("/train_the_model")
|
30 |
+
async def train_the_model(Tenant: str):
|
31 |
+
# Load the dataset
|
32 |
+
data = pd.read_csv(f"model/{Tenant}trainer_data.csv")
|
|
|
|
|
|
|
33 |
print(data["customer_name"].count())
|
34 |
+
|
35 |
+
# Analyze class distribution
|
36 |
+
class_distribution = data['status.name'].value_counts()
|
37 |
+
print("Class Distribution before balancing:\n", class_distribution)
|
38 |
+
|
39 |
+
# Get the size of the largest class to match other classes' sizes
|
40 |
+
max_class_size = class_distribution.max()
|
41 |
+
|
42 |
+
# Oversampling
|
43 |
+
oversampled_data = pd.DataFrame()
|
44 |
+
for class_name, group in data.groupby('status.name'):
|
45 |
+
oversampled_group = resample(group,
|
46 |
+
replace=True, # Sample with replacement
|
47 |
+
n_samples=max_class_size, # to match majority class
|
48 |
+
random_state=123) # for reproducibility
|
49 |
+
oversampled_data = pd.concat([oversampled_data, oversampled_group], axis=0)
|
50 |
+
|
51 |
+
# Verify new class distribution
|
52 |
+
print("Class Distribution after oversampling:\n", oversampled_data['status.name'].value_counts())
|
53 |
+
|
54 |
+
data = oversampled_data
|
55 |
|
56 |
|
57 |
# Select columns
|
|
|
88 |
'colsample_bytree': 0.9,
|
89 |
'learning_rate': 0.1,
|
90 |
'max_depth': 30,
|
91 |
+
'n_estimators': 600,
|
92 |
'subsample': 0.9,
|
93 |
'use_label_encoder': False,
|
94 |
'eval_metric': 'logloss'
|
|
|
110 |
classification_rep = classification_report(y_test, y_pred)
|
111 |
|
112 |
# Save the model
|
113 |
+
model_filename = f'model/{Tenant}_curfox_xgb_model.joblib'
|
114 |
dump(xgb, model_filename)
|
115 |
|
116 |
# Save the encoders
|
117 |
+
encoders_filename = f'model/{Tenant}_curfox_encoders.joblib'
|
118 |
dump(encoders, encoders_filename)
|
119 |
|
120 |
+
return accuracy,classification_rep,"Model trained with new data for :",model_filename
|
121 |
+
|
122 |
|
123 |
@app.get("/trigger_the_data_fecher")
|
124 |
async def your_continuous_function(page: int,paginate: int,Tenant: str):
|
|
|
150 |
#data.to_csv("new.csv")
|
151 |
|
152 |
try:
|
153 |
+
file_path = f'model/{Tenant}trainer_data.csv' # Replace with your file path
|
154 |
source_csv = pd.read_csv(file_path)
|
155 |
new_data = df
|
156 |
combined_df_final = pd.concat([source_csv,new_data], ignore_index=True)
|
157 |
|
158 |
+
combined_df_final.to_csv(f"model/{Tenant}trainer_data.csv")
|
159 |
print("data added")
|
160 |
except:
|
161 |
|
162 |
+
df.to_csv(f"model/{Tenant}trainer_data.csv")
|
163 |
print("data created")
|
164 |
+
return {"message":"done","page_number":page,"data_count":data_count}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
165 |
|
166 |
|
167 |
|
168 |
|
169 |
@app.get("/get_latest_model_updated_time")
|
170 |
+
async def model_updated_time(Tenant: str):
|
171 |
try:
|
172 |
+
m_time_encoder = os.path.getmtime(f'model/{Tenant}_curfox_encoders.joblib')
|
173 |
+
m_time_model = os.path.getmtime(f'model/{Tenant}_curfox_xgb_model.joblib')
|
174 |
+
return {"Tenant":Tenant,
|
175 |
+
"base model created time ":datetime.datetime.fromtimestamp(m_time_encoder),
|
176 |
"last model updated time":datetime.datetime.fromtimestamp(m_time_model)}
|
177 |
except:
|
178 |
return {"no model found so first trained the model using data fecther"}
|
|
|
183 |
|
184 |
# Endpoint for making predictions
|
185 |
@app.post("/predict")
|
186 |
+
def predict(
|
187 |
+
Tenant: str,
|
188 |
+
customer_name: str,
|
189 |
customer_address: str,
|
190 |
customer_phone: str,
|
191 |
customer_email: str,
|
|
|
198 |
|
199 |
try:
|
200 |
# Load your trained model and encoders
|
201 |
+
xgb_model = load(f'model/{Tenant}_curfox_xgb_model.joblib')
|
202 |
+
encoders = load(f'model/{Tenant}_curfox_encoders.joblib')
|
203 |
except:
|
204 |
return {"no model found so first trained the model using data fecther"}
|
205 |
|
|
|
244 |
if predicted_status == "RETURN TO CLIENT":
|
245 |
probability = 100 - probability
|
246 |
|
247 |
+
return {"Probability": round(probability,2),"Tenant":Tenant}
|