Arafath10 committed on
Commit
c0eefcc
·
verified ·
1 Parent(s): 8d31b67

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +45 -51
main.py CHANGED
@@ -26,14 +26,32 @@ app.add_middleware(
26
  )
27
 
28
 
29
-
30
- def train_the_model():
31
-
32
- data = pd.read_csv("model/trainer_data.csv")
33
- print(data["customer_name"].count())
34
-
35
- data = pd.read_csv("model/trainer_data_balanced.csv")
36
  print(data["customer_name"].count())
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
37
 
38
 
39
  # Select columns
@@ -70,7 +88,7 @@ def train_the_model():
70
  'colsample_bytree': 0.9,
71
  'learning_rate': 0.1,
72
  'max_depth': 30,
73
- 'n_estimators': 500,
74
  'subsample': 0.9,
75
  'use_label_encoder': False,
76
  'eval_metric': 'logloss'
@@ -92,14 +110,15 @@ def train_the_model():
92
  classification_rep = classification_report(y_test, y_pred)
93
 
94
  # Save the model
95
- model_filename = 'model/curfox_xgb_model.joblib'
96
  dump(xgb, model_filename)
97
 
98
  # Save the encoders
99
- encoders_filename = 'model/curfox_encoders.joblib'
100
  dump(encoders, encoders_filename)
101
 
102
- return accuracy,classification_rep,"Model trained with new data"
 
103
 
104
  @app.get("/trigger_the_data_fecher")
105
  async def your_continuous_function(page: int,paginate: int,Tenant: str):
@@ -131,56 +150,29 @@ async def your_continuous_function(page: int,paginate: int,Tenant: str):
131
  #data.to_csv("new.csv")
132
 
133
  try:
134
- file_path = 'model/trainer_data.csv' # Replace with your file path
135
  source_csv = pd.read_csv(file_path)
136
  new_data = df
137
  combined_df_final = pd.concat([source_csv,new_data], ignore_index=True)
138
 
139
- combined_df_final.to_csv("model/trainer_data.csv")
140
  print("data added")
141
  except:
142
 
143
- df.to_csv("model/trainer_data.csv")
144
  print("data created")
145
-
146
- # Load the dataset
147
- file_path = 'model/trainer_data.csv' # Update to the correct file path
148
- data = pd.read_csv(file_path)
149
- # Analyze class distribution
150
- class_distribution = data['status.name'].value_counts()
151
- print("Class Distribution before balancing:\n", class_distribution)
152
-
153
- # Get the size of the largest class to match other classes' sizes
154
- max_class_size = class_distribution.max()
155
-
156
- # Oversampling
157
- oversampled_data = pd.DataFrame()
158
- for class_name, group in data.groupby('status.name'):
159
- oversampled_group = resample(group,
160
- replace=True, # Sample with replacement
161
- n_samples=max_class_size, # to match majority class
162
- random_state=123) # for reproducibility
163
- oversampled_data = pd.concat([oversampled_data, oversampled_group], axis=0)
164
-
165
- # Verify new class distribution
166
- print("Class Distribution after oversampling:\n", oversampled_data['status.name'].value_counts())
167
-
168
- # Save the balanced dataset if needed
169
- oversampled_data.to_csv('model/trainer_data_balanced.csv', index=False)
170
-
171
- accuracy,classification_rep,message = train_the_model()
172
-
173
- return {"message":message,"page_number":page,"data_count":data_count,"accuracy":accuracy,"classification_rep":classification_rep}
174
 
175
 
176
 
177
 
178
  @app.get("/get_latest_model_updated_time")
179
- async def model_updated_time():
180
  try:
181
- m_time_encoder = os.path.getmtime('model/curfox_encoders.joblib')
182
- m_time_model = os.path.getmtime('model/curfox_xgb_model.joblib')
183
- return {"base model created time ":datetime.datetime.fromtimestamp(m_time_encoder),
 
184
  "last model updated time":datetime.datetime.fromtimestamp(m_time_model)}
185
  except:
186
  return {"no model found so first trained the model using data fecther"}
@@ -191,7 +183,9 @@ async def model_updated_time():
191
 
192
  # Endpoint for making predictions
193
  @app.post("/predict")
194
- def predict(customer_name: str,
 
 
195
  customer_address: str,
196
  customer_phone: str,
197
  customer_email: str,
@@ -204,8 +198,8 @@ def predict(customer_name: str,
204
 
205
  try:
206
  # Load your trained model and encoders
207
- xgb_model = load('model/curfox_xgb_model.joblib')
208
- encoders = load('model/curfox_encoders.joblib')
209
  except:
210
  return {"no model found so first trained the model using data fecther"}
211
 
@@ -250,4 +244,4 @@ def predict(customer_name: str,
250
  if predicted_status == "RETURN TO CLIENT":
251
  probability = 100 - probability
252
 
253
- return {"Probability": round(probability,2)}
 
26
  )
27
 
28
 
29
+ @app.get("/train_the_model")
30
+ async def train_the_model(Tenant: str):
31
+ # Load the dataset
32
+ data = pd.read_csv(f"model/{Tenant}trainer_data.csv")
 
 
 
33
  print(data["customer_name"].count())
34
+
35
+ # Analyze class distribution
36
+ class_distribution = data['status.name'].value_counts()
37
+ print("Class Distribution before balancing:\n", class_distribution)
38
+
39
+ # Get the size of the largest class to match other classes' sizes
40
+ max_class_size = class_distribution.max()
41
+
42
+ # Oversampling
43
+ oversampled_data = pd.DataFrame()
44
+ for class_name, group in data.groupby('status.name'):
45
+ oversampled_group = resample(group,
46
+ replace=True, # Sample with replacement
47
+ n_samples=max_class_size, # to match majority class
48
+ random_state=123) # for reproducibility
49
+ oversampled_data = pd.concat([oversampled_data, oversampled_group], axis=0)
50
+
51
+ # Verify new class distribution
52
+ print("Class Distribution after oversampling:\n", oversampled_data['status.name'].value_counts())
53
+
54
+ data = oversampled_data
55
 
56
 
57
  # Select columns
 
88
  'colsample_bytree': 0.9,
89
  'learning_rate': 0.1,
90
  'max_depth': 30,
91
+ 'n_estimators': 600,
92
  'subsample': 0.9,
93
  'use_label_encoder': False,
94
  'eval_metric': 'logloss'
 
110
  classification_rep = classification_report(y_test, y_pred)
111
 
112
  # Save the model
113
+ model_filename = f'model/{Tenant}_curfox_xgb_model.joblib'
114
  dump(xgb, model_filename)
115
 
116
  # Save the encoders
117
+ encoders_filename = f'model/{Tenant}_curfox_encoders.joblib'
118
  dump(encoders, encoders_filename)
119
 
120
+ return accuracy,classification_rep,"Model trained with new data for :",model_filename
121
+
122
 
123
  @app.get("/trigger_the_data_fecher")
124
  async def your_continuous_function(page: int,paginate: int,Tenant: str):
 
150
  #data.to_csv("new.csv")
151
 
152
  try:
153
+ file_path = f'model/{Tenant}trainer_data.csv' # Replace with your file path
154
  source_csv = pd.read_csv(file_path)
155
  new_data = df
156
  combined_df_final = pd.concat([source_csv,new_data], ignore_index=True)
157
 
158
+ combined_df_final.to_csv(f"model/{Tenant}trainer_data.csv")
159
  print("data added")
160
  except:
161
 
162
+ df.to_csv(f"model/{Tenant}trainer_data.csv")
163
  print("data created")
164
+ return {"message":"done","page_number":page,"data_count":data_count}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
165
 
166
 
167
 
168
 
169
  @app.get("/get_latest_model_updated_time")
170
+ async def model_updated_time(Tenant: str):
171
  try:
172
+ m_time_encoder = os.path.getmtime(f'model/{Tenant}_curfox_encoders.joblib')
173
+ m_time_model = os.path.getmtime(f'model/{Tenant}_curfox_xgb_model.joblib')
174
+ return {"Tenant":Tenant,
175
+ "base model created time ":datetime.datetime.fromtimestamp(m_time_encoder),
176
  "last model updated time":datetime.datetime.fromtimestamp(m_time_model)}
177
  except:
178
  return {"no model found so first trained the model using data fecther"}
 
183
 
184
  # Endpoint for making predictions
185
  @app.post("/predict")
186
+ def predict(
187
+ Tenant: str,
188
+ customer_name: str,
189
  customer_address: str,
190
  customer_phone: str,
191
  customer_email: str,
 
198
 
199
  try:
200
  # Load your trained model and encoders
201
+ xgb_model = load(f'model/{Tenant}_curfox_xgb_model.joblib')
202
+ encoders = load(f'model/{Tenant}_curfox_encoders.joblib')
203
  except:
204
  return {"no model found so first trained the model using data fecther"}
205
 
 
244
  if predicted_status == "RETURN TO CLIENT":
245
  probability = 100 - probability
246
 
247
+ return {"Probability": round(probability,2),"Tenant":Tenant}