curfox_model_trainer

Sleeping

App Files Community

Arafath10 committed on May 25, 2024

Commit

c0eefcc

verified ·

1 Parent(s): 8d31b67

Update main.py

Browse files

Files changed (1) hide show

main.py +45 -51

main.py CHANGED Viewed

@@ -26,14 +26,32 @@ app.add_middleware(
 )
-def train_the_model():
-        data = pd.read_csv("model/trainer_data.csv")
-        print(data["customer_name"].count())
-        data = pd.read_csv("model/trainer_data_balanced.csv")
         print(data["customer_name"].count())
         # Select columns
@@ -70,7 +88,7 @@ def train_the_model():
             'colsample_bytree': 0.9,
             'learning_rate': 0.1,
             'max_depth': 30,
-            'n_estimators': 500,
             'subsample': 0.9,
             'use_label_encoder': False,
             'eval_metric': 'logloss'
@@ -92,14 +110,15 @@ def train_the_model():
         classification_rep = classification_report(y_test, y_pred)
         # Save the model
-        model_filename = 'model/curfox_xgb_model.joblib'
         dump(xgb, model_filename)
         # Save the encoders
-        encoders_filename = 'model/curfox_encoders.joblib'
         dump(encoders, encoders_filename)
-        return accuracy,classification_rep,"Model trained with new data"
 @app.get("/trigger_the_data_fecher")
 async def your_continuous_function(page: int,paginate: int,Tenant: str):
@@ -131,56 +150,29 @@ async def your_continuous_function(page: int,paginate: int,Tenant: str):
     #data.to_csv("new.csv")
     try:
-        file_path = 'model/trainer_data.csv'  # Replace with your file path
         source_csv = pd.read_csv(file_path)
         new_data = df
         combined_df_final = pd.concat([source_csv,new_data], ignore_index=True)
-        combined_df_final.to_csv("model/trainer_data.csv")
         print("data added")
     except:
-        df.to_csv("model/trainer_data.csv")
         print("data created")
-    # Load the dataset
-    file_path = 'model/trainer_data.csv'  # Update to the correct file path
-    data = pd.read_csv(file_path)
-    # Analyze class distribution
-    class_distribution = data['status.name'].value_counts()
-    print("Class Distribution before balancing:\n", class_distribution)
-    # Get the size of the largest class to match other classes' sizes
-    max_class_size = class_distribution.max()
-    # Oversampling
-    oversampled_data = pd.DataFrame()
-    for class_name, group in data.groupby('status.name'):
-        oversampled_group = resample(group,
-                                     replace=True,  # Sample with replacement
-                                     n_samples=max_class_size,  # to match majority class
-                                     random_state=123)  # for reproducibility
-        oversampled_data = pd.concat([oversampled_data, oversampled_group], axis=0)
-    # Verify new class distribution
-    print("Class Distribution after oversampling:\n", oversampled_data['status.name'].value_counts())
-    # Save the balanced dataset if needed
-    oversampled_data.to_csv('model/trainer_data_balanced.csv', index=False)
-    accuracy,classification_rep,message = train_the_model()
-    return {"message":message,"page_number":page,"data_count":data_count,"accuracy":accuracy,"classification_rep":classification_rep}
 @app.get("/get_latest_model_updated_time")
-async def model_updated_time():
     try:
-        m_time_encoder = os.path.getmtime('model/curfox_encoders.joblib')
-        m_time_model = os.path.getmtime('model/curfox_xgb_model.joblib')
-        return {"base model created time ":datetime.datetime.fromtimestamp(m_time_encoder),
                 "last model updated time":datetime.datetime.fromtimestamp(m_time_model)}
     except:
         return {"no model found so first trained the model using data fecther"}
@@ -191,7 +183,9 @@ async def model_updated_time():
 # Endpoint for making predictions
 @app.post("/predict")
-def predict(customer_name: str,
     customer_address: str,
     customer_phone: str,
     customer_email: str,
@@ -204,8 +198,8 @@ def predict(customer_name: str,
     try:
         # Load your trained model and encoders
-        xgb_model = load('model/curfox_xgb_model.joblib')
-        encoders = load('model/curfox_encoders.joblib')
     except:
         return {"no model found so first trained the model using data fecther"}
@@ -250,4 +244,4 @@ def predict(customer_name: str,
     if predicted_status == "RETURN TO CLIENT":
        probability = 100 - probability
-    return {"Probability": round(probability,2)}

 )
+@app.get("/train_the_model")
+async def train_the_model(Tenant: str):
+        # Load the dataset
+        data = pd.read_csv(f"model/{Tenant}trainer_data.csv")
         print(data["customer_name"].count())
+        # Analyze class distribution
+        class_distribution = data['status.name'].value_counts()
+        print("Class Distribution before balancing:\n", class_distribution)
+        # Get the size of the largest class to match other classes' sizes
+        max_class_size = class_distribution.max()
+        # Oversampling
+        oversampled_data = pd.DataFrame()
+        for class_name, group in data.groupby('status.name'):
+            oversampled_group = resample(group,
+                                         replace=True,  # Sample with replacement
+                                         n_samples=max_class_size,  # to match majority class
+                                         random_state=123)  # for reproducibility
+            oversampled_data = pd.concat([oversampled_data, oversampled_group], axis=0)
+        # Verify new class distribution
+        print("Class Distribution after oversampling:\n", oversampled_data['status.name'].value_counts())
+        data = oversampled_data
         # Select columns
             'colsample_bytree': 0.9,
             'learning_rate': 0.1,
             'max_depth': 30,
+            'n_estimators': 600,
             'subsample': 0.9,
             'use_label_encoder': False,
             'eval_metric': 'logloss'
         classification_rep = classification_report(y_test, y_pred)
         # Save the model
+        model_filename = f'model/{Tenant}_curfox_xgb_model.joblib'
         dump(xgb, model_filename)
         # Save the encoders
+        encoders_filename = f'model/{Tenant}_curfox_encoders.joblib'
         dump(encoders, encoders_filename)
+        return accuracy,classification_rep,"Model trained with new data for :",model_filename
 @app.get("/trigger_the_data_fecher")
 async def your_continuous_function(page: int,paginate: int,Tenant: str):
     #data.to_csv("new.csv")
     try:
+        file_path = f'model/{Tenant}trainer_data.csv'  # Replace with your file path
         source_csv = pd.read_csv(file_path)
         new_data = df
         combined_df_final = pd.concat([source_csv,new_data], ignore_index=True)
+        combined_df_final.to_csv(f"model/{Tenant}trainer_data.csv")
         print("data added")
     except:
+        df.to_csv(f"model/{Tenant}trainer_data.csv")
         print("data created")
+    return {"message":"done","page_number":page,"data_count":data_count}
 @app.get("/get_latest_model_updated_time")
+async def model_updated_time(Tenant: str):
     try:
+        m_time_encoder = os.path.getmtime(f'model/{Tenant}_curfox_encoders.joblib')
+        m_time_model = os.path.getmtime(f'model/{Tenant}_curfox_xgb_model.joblib')
+        return {"Tenant":Tenant,
+                "base model created time ":datetime.datetime.fromtimestamp(m_time_encoder),
                 "last model updated time":datetime.datetime.fromtimestamp(m_time_model)}
     except:
         return {"no model found so first trained the model using data fecther"}
 # Endpoint for making predictions
 @app.post("/predict")
+def predict(
+    Tenant: str,
+    customer_name: str,
     customer_address: str,
     customer_phone: str,
     customer_email: str,
     try:
         # Load your trained model and encoders
+        xgb_model = load(f'model/{Tenant}_curfox_xgb_model.joblib')
+        encoders = load(f'model/{Tenant}_curfox_encoders.joblib')
     except:
         return {"no model found so first trained the model using data fecther"}
     if predicted_status == "RETURN TO CLIENT":
        probability = 100 - probability
+    return {"Probability": round(probability,2),"Tenant":Tenant}