Arafath10 committed on
Commit
1fbf289
·
verified ·
1 Parent(s): 7d96763

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +105 -76
main.py CHANGED
@@ -35,90 +35,117 @@ app.add_middleware(
35
  from joblib import dump
36
 
37
  def train_the_model(data):
38
- data = data
39
-
40
- # Select columns
41
- selected_columns = ['customer_name', 'customer_address', 'customer_phone',
42
- 'customer_email', 'cod', 'weight',
43
- 'origin_city.name', 'destination_city.name', 'status.name']
44
-
45
- # Handling missing values
46
- data_filled = data[selected_columns].fillna('Missing')
47
-
48
- # Encoding categorical variables
49
- encoders = {col: LabelEncoder() for col in selected_columns if data_filled[col].dtype == 'object'}
50
- for col, encoder in encoders.items():
51
- data_filled[col] = encoder.fit_transform(data_filled[col])
52
-
53
- # Splitting the dataset
54
- X = data_filled.drop('status.name', axis=1)
55
- y = data_filled['status.name']
56
- X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
57
-
58
- # Setup the hyperparameter grid to search
59
- param_grid = {
60
- 'max_depth': [3, 4, 5],
61
- 'learning_rate': [0.01, 0.1, 0.4],
62
- 'n_estimators': [100, 200, 300],
63
- 'subsample': [0.8, 0.9, 1],
64
- 'colsample_bytree': [0.3, 0.7]
65
- }
66
-
67
- # Initialize the classifier
68
- xgb = XGBClassifier(use_label_encoder=False, eval_metric='logloss')
69
-
70
- # Setup GridSearchCV
71
- grid_search = GridSearchCV(xgb, param_grid, cv=10, n_jobs=-1, scoring='accuracy')
72
-
73
- # Fit the grid search to the data
74
- grid_search.fit(X_train, y_train)
75
-
76
- # Get the best parameters
77
- best_params = grid_search.best_params_
78
- print("Best parameters:", best_params)
79
-
80
- # Train the model with best parameters
81
- best_xgb = XGBClassifier(**best_params, use_label_encoder=False, eval_metric='logloss')
82
- best_xgb.fit(X_train, y_train)
83
-
84
- # Predict on the test set
85
- y_pred = best_xgb.predict(X_test)
86
- y_pred_proba = best_xgb.predict_proba(X_test)
87
-
88
- # Evaluate the model
89
- accuracy = accuracy_score(y_test, y_pred)
90
- classification_rep = classification_report(y_test, y_pred)
91
-
92
- # Print the results
93
- print("Accuracy:", accuracy)
94
- print("Classification Report:\n", classification_report(y_test, y_pred))
95
 
96
-
97
- # Save the model
98
- model_filename = 'xgb_model.joblib'
99
- dump(best_xgb, model_filename)
100
-
101
- # Save the encoders
102
- encoders_filename = 'encoders.joblib'
103
- dump(encoders, encoders_filename)
104
-
105
- print(f"Model saved as {model_filename}")
106
- print(f"Encoders saved as {encoders_filename}")
107
-
108
- @app.get("/trigger_the_data_fecher_every_30min")
109
- async def your_continuous_function(page: int):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
110
  print("data fetcher running.....")
111
 
112
  # Initialize an empty DataFrame to store the combined data
113
  combined_df = pd.DataFrame()
114
 
115
  # Update the payload for each page
116
- url = "https://dev3.api.curfox.parallaxtec.com/api/ml/order-list?sort=id&paginate=500&page="+str(page)
117
 
118
  payload = {}
119
  headers = {
120
  'Accept': 'application/json',
121
- 'X-Tenant': 'royalexpress'
122
  }
123
 
124
  response = requests.request("GET", url, headers=headers, data=payload)
@@ -127,8 +154,10 @@ async def your_continuous_function(page: int):
127
  json_response = response.json()
128
  # Extracting 'data' for conversion
129
  data = json_response['data']
130
-
 
131
  df = pd.json_normalize(data)
 
132
 
133
  # Concatenate the current page's DataFrame with the combined DataFrame
134
  combined_df = pd.concat([combined_df, df], ignore_index=True)
@@ -139,8 +168,8 @@ async def your_continuous_function(page: int):
139
 
140
  train_the_model(data)
141
 
142
- return "model trained with new page : "+str(page)+" data"
143
 
144
  @app.get("/test_api")
145
  async def test_api():
146
- return "kpi_result"
 
35
  from joblib import dump
36
 
# Feature/target columns shared by the base-training and the update paths.
_SELECTED_COLUMNS = [
    'customer_name', 'customer_address', 'customer_phone',
    'customer_email', 'cod', 'weight',
    'origin_city.name', 'destination_city.name', 'status.name',
]
_TARGET_COLUMN = 'status.name'
_MODEL_FILENAME = 'xgb_model.joblib'
_ENCODERS_FILENAME = 'encoders.joblib'


def train_the_model(data):
    """Update the saved model with ``data``, or train a new base model.

    The function first tries to load ``xgb_model.joblib`` and
    ``encoders.joblib`` from the working directory and refit the loaded
    model on ``data``.  If that fails for any reason (most commonly because
    the files do not exist yet), it falls back to training a new base model
    with a hyper-parameter grid search and saves both artifacts.

    Args:
        data: Order records containing every column in ``_SELECTED_COLUMNS``.
            A ``pandas.DataFrame`` is used as-is; anything else is passed
            through ``pd.json_normalize`` first.
            NOTE(review): the non-DataFrame path assumes a list of nested
            JSON records like the ones the data fetcher receives -- confirm
            against the caller.

    Returns:
        None.  Results are printed and the artifacts are written to disk.
    """
    if not isinstance(data, pd.DataFrame):
        data = pd.json_normalize(data)

    try:
        _update_saved_model(data)
    except Exception as exc:
        # Falling back to a fresh base model is deliberate (the original
        # code did the same with a bare ``except``), but the reason is now
        # reported instead of being silently swallowed.
        print(f"Could not update the saved model ({exc!r}); training a new base model.")
        _train_base_model(data)


def _update_saved_model(data):
    """Refit the previously saved model on ``data`` and persist the result."""
    # Local imports: only ``dump`` is visibly imported from joblib in this file.
    from joblib import load
    import numpy as np

    encoders = load(_ENCODERS_FILENAME)
    xgb_model = load(_MODEL_FILENAME)

    new_data_filled = data[_SELECTED_COLUMNS].fillna('Missing')
    for col, encoder in encoders.items():
        if col not in new_data_filled.columns:
            continue
        unseen_categories = set(new_data_filled[col]) - set(encoder.classes_)
        if unseen_categories:
            # Append (never re-sort) so the codes of already known categories
            # stay stable; sort only the new ones so that the assignment does
            # not depend on set iteration order.
            encoder.classes_ = np.concatenate([
                np.asarray(encoder.classes_, dtype=object),
                np.array(sorted(unseen_categories, key=str), dtype=object),
            ])
        # Explicit lookup table: ``classes_`` is no longer sorted once
        # categories have been appended, so do not rely on ``transform``.
        codes = {label: code for code, label in enumerate(encoder.classes_)}
        new_data_filled[col] = new_data_filled[col].map(codes)

    X_new = new_data_filled.drop(_TARGET_COLUMN, axis=1)
    y_new = new_data_filled[_TARGET_COLUMN]

    # NOTE(review): per the XGBoost docs, ``fit`` without ``xgb_model=``
    # trains from scratch on this batch rather than continuing from the
    # loaded booster -- confirm whether training continuation is intended.
    xgb_model.fit(X_new, y_new)

    dump(xgb_model, _MODEL_FILENAME)
    # Persist the encoders as well, otherwise newly added categories are
    # lost and would receive different codes on the next call.
    dump(encoders, _ENCODERS_FILENAME)
    print("Model updated with new data.")

    # No held-out split exists on this path, so this is the accuracy on the
    # batch the model was just fitted on (in-sample).
    updated_model_accuracy = accuracy_score(y_new, xgb_model.predict(X_new))
    print("Updated model accuracy:", updated_model_accuracy)


def _train_base_model(data):
    """Train a new model via grid search and save the model and encoders."""
    # Handling missing values
    data_filled = data[_SELECTED_COLUMNS].fillna('Missing')

    # Encoding categorical variables (object-dtype columns only)
    encoders = {
        col: LabelEncoder()
        for col in _SELECTED_COLUMNS
        if data_filled[col].dtype == 'object'
    }
    for col, encoder in encoders.items():
        data_filled[col] = encoder.fit_transform(data_filled[col])

    # Splitting the dataset
    X = data_filled.drop(_TARGET_COLUMN, axis=1)
    y = data_filled[_TARGET_COLUMN]
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

    # Hyper-parameter grid to search
    param_grid = {
        'max_depth': [3, 4, 5],
        'learning_rate': [0.01, 0.1, 0.4],
        'n_estimators': [100, 200, 300],
        'subsample': [0.8, 0.9, 1],
        'colsample_bytree': [0.3, 0.7],
    }

    xgb = XGBClassifier(use_label_encoder=False, eval_metric='logloss')
    grid_search = GridSearchCV(xgb, param_grid, cv=10, n_jobs=-1, scoring='accuracy')
    grid_search.fit(X_train, y_train)

    best_params = grid_search.best_params_
    print("Best parameters:", best_params)

    # GridSearchCV (refit=True by default) has already refitted an estimator
    # with the best parameters on the whole training split.
    best_xgb = grid_search.best_estimator_

    # Evaluate on the held-out split
    y_pred = best_xgb.predict(X_test)
    print("Accuracy:", accuracy_score(y_test, y_pred))
    print("Classification Report:\n", classification_report(y_test, y_pred))

    # Save the model and the encoders
    dump(best_xgb, _MODEL_FILENAME)
    dump(encoders, _ENCODERS_FILENAME)

    print(f"Model saved as {_MODEL_FILENAME}")
    print(f"Encoders saved as {_ENCODERS_FILENAME}")
    print("new base model trained")
134
+
135
+ @app.get("/trigger_the_data_fecher")
136
+ async def your_continuous_function(page: int,paginate: int,Tenant: str):
137
  print("data fetcher running.....")
138
 
139
  # Initialize an empty DataFrame to store the combined data
140
  combined_df = pd.DataFrame()
141
 
142
  # Update the payload for each page
143
+ url = "https://dev3.api.curfox.parallaxtec.com/api/ml/order-list?sort=id&paginate="+str(paginate)+"&page="+str(page)
144
 
145
  payload = {}
146
  headers = {
147
  'Accept': 'application/json',
148
+ 'X-Tenant': Tenant #'royalexpress'
149
  }
150
 
151
  response = requests.request("GET", url, headers=headers, data=payload)
 
154
  json_response = response.json()
155
  # Extracting 'data' for conversion
156
  data = json_response['data']
157
+ data_count = len(data)
158
+
159
  df = pd.json_normalize(data)
160
+
161
 
162
  # Concatenate the current page's DataFrame with the combined DataFrame
163
  combined_df = pd.concat([combined_df, df], ignore_index=True)
 
168
 
169
  train_the_model(data)
170
 
171
+ return "model trained with page number: "+str(page)+" data count :"+str(data_count)
172
 
@app.get("/test_api")
async def test_api():
    """Health-check route: respond with a fixed marker string."""
    status_message = "api_working"
    return status_message