|
import pandas as pd |
|
import numpy as np |
|
from sklearn.model_selection import train_test_split |
|
from sklearn.preprocessing import StandardScaler |
|
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier |
|
from sklearn.metrics import mean_squared_error, r2_score, accuracy_score, classification_report |
|
|
|
class PredictiveAnalytics:
    """Train a random-forest model on a table and report hold-out metrics.

    The last column of the input frame is used as the target and all other
    columns as features. Whether a classifier or a regressor is trained is
    decided from the target column itself.

    Attributes:
        model: The fitted random-forest estimator, or ``None`` until
            ``predict`` has completed successfully.
        scaler: ``StandardScaler`` that is re-fitted on the training split
            on every ``predict`` call.
        target_column: Label of the column used as target in the most
            recent ``predict`` call.
        feature_names: Labels of the feature columns the current model was
            trained on, or ``None`` before training.
    """

    # A numeric target with fewer distinct values than this is treated as a
    # set of class labels rather than a continuous quantity.
    MAX_CLASSIFICATION_CLASSES = 10

    def __init__(self):
        self.model = None
        self.scaler = StandardScaler()
        self.target_column = None
        self.feature_names = None

    @classmethod
    def _is_classification_target(cls, y):
        """Return True when the target series *y* should be treated as labels."""
        # Any non-numeric target (object, string, categorical, ...) cannot be
        # regressed on, so it is always a classification problem.
        if not pd.api.types.is_numeric_dtype(y):
            return True
        # dropna=False counts NaN as its own value instead of ignoring it.
        return y.nunique(dropna=False) < cls.MAX_CLASSIFICATION_CLASSES

    def predict(self, data):
        """Fit a model on *data* and return a text summary of test metrics.

        The frame is split 80/20 into train and test parts (fixed seed 42),
        the features are standardised using statistics from the training part
        only, and a 100-tree random forest is fitted. The fitted model is
        kept on ``self.model`` for later inspection.

        Args:
            data: DataFrame whose last column is the target and whose other
                columns are the features.

        Returns:
            A multi-line string: accuracy plus a classification report for
            classification targets, or mean squared error and R-squared for
            regression targets.

        Raises:
            ValueError: If *data* has fewer than two columns, i.e. there is
                no feature column left once the target is removed.
        """
        if data.shape[1] < 2:
            raise ValueError(
                "data must contain at least one feature column and a target column"
            )

        self.target_column = data.columns[-1]
        X = data.drop(columns=[self.target_column])
        y = data[self.target_column]

        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.2, random_state=42
        )

        # Fit the scaler on the training split only so that no statistics
        # from the test rows leak into training.
        X_train_scaled = self.scaler.fit_transform(X_train)
        X_test_scaled = self.scaler.transform(X_test)

        is_classification = self._is_classification_target(y)
        if is_classification:
            model = RandomForestClassifier(n_estimators=100, random_state=42)
        else:
            model = RandomForestRegressor(n_estimators=100, random_state=42)

        # Fit a local estimator first and publish it only on success, so a
        # failed fit never leaves an unfitted model on the instance.
        model.fit(X_train_scaled, y_train)
        self.model = model
        # The scaler hands the model plain arrays, so the model itself never
        # sees the column labels; remember them for get_feature_importance().
        self.feature_names = list(X.columns)

        y_pred = self.model.predict(X_test_scaled)

        if is_classification:
            accuracy = accuracy_score(y_test, y_pred)
            report = classification_report(y_test, y_pred)
            return f"Classification Results:\nAccuracy: {accuracy:.2f}\n\nClassification Report:\n{report}"

        mse = mean_squared_error(y_test, y_pred)
        r2 = r2_score(y_test, y_pred)
        return f"Regression Results:\nMean Squared Error: {mse:.2f}\nR-squared Score: {r2:.2f}"

    def get_feature_importance(self):
        """Return the model's feature importances, most important first.

        Returns:
            A DataFrame with columns ``feature`` and ``importance`` sorted by
            descending importance, or the string
            ``"Model has not been trained yet."`` when no model is available.
        """
        if self.model is None:
            return "Model has not been trained yet."

        importances = self.model.feature_importances_

        # Prefer names recorded by the estimator itself; they are only
        # present when it was fitted on input that carried column names.
        names = getattr(self.model, "feature_names_in_", None)
        if names is None:
            # getattr guards against instances that were created without
            # running __init__ and therefore lack the attribute.
            names = getattr(self, "feature_names", None)
        if names is None:
            # Last resort: positional placeholder names.
            names = [f"feature_{i}" for i in range(len(importances))]

        feature_importance = pd.DataFrame({
            'feature': list(names),
            'importance': importances,
        }).sort_values('importance', ascending=False)

        return feature_importance