import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
from sklearn.metrics import (
    mean_squared_error,
    r2_score,
    accuracy_score,
    classification_report,
)


class PredictiveAnalytics:
    """Train a random-forest model on a DataFrame and report hold-out metrics.

    The last column of the DataFrame passed to :meth:`predict` is used as the
    target; every other column is used as a feature. Features are
    standardized with a ``StandardScaler`` before fitting.
    """

    # A non-object target with fewer distinct values than this is treated as
    # a classification target.
    _MAX_CLASS_CARDINALITY = 10

    def __init__(self):
        self.model = None
        self.scaler = StandardScaler()
        self.target_column = None
        # Feature column names captured by predict(). The forest is fitted on
        # the scaler's ndarray output, so the model cannot remember the names
        # itself.
        self.feature_names = None

    def _is_classification(self, y: pd.Series) -> bool:
        """Return True when *y* should be modeled with a classifier."""
        return (
            y.dtype == 'object'
            or len(np.unique(y)) < self._MAX_CLASS_CARDINALITY
        )

    def predict(self, data: pd.DataFrame) -> str:
        """Fit a random forest on *data* and return a text evaluation report.

        Args:
            data: DataFrame whose last column is the target and whose
                remaining columns are the features.

        Returns:
            A formatted string: accuracy plus a classification report for
            classification targets, or mean squared error plus R-squared for
            regression targets. Metrics are computed on a 20% hold-out split.
        """
        # The target is taken to be the last column.
        self.target_column = data.columns[-1]

        X = data.drop(columns=[self.target_column])
        y = data[self.target_column]
        # Remember the column names for get_feature_importance(): scaling
        # returns a plain ndarray, so the names never reach the model.
        self.feature_names = list(X.columns)

        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.2, random_state=42
        )

        # Fit the scaler on the training split only, then reuse it for the
        # test split.
        X_train_scaled = self.scaler.fit_transform(X_train)
        X_test_scaled = self.scaler.transform(X_test)

        is_classification = self._is_classification(y)
        if is_classification:
            self.model = RandomForestClassifier(
                n_estimators=100, random_state=42
            )
        else:
            self.model = RandomForestRegressor(
                n_estimators=100, random_state=42
            )

        self.model.fit(X_train_scaled, y_train)
        y_pred = self.model.predict(X_test_scaled)

        if is_classification:
            accuracy = accuracy_score(y_test, y_pred)
            report = classification_report(y_test, y_pred)
            return f"Classification Results:\nAccuracy: {accuracy:.2f}\n\nClassification Report:\n{report}"

        mse = mean_squared_error(y_test, y_pred)
        r2 = r2_score(y_test, y_pred)
        return f"Regression Results:\nMean Squared Error: {mse:.2f}\nR-squared Score: {r2:.2f}"

    def get_feature_importance(self):
        """Return feature importances of the trained model, highest first.

        Returns:
            The string ``"Model has not been trained yet."`` when no model is
            set; otherwise a DataFrame with columns ``feature`` and
            ``importance`` sorted by descending importance.
        """
        if self.model is None:
            return "Model has not been trained yet."

        importances = self.model.feature_importances_

        # Prefer the names recorded by predict(). A model fitted on an
        # ndarray has no feature_names_in_ attribute, so reading it
        # unconditionally raised AttributeError.
        names = getattr(self, "feature_names", None)
        if names is None or len(names) != len(importances):
            # Covers a model assigned from outside that was fitted on a
            # DataFrame and therefore carries its own names.
            names = getattr(self.model, "feature_names_in_", None)
        if names is None or len(names) != len(importances):
            names = [f"feature_{i}" for i in range(len(importances))]

        feature_importance = pd.DataFrame({
            'feature': list(names),
            'importance': importances,
        }).sort_values('importance', ascending=False)
        return feature_importance