# hackathon / predictive_analytics.py
# Ashar086's picture
# Create predictive_analytics.py
# cfe2f48 verified
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
from sklearn.metrics import mean_squared_error, r2_score, accuracy_score, classification_report
class PredictiveAnalytics:
    """Train and evaluate a random-forest model on a tabular dataset.

    The last column of the supplied DataFrame is treated as the target and
    every other column as a feature. A classifier or a regressor is chosen
    automatically from the target's dtype and number of distinct values.
    """

    # A numeric target with fewer distinct values than this is treated as a
    # set of class labels rather than a continuous quantity.
    _MAX_CLASS_CARDINALITY = 10

    def __init__(self):
        self.model = None
        self.scaler = StandardScaler()
        self.target_column = None
        # Feature column names recorded by predict(); the fitted model cannot
        # supply them itself because it is trained on a scaled NumPy array.
        self.feature_names = None

    @classmethod
    def _is_classification_target(cls, y):
        """Return True when target series ``y`` should be modelled as classes."""
        # Any non-numeric dtype (object, string, categorical, ...) can only be
        # a label; numeric targets count as labels when they take few values.
        if not pd.api.types.is_numeric_dtype(y):
            return True
        return y.nunique(dropna=False) < cls._MAX_CLASS_CARDINALITY

    def predict(self, data):
        """Fit a random forest on ``data`` and report hold-out metrics.

        Despite its name, this method trains a new model on every call: it
        splits ``data`` 80/20 (fixed seed), standardizes the features, fits
        a ``RandomForestClassifier`` or ``RandomForestRegressor`` and scores
        it on the held-out 20%.

        Args:
            data: DataFrame whose last column is the target and whose other
                columns are the features.

        Returns:
            A formatted, human-readable string: accuracy plus a
            classification report for classification targets, or mean
            squared error plus R-squared for regression targets.

        Side effects:
            Overwrites ``self.target_column``, ``self.feature_names`` and
            ``self.model``, and refits ``self.scaler``.
        """
        # Identify the target column (assuming it's the last column)
        self.target_column = data.columns[-1]

        # Prepare the data
        X = data.drop(columns=[self.target_column])
        y = data[self.target_column]

        # Scaling turns the DataFrame into a plain array, so remember the
        # column names here for get_feature_importance().
        self.feature_names = list(X.columns)

        # Split the data
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.2, random_state=42
        )

        # Scale the features (fit on the training split only)
        X_train_scaled = self.scaler.fit_transform(X_train)
        X_test_scaled = self.scaler.transform(X_test)

        # Determine if it's a regression or classification problem
        is_classification = self._is_classification_target(y)
        if is_classification:
            self.model = RandomForestClassifier(n_estimators=100, random_state=42)
        else:
            self.model = RandomForestRegressor(n_estimators=100, random_state=42)

        # Train the model
        self.model.fit(X_train_scaled, y_train)

        # Make predictions
        y_pred = self.model.predict(X_test_scaled)

        # Evaluate the model
        if is_classification:
            accuracy = accuracy_score(y_test, y_pred)
            report = classification_report(y_test, y_pred)
            return f"Classification Results:\nAccuracy: {accuracy:.2f}\n\nClassification Report:\n{report}"

        mse = mean_squared_error(y_test, y_pred)
        r2 = r2_score(y_test, y_pred)
        return f"Regression Results:\nMean Squared Error: {mse:.2f}\nR-squared Score: {r2:.2f}"

    def get_feature_importance(self):
        """Return per-feature importances of the trained model.

        Returns:
            The string ``"Model has not been trained yet."`` when no model is
            set; otherwise a DataFrame with columns ``feature`` and
            ``importance``, sorted by descending importance.
        """
        if self.model is None:
            return "Model has not been trained yet."

        importances = self.model.feature_importances_

        # Prefer names the model recorded itself (only present when it was
        # fitted on named columns), then the names saved by predict(), and
        # finally positional placeholders so the call never fails on names.
        names = getattr(self.model, "feature_names_in_", None)
        if names is None:
            names = getattr(self, "feature_names", None)
        if names is None:
            names = [f"feature_{i}" for i in range(len(importances))]

        feature_importance = pd.DataFrame({
            'feature': names,
            'importance': importances,
        }).sort_values('importance', ascending=False)
        return feature_importance