Spaces:

Ashar086
/

hackathon

Sleeping

App Files Files Community

hackathon / predictive_analytics.py

Ashar086

Create predictive_analytics.py

cfe2f48 verified 6 months ago

raw

history blame contribute delete

2.41 kB

	import pandas as pd
	import numpy as np
	from sklearn.model_selection import train_test_split
	from sklearn.preprocessing import StandardScaler
	from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
	from sklearn.metrics import mean_squared_error, r2_score, accuracy_score, classification_report

	class PredictiveAnalytics:
	    """Fit and evaluate a random-forest model on a tabular dataset.

	    The last column of the DataFrame passed to :meth:`predict` is used as
	    the target; every other column is used as a feature. A classifier or a
	    regressor is chosen automatically from the target column.
	    """

	    def __init__(self):
	        self.model = None
	        self.scaler = StandardScaler()
	        self.target_column = None
	        # Feature column names from the most recent ``predict`` call. Kept
	        # on the instance because the model is fitted on the scaler's
	        # output (a plain array), which carries no column names.
	        self.feature_names = None

	    @staticmethod
	    def _is_classification_target(y):
	        """Return True when target ``y`` should be modelled as class labels.

	        Any non-numeric target (object, string, categorical, ...) is
	        treated as classification, as is a numeric target with fewer than
	        10 distinct values.
	        """
	        if not pd.api.types.is_numeric_dtype(y):
	            return True
	        return len(np.unique(y)) < 10

	    def predict(self, data):
	        """Train a model on ``data`` and return a text evaluation report.

	        Args:
	            data: DataFrame whose last column is the target and whose
	                remaining columns are the features.

	        Returns:
	            A formatted string: accuracy plus a classification report for
	            classification targets, or mean squared error and R-squared
	            for regression targets. Metrics are computed on a held-out
	            20% split.
	        """
	        # Identify the target column (assuming it's the last column)
	        self.target_column = data.columns[-1]

	        # Prepare the data
	        X = data.drop(columns=[self.target_column])
	        y = data[self.target_column]

	        # Remember the column names so get_feature_importance() can label
	        # its rows even though the model only ever sees scaled arrays.
	        self.feature_names = list(X.columns)

	        # Split the data
	        X_train, X_test, y_train, y_test = train_test_split(
	            X, y, test_size=0.2, random_state=42
	        )

	        # Scale the features (statistics are learned from the training split only)
	        X_train_scaled = self.scaler.fit_transform(X_train)
	        X_test_scaled = self.scaler.transform(X_test)

	        # Determine if it's a regression or classification problem
	        is_classification = self._is_classification_target(y)
	        if is_classification:
	            self.model = RandomForestClassifier(n_estimators=100, random_state=42)
	        else:
	            self.model = RandomForestRegressor(n_estimators=100, random_state=42)

	        # Train the model
	        self.model.fit(X_train_scaled, y_train)

	        # Make predictions
	        y_pred = self.model.predict(X_test_scaled)

	        # Evaluate the model
	        if is_classification:
	            accuracy = accuracy_score(y_test, y_pred)
	            report = classification_report(y_test, y_pred)
	            return f"Classification Results:\nAccuracy: {accuracy:.2f}\n\nClassification Report:\n{report}"

	        mse = mean_squared_error(y_test, y_pred)
	        r2 = r2_score(y_test, y_pred)
	        return f"Regression Results:\nMean Squared Error: {mse:.2f}\nR-squared Score: {r2:.2f}"

	    def get_feature_importance(self):
	        """Return the fitted model's feature importances, highest first.

	        Returns:
	            The string ``"Model has not been trained yet."`` when no model
	            is set; otherwise a DataFrame with columns ``feature`` and
	            ``importance`` sorted by descending importance.
	        """
	        if self.model is None:
	            return "Model has not been trained yet."

	        importances = self.model.feature_importances_

	        # Prefer the names recorded by predict(). The model's own
	        # ``feature_names_in_`` is only a fallback: it is absent when the
	        # model was fitted on a plain array, which is what predict() does.
	        names = self.feature_names
	        if names is None:
	            names = getattr(self.model, "feature_names_in_", None)
	        if names is None:
	            names = [f"feature_{i}" for i in range(len(importances))]

	        feature_importance = pd.DataFrame({
	            'feature': names,
	            'importance': importances,
	        }).sort_values('importance', ascending=False)

	        return feature_importance