import numpy as np
import pandas as pd
from scipy import stats


class DataAnalyzer:
    """Compute descriptive and distributional insights for a DataFrame."""

    # scipy.stats.normaltest combines a skewness and a kurtosis test; the
    # skewness test is only defined for samples of at least 8 observations.
    _MIN_NORMALTEST_SAMPLES = 8

    def analyze(self, data, alpha=0.05):
        """Return a dictionary of summary insights about *data*.

        Args:
            data: The pandas DataFrame to analyse.
            alpha: Significance level for the normality test. A column is
                reported as normal when its p-value is greater than *alpha*.

        Returns:
            A dict with these keys:

            * ``'basic_stats'`` -- ``data.describe()`` as a dict (empty when
              the frame has no columns).
            * ``'correlations'`` -- pairwise correlation matrix as a dict;
              only present when there is more than one numeric column.
            * ``'distribution'`` -- ``{'skewness': ..., 'kurtosis': ...}``
              for the numeric columns.
            * ``'<column>_distribution'`` -- value counts for every
              object-dtype column.
            * ``'normality_tests'`` -- per numeric column,
              ``{'is_normal': bool, 'p_value': float}``.
        """
        insights = {}

        # Basic statistics. describe() raises ValueError on a frame without
        # columns, so report empty statistics for that case instead.
        insights['basic_stats'] = (
            data.describe().to_dict() if data.shape[1] else {}
        )

        numeric_columns = data.select_dtypes(include=[np.number]).columns
        numeric_data = data[numeric_columns]

        # A correlation matrix is only meaningful with at least two columns.
        if len(numeric_columns) > 1:
            insights['correlations'] = numeric_data.corr().to_dict()

        # Skewness and kurtosis
        insights['distribution'] = {
            'skewness': numeric_data.skew().to_dict(),
            'kurtosis': numeric_data.kurtosis().to_dict(),
        }

        # Categorical data analysis
        categorical_columns = data.select_dtypes(include=['object']).columns
        for column in categorical_columns:
            insights[f'{column}_distribution'] = (
                data[column].value_counts().to_dict()
            )

        insights['normality_tests'] = self._normality_tests(
            numeric_data, alpha
        )
        return insights

    def _normality_tests(self, numeric_data, alpha):
        """Run the D'Agostino-Pearson normality test on each column.

        Columns with fewer than ``_MIN_NORMALTEST_SAMPLES`` non-null values
        cannot be tested; they get ``p_value = nan`` and ``is_normal = False``
        rather than aborting the whole analysis.
        """
        results = {}
        for column in numeric_data.columns:
            sample = numeric_data[column].dropna()
            if len(sample) < self._MIN_NORMALTEST_SAMPLES:
                p_value = float('nan')
            else:
                _, p_value = stats.normaltest(sample)
                p_value = float(p_value)
            results[column] = {
                # Built-in bool/float keep the result JSON-serialisable;
                # a NaN p-value compares False, i.e. "not shown to be normal".
                'is_normal': bool(p_value > alpha),
                'p_value': p_value,
            }
        return results