hackathon / data_analysis.py
Ashar086's picture
Rename text_analyzer.py to data_analysis.py
568dff8 verified
raw
history blame
1.38 kB
import pandas as pd
import numpy as np
from scipy import stats
class DataAnalyzer:
    """Collect descriptive statistics and simple diagnostics for a DataFrame."""

    # scipy.stats.normaltest builds on skewtest, which needs at least
    # 8 observations; below that the test cannot be evaluated.
    _MIN_NORMALTEST_SAMPLES = 8

    # Significance threshold: p-values above it are reported as "normal".
    _NORMALITY_ALPHA = 0.05

    def analyze(self, data):
        """Return a dictionary of insights about ``data``.

        Args:
            data: A pandas DataFrame to summarise.

        Returns:
            A dict with these keys:

            * ``'basic_stats'``: ``data.describe()`` as a nested dict
              (empty when the frame has no columns).
            * ``'correlations'``: correlation matrix of the numeric columns;
              only present when there is more than one numeric column.
            * ``'distribution'``: ``{'skewness': ..., 'kurtosis': ...}`` for
              the numeric columns.
            * ``'<column>_distribution'``: value counts for every
              object-dtype column.
            * ``'normality_tests'``: per numeric column, a dict with
              ``'is_normal'`` and ``'p_value'``. Both are ``None`` when the
              test could not be evaluated (fewer than 8 non-null values, or
              a non-finite p-value).
        """
        insights = {}

        # Basic statistics. describe() raises ValueError on a frame that
        # has no columns, so report an empty summary in that case.
        if len(data.columns) > 0:
            insights['basic_stats'] = data.describe().to_dict()
        else:
            insights['basic_stats'] = {}

        # Correlation analysis (needs at least two numeric columns).
        numeric_data = data.select_dtypes(include=[np.number])
        numeric_columns = numeric_data.columns
        if len(numeric_columns) > 1:
            insights['correlations'] = numeric_data.corr().to_dict()

        # Skewness and kurtosis
        insights['distribution'] = {
            'skewness': numeric_data.skew().to_dict(),
            'kurtosis': numeric_data.kurtosis().to_dict(),
        }

        # Categorical data analysis
        categorical_columns = data.select_dtypes(include=['object']).columns
        for column in categorical_columns:
            insights[f'{column}_distribution'] = (
                data[column].value_counts().to_dict()
            )

        # Check for normality
        normality_tests = {}
        for column in numeric_columns:
            sample = data[column].dropna()
            p_value = None
            # Skip the test on samples that are too small instead of letting
            # one short column abort (or silently distort) the whole report.
            if len(sample) >= self._MIN_NORMALTEST_SAMPLES:
                _, raw_p_value = stats.normaltest(sample)
                # A NaN p-value means the test was undefined for this data;
                # comparing it with the threshold would wrongly yield False.
                if np.isfinite(raw_p_value):
                    # Plain float/bool keep the result JSON-serializable.
                    p_value = float(raw_p_value)
            normality_tests[column] = {
                'is_normal': (
                    None if p_value is None
                    else p_value > self._NORMALITY_ALPHA
                ),
                'p_value': p_value,
            }
        insights['normality_tests'] = normality_tests

        return insights