|
import pandas as pd |
|
import matplotlib.pyplot as plt |
|
import seaborn as sns |
|
from io import StringIO |
|
|
|
class ReportGenerator:
    """Build a plain-text analysis report for a pandas DataFrame."""

    def generate(self, data: pd.DataFrame) -> str:
        """Return a multi-section text report describing ``data``.

        Sections are written in this order:

        1. Data Summary -- ``data.describe()``.
        2. Missing Values -- per-column null counts.
        3. Correlation Analysis -- correlation matrix of the numeric
           columns, or a notice when fewer than two are present.
        4. Categorical Data Analysis -- value counts for every
           ``object``-dtype column; the section is omitted entirely when
           there are none.
        5. Data Visualizations -- a fixed pointer to the plots in the
           Streamlit app.

        Args:
            data: The DataFrame to summarise.

        Returns:
            The full report as a single string.
        """
        report = StringIO()
        report.write("Data Analysis Report\n\n")

        report.write("1. Data Summary\n")
        report.write(data.describe().to_string())
        report.write("\n\n")

        report.write("2. Missing Values\n")
        report.write(data.isnull().sum().to_string())
        report.write("\n\n")

        report.write("3. Correlation Analysis\n")
        # "number" selects every numeric dtype. The previous ``pd.np.number``
        # relied on the ``pandas.np`` alias, which was removed in pandas 2.0
        # and raises AttributeError there.
        numeric_columns = data.select_dtypes(include="number").columns
        if len(numeric_columns) > 1:
            correlation_matrix = data[numeric_columns].corr()
            report.write(correlation_matrix.to_string())
        else:
            report.write("Not enough numeric columns for correlation analysis.")
        report.write("\n\n")

        categorical_columns = data.select_dtypes(include=["object"]).columns
        if len(categorical_columns) > 0:
            report.write("4. Categorical Data Analysis\n")
            for column in categorical_columns:
                report.write(f"{column} value counts:\n")
                report.write(data[column].value_counts().to_string())
                report.write("\n\n")

        report.write("5. Data Visualizations\n")
        report.write("Please refer to the generated plots in the Streamlit app for visual representations of the data.\n\n")

        return report.getvalue()
|
|