import atexit
import base64
import html
import io
import os
import shutil
import tempfile
import traceback
import warnings

import category_encoders as ce
import gradio as gr
import matplotlib.pyplot as plt
import pandas as pd
import sweetviz as sv
import umap
from autoviz.AutoViz_Class import AutoViz_Class
from sklearn.preprocessing import StandardScaler
# Install a catch-all "ignore" filter: every warning category from every
# module is suppressed for the lifetime of the process.
warnings.filterwarnings('ignore')
class DataAnalyzer:
    """Build HTML analysis reports (Sweetviz and AutoViz) for a DataFrame.

    Report files are written to a private scratch directory created when the
    analyzer is constructed; every public method returns an HTML string (or a
    plain message when no dataset is available).
    """

    # Frames longer than this are randomly sampled down before AutoViz runs;
    # the same value is passed to AutoViz as ``max_rows_analyzed``.
    MAX_ROWS = 5000
    # Passed to AutoViz as ``max_cols_analyzed``.
    MAX_COLS = 30
    # Text columns with fewer distinct values than this become categoricals.
    CATEGORY_CARDINALITY_LIMIT = 50
    # Column used as the AutoViz target variable when the dataset has it.
    TARGET_COLUMN = 'value'

    def __init__(self):
        """Create the scratch directory and the AutoViz engine."""
        self.temp_dir = tempfile.mkdtemp()
        # mkdtemp() never cleans up after itself, so remove the directory
        # when the interpreter exits.
        atexit.register(shutil.rmtree, self.temp_dir, ignore_errors=True)
        self.df = None
        self.AV = AutoViz_Class()

    @staticmethod
    def _embed_document(document, height=800):
        """Return an ``<iframe>`` whose ``srcdoc`` is the given HTML document.

        The document is attribute-escaped so it can sit inside ``srcdoc``.
        NOTE(review): an iframe is used on the assumption that the generated
        reports are standalone pages whose scripts/styles should not be
        merged into the host page -- confirm against the rendered output.
        """
        escaped = html.escape(document, quote=True)
        return (
            f'<iframe srcdoc="{escaped}" '
            f'style="width: 100%; height: {height}px; border: none;"></iframe>'
        )

    @staticmethod
    def _is_text_column(series):
        """Tell whether *series* holds free text (object or pandas string dtype)."""
        # Besides ``object``, accept the dedicated pandas string dtype so that
        # text columns are still recognised when pandas infers that dtype.
        return pd.api.types.is_object_dtype(series) or isinstance(
            series.dtype, pd.StringDtype
        )

    @staticmethod
    def _find_plot_files(plots_dir):
        """Return every ``.html`` file below *plots_dir*, in a stable order.

        The tree is walked recursively so charts are found whether they were
        written directly into *plots_dir* or into a sub-folder of it. A
        missing directory simply yields an empty list.
        """
        found = []
        for root, dirs, files in os.walk(plots_dir):
            dirs.sort()  # in-place sort makes the traversal order deterministic
            found.extend(
                os.path.join(root, name)
                for name in sorted(files)
                if name.endswith('.html')
            )
        return found

    @staticmethod
    def _summary_html(df):
        """Return the textual part of the report: sizes, statistics, dtypes."""
        numeric_cols = df.select_dtypes(include=['number']).columns
        categorical_cols = df.select_dtypes(include=['category', 'object']).columns
        numeric_stats = (
            df[numeric_cols].describe().round(2)
            if len(numeric_cols) > 0 else pd.DataFrame()
        )
        categorical_stats = (
            df[categorical_cols].describe()
            if len(categorical_cols) > 0 else pd.DataFrame()
        )
        return (
            "<h1>Data Analysis Report</h1>\n"
            "<h2>Dataset Overview</h2>\n"
            f"<p>Total Rows: {len(df)}</p>\n"
            f"<p>Total Columns: {len(df.columns)}</p>\n"
            "<h2>Numeric Variables Summary</h2>\n"
            f"{numeric_stats.to_html(classes='table table-striped')}\n"
            "<h2>Categorical Variables Summary</h2>\n"
            f"{categorical_stats.to_html(classes='table table-striped')}\n"
            "<h2>Column Types</h2>\n"
            # <pre> keeps the one-column-per-line layout of to_string().
            f"<pre>{html.escape(df.dtypes.to_string())}</pre>"
        )

    @staticmethod
    def _error_html(exc, df):
        """Return an HTML fragment describing *exc* and the frame being analysed."""
        trace = "".join(
            traceback.format_exception(type(exc), exc, exc.__traceback__)
        )
        # Everything interpolated here is escaped: tracebacks contain text
        # such as "<module>" that a browser would otherwise parse as a tag.
        return (
            "<div>\n"
            "<h3>Error in AutoViz Analysis</h3>\n"
            f"<p>Error details: {html.escape(str(exc))}</p>\n"
            "<p>Stack trace:</p>\n"
            f"<pre>{html.escape(trace)}</pre>\n"
            "<p>Dataset Info:</p>\n"
            f"<pre>Rows: {len(df)}\n"
            f"Columns: {len(df.columns)}\n"
            f"Types:\n{html.escape(df.dtypes.to_string())}</pre>\n"
            "</div>"
        )

    def generate_sweetviz_report(self, df):
        """Run Sweetviz on *df* and return the report as embeddable HTML.

        Args:
            df: DataFrame to profile, or ``None``.

        Returns:
            A prompt message when *df* is ``None``; otherwise an iframe
            containing the generated Sweetviz report.

        Raises:
            Whatever Sweetviz or the file system raises; callers handle it.
        """
        if df is None:
            return "Please upload a dataset first"

        self.df = df
        report = sv.analyze(df)
        report_path = os.path.join(self.temp_dir, "report.html")
        try:
            report.show_html(report_path, open_browser=False)
            with open(report_path, 'r', encoding='utf-8') as f:
                html_content = f.read()
        finally:
            # Remove the intermediate file even when writing/reading failed.
            if os.path.exists(report_path):
                os.remove(report_path)
        # The report that was just read is what gets returned.
        return self._embed_document(html_content)

    def preprocess_dataframe(self, df):
        """Return a cleaned copy of *df*; the input frame is not modified.

        Steps, in order:
          1. ``value`` column (if present): strip ``$`` and ``,`` and convert
             to numbers; unparsable entries become NaN.
          2. Text columns that parse entirely as dates become datetimes.
          3. Remaining text columns with fewer than
             ``CATEGORY_CARDINALITY_LIMIT`` distinct values become categoricals.
        """
        df = df.copy()

        if self.TARGET_COLUMN in df.columns:
            cleaned = df[self.TARGET_COLUMN].replace(r'[$,]', '', regex=True)
            df[self.TARGET_COLUMN] = pd.to_numeric(cleaned, errors='coerce')

        for col in df.columns:
            if not self._is_text_column(df[col]):
                continue
            try:
                df[col] = pd.to_datetime(df[col])
            except (ValueError, TypeError, OverflowError):
                # Not a date column: leave it untouched (best effort).
                continue

        for col in df.columns:
            if (
                self._is_text_column(df[col])
                and df[col].nunique() < self.CATEGORY_CARDINALITY_LIMIT
            ):
                df[col] = df[col].astype('category')

        return df

    def generate_autoviz_report(self, df):
        """Run AutoViz on *df* and return an HTML report.

        The frame is preprocessed, sampled to at most ``MAX_ROWS`` rows,
        charted by AutoViz into a scratch folder, and the resulting chart
        files are appended to a statistics summary.

        Args:
            df: DataFrame to analyse, or ``None``.

        Returns:
            A prompt message when *df* is ``None``; the report HTML on
            success; an HTML error description if any step raises.
        """
        if df is None:
            return "Please upload a dataset first"

        viz_temp_dir = os.path.join(self.temp_dir, "autoviz_output")
        if os.path.exists(viz_temp_dir):
            shutil.rmtree(viz_temp_dir)
        os.makedirs(viz_temp_dir)

        try:
            df = self.preprocess_dataframe(df)
            if len(df) > self.MAX_ROWS:
                # Fixed seed so repeated runs chart the same sample.
                df = df.sample(n=self.MAX_ROWS, random_state=42)

            print("\nDataset Info:")
            df.info()  # info() prints by itself and returns None
            print("\nColumn Types:")
            print(df.dtypes)

            plt.close('all')

            plots_dir = os.path.join(viz_temp_dir, "plots")
            os.makedirs(plots_dir, exist_ok=True)

            # Only name a target variable the frame actually contains; an
            # empty string tells AutoViz there is no target.
            dep_var = self.TARGET_COLUMN if self.TARGET_COLUMN in df.columns else ''
            self.AV.AutoViz(
                filename='',
                sep=',',
                depVar=dep_var,
                dfte=df,
                header=0,
                verbose=1,
                lowess=False,
                chart_format='html',
                max_rows_analyzed=self.MAX_ROWS,
                max_cols_analyzed=self.MAX_COLS,
                save_plot_dir=plots_dir,
            )

            sections = [self._summary_html(df)]
            for path in self._find_plot_files(plots_dir):
                with open(path, 'r', encoding='utf-8') as f:
                    plot_content = f.read()
                if not plot_content.strip():
                    continue
                title = (
                    os.path.basename(path)
                    .replace('.html', '')
                    .replace('_', ' ')
                    .title()
                )
                sections.append(
                    f"<h3>{html.escape(title)}</h3>\n"
                    f"{self._embed_document(plot_content, height=600)}"
                )
            return "<div>\n" + "\n".join(sections) + "\n</div>"
        except Exception as e:
            # Boundary handler: the UI shows the failure instead of crashing.
            return self._error_html(e, df)
        finally:
            shutil.rmtree(viz_temp_dir, ignore_errors=True)
def create_interface():
    """Build the Gradio dashboard and return it (not yet launched).

    The dashboard has three tabs: CSV upload with a preview, a Sweetviz
    report, and an AutoViz report. The uploaded DataFrame is kept in a
    ``gr.State`` so both report tabs work on the same data.

    Returns:
        The configured ``gr.Blocks`` application.
    """
    analyzer = DataAnalyzer()

    with gr.Blocks(theme=gr.themes.Soft()) as demo:
        gr.Markdown(
            "\n# Data Analysis Dashboard\n"
            "This dashboard provides comprehensive data analysis and "
            "visualization capabilities.\n"
        )

        # Holds the uploaded DataFrame between events.
        current_df = gr.State(None)

        with gr.Tabs():
            # First tab: data upload & preview
            with gr.TabItem("Data Upload & Preview"):
                with gr.Row():
                    with gr.Column(scale=2):
                        file_input = gr.File(
                            label="Upload CSV File",
                            file_types=[".csv"],
                            file_count="single",
                        )
                    with gr.Column(scale=1):
                        gr.Markdown(
                            "\n### Upload Instructions\n"
                            "1. Select a CSV file\n"
                            "2. File will be automatically loaded\n"
                            "3. Preview will appear below\n"
                        )
                with gr.Row():
                    data_info = gr.Markdown("No data uploaded yet")
                with gr.Row():
                    data_preview = gr.Dataframe(
                        label="Data Preview",
                        interactive=False,
                        wrap=True,
                    )

                def load_data(file):
                    """Read the uploaded CSV; return (info markdown, preview, frame)."""
                    if file is None:
                        return "No data uploaded yet", None, None
                    try:
                        # NOTE(review): presumably the upload arrives either as
                        # an object exposing its path via ``.name`` or as a
                        # plain path string depending on the Gradio version --
                        # accept both; confirm against the installed version.
                        csv_path = getattr(file, "name", file)
                        df = pd.read_csv(csv_path)
                        # Plain str/int keep the summary readable instead of
                        # showing numpy reprs such as dtype('int64').
                        dtype_counts = {
                            str(dtype): int(count)
                            for dtype, count in df.dtypes.value_counts().items()
                        }
                        memory_kb = df.memory_usage(deep=True).sum() / 1024
                        info_text = (
                            "\n### Dataset Information\n"
                            f"- Rows: {len(df)}\n"
                            f"- Columns: {len(df.columns)}\n"
                            f"- Memory Usage: {memory_kb:.2f} KB\n"
                            f"- Column Types: {dtype_counts}\n"
                        )
                        return info_text, df.head(10), df
                    except Exception as e:
                        # UI boundary: report the problem in the info panel.
                        return f"Error loading file: {str(e)}", None, None

                file_input.change(
                    fn=load_data,
                    inputs=[file_input],
                    outputs=[data_info, data_preview, current_df],
                )

            # Second tab: Sweetviz analysis
            with gr.TabItem("Sweetviz Analysis"):
                with gr.Row():
                    with gr.Column(scale=2):
                        sweetviz_button = gr.Button(
                            "Generate Sweetviz Report",
                            variant="primary",
                        )
                    with gr.Column(scale=1):
                        gr.Markdown(
                            "\n### Sweetviz Analysis Features\n"
                            "- Comprehensive data profiling\n"
                            "- Statistical analysis\n"
                            "- Feature correlations\n"
                            "- Missing value analysis\n"
                        )
                with gr.Row():
                    sweetviz_output = gr.HTML(
                        label="Sweetviz Report",
                        value="Click the button above to generate the report",
                    )

                def generate_sweetviz(df):
                    """Return the Sweetviz report HTML, or a message on failure."""
                    if df is None:
                        return "Please upload a dataset first"
                    try:
                        return analyzer.generate_sweetviz_report(df)
                    except Exception as e:
                        return f"Error generating Sweetviz report: {str(e)}"

                sweetviz_button.click(
                    fn=generate_sweetviz,
                    inputs=[current_df],
                    outputs=[sweetviz_output],
                )

            # Third tab: AutoViz analysis
            with gr.TabItem("AutoViz Analysis"):
                with gr.Row():
                    with gr.Column(scale=2):
                        autoviz_button = gr.Button(
                            "Generate AutoViz Report",
                            variant="primary",
                        )
                    with gr.Column(scale=1):
                        gr.Markdown(
                            "\n### AutoViz Analysis Features\n"
                            "- Automated visualization generation\n"
                            "- Distribution analysis\n"
                            "- Correlation plots\n"
                            "- Feature relationships\n"
                            "- Time series analysis (if applicable)\n"
                        )
                with gr.Row():
                    autoviz_output = gr.HTML(
                        label="AutoViz Report",
                        value="Click the button above to generate the report",
                    )

                def generate_autoviz(df):
                    """Return the AutoViz report HTML, or a message on failure."""
                    if df is None:
                        return "Please upload a dataset first"
                    try:
                        return analyzer.generate_autoviz_report(df)
                    except Exception as e:
                        return f"Error generating AutoViz report: {str(e)}"

                autoviz_button.click(
                    fn=generate_autoviz,
                    inputs=[current_df],
                    outputs=[autoviz_output],
                )

    return demo
if __name__ == "__main__":
    # Script entry point: build the dashboard, then serve it on port 7860
    # with the server name "0.0.0.0" and in-UI error display enabled.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "show_error": True,
        "share": False,  # Set to True if you want to create a public link
    }
    demo = create_interface()
    demo.launch(**launch_options)