"""Gradio dashboard that profiles an uploaded CSV with Sweetviz and AutoViz.

NOTE(review): this file reached review with its line breaks collapsed and the
HTML markup inside its string literals stripped (only the text survived).
The Markdown line breaks and the minimal HTML tags below were reconstructed
so that the text renders with structure; confirm them against the intended
styling before merging.
"""

import atexit
import base64
import html
import io
import os
import shutil
import tempfile
import textwrap
import traceback
import warnings
from typing import Optional

# NOTE(review): category_encoders, umap, StandardScaler, io and base64 are not
# used anywhere in this module; they are kept because other code may rely on
# them being imported here.
import category_encoders as ce
import gradio as gr
import matplotlib.pyplot as plt
import pandas as pd
import sweetviz as sv
import umap
from autoviz.AutoViz_Class import AutoViz_Class
from sklearn.preprocessing import StandardScaler

warnings.filterwarnings('ignore')

# Column that is cleaned to numeric and offered to AutoViz as the target.
TARGET_COLUMN = 'value'
# Row cap applied both when sampling and inside AutoViz.
MAX_ROWS_ANALYZED = 5000
MAX_COLS_ANALYZED = 30
# Object columns with fewer distinct values than this become categoricals.
LOW_CARDINALITY_LIMIT = 50

NO_DATA_MESSAGE = "Please upload a dataset first"

DASHBOARD_INTRO = textwrap.dedent("""\
    # Data Analysis Dashboard
    This dashboard provides comprehensive data analysis and visualization capabilities.
    """)

UPLOAD_INSTRUCTIONS = textwrap.dedent("""\
    ### Upload Instructions
    1. Select a CSV file
    2. File will be automatically loaded
    3. Preview will appear below
    """)

SWEETVIZ_FEATURES = textwrap.dedent("""\
    ### Sweetviz Analysis Features
    - Comprehensive data profiling
    - Statistical analysis
    - Feature correlations
    - Missing value analysis
    """)

AUTOVIZ_FEATURES = textwrap.dedent("""\
    ### AutoViz Analysis Features
    - Automated visualization generation
    - Distribution analysis
    - Correlation plots
    - Feature relationships
    - Time series analysis (if applicable)
    """)


class DataAnalyzer:
    """Build Sweetviz and AutoViz HTML reports for a pandas DataFrame.

    Report files are written under a private temporary directory
    (``self.temp_dir``) that is removed when the interpreter exits.
    """

    def __init__(self):
        self.temp_dir = tempfile.mkdtemp()
        # mkdtemp never cleans up after itself; remove the directory at exit.
        atexit.register(shutil.rmtree, self.temp_dir, ignore_errors=True)
        self.df = None
        self.AV = AutoViz_Class()

    def generate_sweetviz_report(self, df: Optional[pd.DataFrame]) -> str:
        """Return the Sweetviz report for *df* as an HTML string.

        Returns a plain "please upload" message when *df* is ``None``.
        Exceptions raised by Sweetviz propagate to the caller; the temporary
        report file is removed either way.
        """
        if df is None:
            return NO_DATA_MESSAGE

        self.df = df
        report = sv.analyze(df)
        report_path = os.path.join(self.temp_dir, "report.html")
        try:
            report.show_html(report_path, open_browser=False)
            with open(report_path, 'r', encoding='utf-8') as f:
                html_content = f.read()
        finally:
            # Remove the file even when rendering or reading fails.
            if os.path.exists(report_path):
                os.remove(report_path)

        # NOTE(review): any wrapper markup around the report was lost before
        # review; only the surrounding newlines are known.
        return f"\n{html_content}\n"

    def preprocess_dataframe(self, df: pd.DataFrame) -> pd.DataFrame:
        """Return a cleaned copy of *df*; the input is not modified.

        Steps, in order:
        1. Strip ``$`` and ``,`` from the ``value`` column (if present) and
           coerce it to numeric; unparseable entries become NaN.
        2. Convert object columns that parse entirely as dates to datetime.
        3. Convert remaining object columns with fewer than
           ``LOW_CARDINALITY_LIMIT`` distinct values to ``category``.
        """
        df = df.copy()

        if TARGET_COLUMN in df.columns:
            df[TARGET_COLUMN] = pd.to_numeric(
                df[TARGET_COLUMN].replace(r'[\$,]', '', regex=True),
                errors='coerce',
            )

        for col in df.columns:
            if df[col].dtype != 'object':
                continue
            # Strict parse + specific exceptions replaces the deprecated
            # errors='ignore' option: a column that does not parse as dates
            # is simply left unchanged.
            try:
                df[col] = pd.to_datetime(df[col])
            except (ValueError, TypeError, OverflowError):
                continue

        for col in df.select_dtypes(include=['object']).columns:
            if df[col].nunique() < LOW_CARDINALITY_LIMIT:
                df[col] = df[col].astype('category')

        return df

    def generate_autoviz_report(self, df: Optional[pd.DataFrame]) -> str:
        """Return an HTML report with summary tables and AutoViz plots.

        The frame is preprocessed, sampled down to ``MAX_ROWS_ANALYZED`` rows
        and passed to AutoViz, whose HTML charts are appended to the summary.
        Any exception is caught and rendered as an HTML error page rather
        than raised. Returns a plain "please upload" message when *df* is
        ``None``.
        """
        if df is None:
            return NO_DATA_MESSAGE

        viz_temp_dir = os.path.join(self.temp_dir, "autoviz_output")
        if os.path.exists(viz_temp_dir):
            shutil.rmtree(viz_temp_dir)
        os.makedirs(viz_temp_dir)

        try:
            df = self.preprocess_dataframe(df)
            if len(df) > MAX_ROWS_ANALYZED:
                df = df.sample(n=MAX_ROWS_ANALYZED, random_state=42)

            print("\nDataset Info:")
            df.info()  # info() prints by itself and returns None
            print("\nColumn Types:")
            print(df.dtypes)

            plt.close('all')

            plots_dir = os.path.join(viz_temp_dir, "plots")
            os.makedirs(plots_dir, exist_ok=True)

            # Only name a target variable when the dataset actually has it.
            dep_var = TARGET_COLUMN if TARGET_COLUMN in df.columns else ''
            self.AV.AutoViz(
                filename='',
                sep=',',
                depVar=dep_var,
                dfte=df,
                header=0,
                verbose=1,
                lowess=False,
                chart_format='html',
                max_rows_analyzed=MAX_ROWS_ANALYZED,
                max_cols_analyzed=MAX_COLS_ANALYZED,
                save_plot_dir=plots_dir,
            )

            return self._build_report_html(df, plots_dir)
        except Exception as e:
            # Top-level boundary for the UI: show the failure instead of
            # letting the callback crash.
            return self._build_error_html(e, traceback.format_exc(), df)
        finally:
            shutil.rmtree(viz_temp_dir, ignore_errors=True)

    @staticmethod
    def _find_plot_files(plots_dir: str) -> list:
        """Return sorted paths of all ``.html`` files under *plots_dir*.

        The search is recursive.
        NOTE(review): AutoViz appears to write charts into a subfolder of
        ``save_plot_dir`` - confirm; walking the tree works either way.
        """
        found = []
        for root, _dirs, files in os.walk(plots_dir):
            found.extend(
                os.path.join(root, name)
                for name in files
                if name.endswith('.html')
            )
        return sorted(found)

    def _build_report_html(self, df: pd.DataFrame, plots_dir: str) -> str:
        """Assemble the summary tables and saved plots into one HTML string."""
        numeric_cols = df.select_dtypes(include=['number']).columns
        categorical_cols = df.select_dtypes(
            include=['category', 'object']
        ).columns

        numeric_stats = (
            pd.DataFrame() if numeric_cols.empty
            else df[numeric_cols].describe().round(2)
        )
        categorical_stats = (
            pd.DataFrame() if categorical_cols.empty
            else df[categorical_cols].describe()
        )

        sections = [
            f"""
<div>
<h2>Data Analysis Report</h2>
<h3>Dataset Overview</h3>
<p>Total Rows: {len(df)}</p>
<p>Total Columns: {len(df.columns)}</p>
<h3>Numeric Variables Summary</h3>
{numeric_stats.to_html(classes='table table-striped')}
<h3>Categorical Variables Summary</h3>
{categorical_stats.to_html(classes='table table-striped')}
<h3>Column Types</h3>
<pre>{html.escape(df.dtypes.to_string())}</pre>
"""
        ]

        # NOTE(review): each plot file is embedded verbatim; whether its
        # scripts run inside gr.HTML should be verified in the browser.
        for plot_path in self._find_plot_files(plots_dir):
            with open(plot_path, 'r', encoding='utf-8') as f:
                plot_content = f.read()
            if not plot_content.strip():
                continue
            file_name = os.path.basename(plot_path)
            title = file_name.replace('.html', '').replace('_', ' ').title()
            sections.append(
                f"\n<h3>{html.escape(title)}</h3>\n{plot_content}\n"
            )

        sections.append("</div>\n")
        return "".join(sections)

    @staticmethod
    def _build_error_html(error: Exception, stack_trace: str,
                          df: pd.DataFrame) -> str:
        """Render an exception and basic dataset facts as an HTML fragment."""
        # Everything interpolated here is escaped: exception text and
        # tracebacks routinely contain '<' and '>'.
        return f"""
<div>
<h3>Error in AutoViz Analysis</h3>
<p>Error details: {html.escape(str(error))}</p>
<p>Stack trace:</p>
<pre>{html.escape(stack_trace)}</pre>
<p>Dataset Info:</p>
<pre>
Rows: {len(df)}
Columns: {len(df.columns)}
Types:
{html.escape(df.dtypes.to_string())}
</pre>
</div>
"""


def _describe_dataset(df: pd.DataFrame) -> str:
    """Return a Markdown summary (rows, columns, memory, dtypes) of *df*."""
    # Plain str/int so numpy reprs such as dtype('int64') do not leak into
    # the rendered Markdown.
    type_counts = {
        str(dtype): int(count)
        for dtype, count in df.dtypes.value_counts().items()
    }
    memory_kb = df.memory_usage(deep=True).sum() / 1024
    return "\n".join([
        "### Dataset Information",
        f"- Rows: {len(df)}",
        f"- Columns: {len(df.columns)}",
        f"- Memory Usage: {memory_kb:.2f} KB",
        f"- Column Types: {type_counts}",
    ])


def create_interface():
    """Build and return the Gradio Blocks app.

    The app has three tabs: CSV upload with a preview, a Sweetviz report
    and an AutoViz report. The uploaded DataFrame is kept in a ``gr.State``
    and handed to the report callbacks.
    """
    analyzer = DataAnalyzer()

    def load_data(file):
        """Read the uploaded CSV; return (info markdown, preview, frame)."""
        if file is None:
            return "No data uploaded yet", None, None
        try:
            # The upload may arrive as a path string or as an object with a
            # ``name`` attribute; accept both.
            df = pd.read_csv(getattr(file, "name", file))
            return _describe_dataset(df), df.head(10), df
        except Exception as e:
            # Best-effort by design: show the problem in the info panel.
            return f"Error loading file: {str(e)}", None, None

    def generate_sweetviz(df):
        """Callback: Sweetviz report HTML, or an error message."""
        if df is None:
            return NO_DATA_MESSAGE
        try:
            return analyzer.generate_sweetviz_report(df)
        except Exception as e:
            return f"Error generating Sweetviz report: {html.escape(str(e))}"

    def generate_autoviz(df):
        """Callback: AutoViz report HTML, or an error message."""
        if df is None:
            return NO_DATA_MESSAGE
        try:
            return analyzer.generate_autoviz_report(df)
        except Exception as e:
            return f"Error generating AutoViz report: {html.escape(str(e))}"

    with gr.Blocks(theme=gr.themes.Soft()) as demo:
        gr.Markdown(DASHBOARD_INTRO)

        # Store the dataframe in a state variable
        current_df = gr.State(None)

        with gr.Tabs():
            # First Tab: Data Upload & Preview
            with gr.TabItem("Data Upload & Preview"):
                with gr.Row():
                    with gr.Column(scale=2):
                        file_input = gr.File(
                            label="Upload CSV File",
                            file_types=[".csv"],
                            file_count="single",
                        )
                    with gr.Column(scale=1):
                        gr.Markdown(UPLOAD_INSTRUCTIONS)

                with gr.Row():
                    data_info = gr.Markdown("No data uploaded yet")

                with gr.Row():
                    data_preview = gr.Dataframe(
                        label="Data Preview",
                        interactive=False,
                        wrap=True,
                    )

                file_input.change(
                    fn=load_data,
                    inputs=[file_input],
                    outputs=[data_info, data_preview, current_df],
                )

            # Second Tab: Sweetviz Analysis
            with gr.TabItem("Sweetviz Analysis"):
                with gr.Row():
                    with gr.Column(scale=2):
                        sweetviz_button = gr.Button(
                            "Generate Sweetviz Report",
                            variant="primary",
                        )
                    with gr.Column(scale=1):
                        gr.Markdown(SWEETVIZ_FEATURES)

                with gr.Row():
                    sweetviz_output = gr.HTML(
                        label="Sweetviz Report",
                        value="Click the button above to generate the report",
                    )

                sweetviz_button.click(
                    fn=generate_sweetviz,
                    inputs=[current_df],
                    outputs=[sweetviz_output],
                )

            # Third Tab: AutoViz Analysis
            with gr.TabItem("AutoViz Analysis"):
                with gr.Row():
                    with gr.Column(scale=2):
                        autoviz_button = gr.Button(
                            "Generate AutoViz Report",
                            variant="primary",
                        )
                    with gr.Column(scale=1):
                        gr.Markdown(AUTOVIZ_FEATURES)

                with gr.Row():
                    autoviz_output = gr.HTML(
                        label="AutoViz Report",
                        value="Click the button above to generate the report",
                    )

                autoviz_button.click(
                    fn=generate_autoviz,
                    inputs=[current_df],
                    outputs=[autoviz_output],
                )

    return demo


if __name__ == "__main__":
    demo = create_interface()
    demo.launch(
        server_name="0.0.0.0",  # listens on all network interfaces
        server_port=7860,
        show_error=True,
        share=False,  # Set to True if you want to create a public link
    )