Spaces:

baconnier
/

csv-plus-plus

Sleeping

App Files Community

baconnier committed on Oct 26, 2024

Commit

f421e23

•

1 Parent(s): 8c15039

Update app.py

Browse files

Files changed (1) hide show

app.py +177 -100

app.py CHANGED Viewed

@@ -10,6 +10,8 @@ from sklearn.preprocessing import StandardScaler
 from autoviz.AutoViz_Class import AutoViz_Class
 import shutil
 import warnings
 warnings.filterwarnings('ignore')
 class DataAnalyzer:
@@ -46,18 +48,12 @@ class DataAnalyzer:
         return html_with_table
     def preprocess_dataframe(self, df):
-        """Preprocess dataframe for visualization"""
         df = df.copy()
         # Convert 'value' column to numeric if possible
-        try:
-            # Remove any currency symbols and commas
-            df['value'] = df['value'].replace('[\$,]', '', regex=True)
-            # Convert to float
-            df['value'] = pd.to_numeric(df['value'], errors='coerce')
-        except:
-            pass
         # Handle datetime columns
         for col in df.columns:
             if df[col].dtype == 'object':
@@ -65,18 +61,12 @@ class DataAnalyzer:
                     df[col] = pd.to_datetime(df[col], errors='ignore')
                 except:
                     pass
-        datetime_columns = df.select_dtypes(include=['datetime64']).columns
-        for col in datetime_columns:
-            df[f'{col}_year'] = df[col].dt.year
-            df[f'{col}_month'] = df[col].dt.month
-            df = df.drop(columns=[col])
         # Convert categorical columns with low cardinality
         for col in df.select_dtypes(include=['object']).columns:
             if df[col].nunique() < 50:
                 df[col] = df[col].astype('category')
         return df
     def generate_autoviz_report(self, df):
@@ -103,7 +93,11 @@ class DataAnalyzer:
             plt.close('all')
-            # Run AutoViz with modified settings
             dfte = self.AV.AutoViz(
                 filename='',
                 sep=',',
@@ -112,64 +106,92 @@ class DataAnalyzer:
                 header=0,
                 verbose=1,
                 lowess=False,
-                chart_format='html',  # Changed back to html
                 max_rows_analyzed=5000,
                 max_cols_analyzed=30,
-                save_plot_dir=viz_temp_dir
             )
-            # Collect visualizations
-            html_parts = []
-            if os.path.exists(viz_temp_dir):
-                for file in sorted(os.listdir(viz_temp_dir)):
-                    if file.endswith('.html'):
-                        file_path = os.path.join(viz_temp_dir, file)
-                        try:
-                            with open(file_path, 'r', encoding='utf-8') as f:
-                                content = f.read()
-                                if content.strip():
-                                    html_parts.append(content)
-                        except Exception as e:
-                            print(f"Error reading file {file}: {str(e)}")
             # Generate summary statistics
-            numeric_summary = df.describe().to_html() if not df.select_dtypes(include=['number']).empty else ""
-            categorical_summary = df.describe(include=['category', 'object']).to_html() if not df.select_dtypes(include=['category', 'object']).empty else ""
-            if not html_parts:
-                return f"""
-                <div style="padding: 20px; border: 1px solid #ddd; border-radius: 5px;">
-                    <h3>Data Summary</h3>
                     <p>Total Rows: {len(df)}</p>
                     <p>Total Columns: {len(df.columns)}</p>
-                    <h4>Numeric Summary:</h4>
-                    {numeric_summary}
-                    <h4>Categorical Summary:</h4>
-                    {categorical_summary}
-                    <hr>
-                    <h3>Column Types:</h3>
                     <pre>{df.dtypes.to_string()}</pre>
                 </div>
-                """
-            combined_html = f"""
-            <div style="padding: 20px; border: 1px solid #ddd; border-radius: 5px;">
-                <h2 style="text-align: center;">AutoViz Analysis Report</h2>
-                <div style="margin: 20px;">
-                    <h3>Dataset Summary</h3>
-                    <p>Rows analyzed: {len(df)}</p>
-                    <p>Columns: {len(df.columns)}</p>
-                    <h4>Numeric Summary:</h4>
-                    {numeric_summary}
-                    <h4>Categorical Summary:</h4>
-                    {categorical_summary}
-                </div>
-                <hr>
-                {'<hr>'.join(html_parts)}
-            </div>
             """
-            return combined_html
         except Exception as e:
             import traceback
@@ -192,11 +214,15 @@ class DataAnalyzer:
             if os.path.exists(viz_temp_dir):
                 shutil.rmtree(viz_temp_dir)
 def create_interface():
     analyzer = DataAnalyzer()
     with gr.Blocks(theme=gr.themes.Soft()) as demo:
-        gr.Markdown("# Data Analysis Dashboard")
         # Store the dataframe in a state variable
         current_df = gr.State(None)
@@ -205,49 +231,82 @@ def create_interface():
             # First Tab: Data Upload & Preview
             with gr.TabItem("Data Upload & Preview"):
                 with gr.Row():
-                    file_input = gr.File(label="Upload CSV")
-                data_preview = gr.Dataframe(label="Data Preview", interactive=False)
                 with gr.Row():
-                    gr.Markdown("""
-                    ### Data Preview Info
-                    - Upload a CSV file to begin analysis
-                    - First few rows will be shown here
-                    - Data types and basic statistics will be displayed
-                    """)
                 def load_data(file):
                     if file is None:
-                        return None, None
                     try:
                         df = pd.read_csv(file.name)
-                        return df.head(), df
                     except Exception as e:
-                        return None, None
                 file_input.change(
                     fn=load_data,
                     inputs=[file_input],
-                    outputs=[data_preview, current_df]
                 )
             # Second Tab: Sweetviz Analysis
             with gr.TabItem("Sweetviz Analysis"):
                 with gr.Row():
-                    sweetviz_button = gr.Button("Generate Sweetviz Report")
-                sweetviz_output = gr.HTML(label="Sweetviz Report")
                 with gr.Row():
-                    gr.Markdown("""
-                    ### Sweetviz Analysis Info
-                    - Comprehensive data profiling
-                    - Statistical analysis
-                    - Feature correlations
-                    - Missing value analysis
-                    """)
                 def generate_sweetviz(df):
                     if df is None:
                         return "Please upload a dataset first"
-                    return analyzer.generate_sweetviz_report(df)
                 sweetviz_button.click(
                     fn=generate_sweetviz,
@@ -258,21 +317,34 @@ def create_interface():
             # Third Tab: AutoViz Analysis
             with gr.TabItem("AutoViz Analysis"):
                 with gr.Row():
-                    autoviz_button = gr.Button("Generate AutoViz Report")
-                autoviz_output = gr.HTML(label="AutoViz Report")
                 with gr.Row():
-                    gr.Markdown("""
-                    ### AutoViz Analysis Info
-                    - Automated visualization generation
-                    - Distribution analysis
-                    - Correlation plots
-                    - Feature relationships
-                    """)
                 def generate_autoviz(df):
                     if df is None:
                         return "Please upload a dataset first"
-                    return analyzer.generate_autoviz_report(df)
                 autoviz_button.click(
                     fn=generate_autoviz,
@@ -284,4 +356,9 @@ def create_interface():
 if __name__ == "__main__":
     demo = create_interface()
-    demo.launch(show_error=True)

 from autoviz.AutoViz_Class import AutoViz_Class
 import shutil
 import warnings
+import io
+import base64
 warnings.filterwarnings('ignore')
 class DataAnalyzer:
         return html_with_table
     def preprocess_dataframe(self, df):
         df = df.copy()
         # Convert 'value' column to numeric if possible
+        if 'value' in df.columns:
+            df['value'] = pd.to_numeric(df['value'].replace('[\$,]', '', regex=True), errors='coerce')
         # Handle datetime columns
         for col in df.columns:
             if df[col].dtype == 'object':
                     df[col] = pd.to_datetime(df[col], errors='ignore')
                 except:
                     pass
         # Convert categorical columns with low cardinality
         for col in df.select_dtypes(include=['object']).columns:
             if df[col].nunique() < 50:
                 df[col] = df[col].astype('category')
         return df
     def generate_autoviz_report(self, df):
             plt.close('all')
+            # Create a directory for plots
+            plots_dir = os.path.join(viz_temp_dir, "plots")
+            os.makedirs(plots_dir, exist_ok=True)
+            # Run AutoViz
             dfte = self.AV.AutoViz(
                 filename='',
                 sep=',',
                 header=0,
                 verbose=1,
                 lowess=False,
+                chart_format='html',
                 max_rows_analyzed=5000,
                 max_cols_analyzed=30,
+                save_plot_dir=plots_dir
             )
             # Generate summary statistics
+            numeric_cols = df.select_dtypes(include=['number']).columns
+            categorical_cols = df.select_dtypes(include=['category', 'object']).columns
+            numeric_stats = df[numeric_cols].describe().round(2) if len(numeric_cols) > 0 else pd.DataFrame()
+            categorical_stats = df[categorical_cols].describe() if len(categorical_cols) > 0 else pd.DataFrame()
+            # Create HTML content with styling
+            html_content = """
+            <style>
+                .table {
+                    width: 100%;
+                    margin-bottom: 1rem;
+                    color: #212529;
+                    border-collapse: collapse;
+                }
+                .table-striped tbody tr:nth-of-type(odd) {
+                    background-color: rgba(0,0,0,.05);
+                }
+                .table td, .table th {
+                    padding: .75rem;
+                    border: 1px solid #dee2e6;
+                }
+                .table th {
+                    background-color: #f8f9fa;
+                }
+                pre {
+                    background-color: #f8f9fa;
+                    padding: 1rem;
+                    border-radius: 4px;
+                }
+                .viz-container {
+                    margin: 20px 0;
+                    padding: 20px;
+                    border: 1px solid #ddd;
+                    border-radius: 5px;
+                }
+            </style>
+            """
+            html_content += f"""
+            <div class="viz-container">
+                <h2 style="text-align: center;">Data Analysis Report</h2>
+                <div style="margin: 20px;">
+                    <h3>Dataset Overview</h3>
                     <p>Total Rows: {len(df)}</p>
                     <p>Total Columns: {len(df.columns)}</p>
+                    <h3>Numeric Variables Summary</h3>
+                    <div style="overflow-x: auto;">
+                        {numeric_stats.to_html(classes='table table-striped')}
+                    </div>
+                    <h3>Categorical Variables Summary</h3>
+                    <div style="overflow-x: auto;">
+                        {categorical_stats.to_html(classes='table table-striped')}
+                    </div>
+                    <h3>Column Types</h3>
                     <pre>{df.dtypes.to_string()}</pre>
                 </div>
             """
+            # Add plots if they exist
+            if os.path.exists(plots_dir):
+                for file in sorted(os.listdir(plots_dir)):
+                    if file.endswith('.html'):
+                        with open(os.path.join(plots_dir, file), 'r', encoding='utf-8') as f:
+                            plot_content = f.read()
+                            if plot_content.strip():
+                                html_content += f"""
+                                <div class="viz-container">
+                                    <h3>{file.replace('.html', '').replace('_', ' ').title()}</h3>
+                                    {plot_content}
+                                </div>
+                                """
+            html_content += "</div>"
+            return html_content
         except Exception as e:
             import traceback
             if os.path.exists(viz_temp_dir):
                 shutil.rmtree(viz_temp_dir)
 def create_interface():
     analyzer = DataAnalyzer()
     with gr.Blocks(theme=gr.themes.Soft()) as demo:
+        gr.Markdown("""
+        # Data Analysis Dashboard
+        This dashboard provides comprehensive data analysis and visualization capabilities.
+        """)
         # Store the dataframe in a state variable
         current_df = gr.State(None)
             # First Tab: Data Upload & Preview
             with gr.TabItem("Data Upload & Preview"):
                 with gr.Row():
+                    with gr.Column(scale=2):
+                        file_input = gr.File(
+                            label="Upload CSV File",
+                            file_types=[".csv"],
+                            file_count="single"
+                        )
+                    with gr.Column(scale=1):
+                        gr.Markdown("""
+                        ### Upload Instructions
+                        1. Select a CSV file
+                        2. File will be automatically loaded
+                        3. Preview will appear below
+                        """)
+                with gr.Row():
+                    data_info = gr.Markdown("No data uploaded yet")
                 with gr.Row():
+                    data_preview = gr.Dataframe(
+                        label="Data Preview",
+                        interactive=False,
+                        wrap=True
+                    )
                 def load_data(file):
                     if file is None:
+                        return "No data uploaded yet", None, None
                     try:
                         df = pd.read_csv(file.name)
+                        info_text = f"""
+                        ### Dataset Information
+                        - Rows: {len(df)}
+                        - Columns: {len(df.columns)}
+                        - Memory Usage: {df.memory_usage(deep=True).sum() / 1024:.2f} KB
+                        - Column Types: {dict(df.dtypes.value_counts())}
+                        """
+                        return info_text, df.head(10), df
                     except Exception as e:
+                        return f"Error loading file: {str(e)}", None, None
                 file_input.change(
                     fn=load_data,
                     inputs=[file_input],
+                    outputs=[data_info, data_preview, current_df]
                 )
             # Second Tab: Sweetviz Analysis
             with gr.TabItem("Sweetviz Analysis"):
                 with gr.Row():
+                    with gr.Column(scale=2):
+                        sweetviz_button = gr.Button(
+                            "Generate Sweetviz Report",
+                            variant="primary"
+                        )
+                    with gr.Column(scale=1):
+                        gr.Markdown("""
+                        ### Sweetviz Analysis Features
+                        - Comprehensive data profiling
+                        - Statistical analysis
+                        - Feature correlations
+                        - Missing value analysis
+                        """)
                 with gr.Row():
+                    sweetviz_output = gr.HTML(
+                        label="Sweetviz Report",
+                        value="Click the button above to generate the report"
+                    )
                 def generate_sweetviz(df):
                     if df is None:
                         return "Please upload a dataset first"
+                    try:
+                        return analyzer.generate_sweetviz_report(df)
+                    except Exception as e:
+                        return f"Error generating Sweetviz report: {str(e)}"
                 sweetviz_button.click(
                     fn=generate_sweetviz,
             # Third Tab: AutoViz Analysis
             with gr.TabItem("AutoViz Analysis"):
                 with gr.Row():
+                    with gr.Column(scale=2):
+                        autoviz_button = gr.Button(
+                            "Generate AutoViz Report",
+                            variant="primary"
+                        )
+                    with gr.Column(scale=1):
+                        gr.Markdown("""
+                        ### AutoViz Analysis Features
+                        - Automated visualization generation
+                        - Distribution analysis
+                        - Correlation plots
+                        - Feature relationships
+                        - Time series analysis (if applicable)
+                        """)
                 with gr.Row():
+                    autoviz_output = gr.HTML(
+                        label="AutoViz Report",
+                        value="Click the button above to generate the report"
+                    )
                 def generate_autoviz(df):
                     if df is None:
                         return "Please upload a dataset first"
+                    try:
+                        return analyzer.generate_autoviz_report(df)
+                    except Exception as e:
+                        return f"Error generating AutoViz report: {str(e)}"
                 autoviz_button.click(
                     fn=generate_autoviz,
 if __name__ == "__main__":
     demo = create_interface()
+    demo.launch(
+        server_name="0.0.0.0",
+        server_port=7860,
+        show_error=True,
+        share=False  # Set to True if you want to create a public link
+    )