baconnier committed on
Commit
f421e23
1 Parent(s): 8c15039

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +177 -100
app.py CHANGED
@@ -10,6 +10,8 @@ from sklearn.preprocessing import StandardScaler
10
  from autoviz.AutoViz_Class import AutoViz_Class
11
  import shutil
12
  import warnings
 
 
13
  warnings.filterwarnings('ignore')
14
 
15
  class DataAnalyzer:
@@ -46,18 +48,12 @@ class DataAnalyzer:
46
  return html_with_table
47
 
48
  def preprocess_dataframe(self, df):
49
- """Preprocess dataframe for visualization"""
50
  df = df.copy()
51
 
52
  # Convert 'value' column to numeric if possible
53
- try:
54
- # Remove any currency symbols and commas
55
- df['value'] = df['value'].replace('[\$,]', '', regex=True)
56
- # Convert to float
57
- df['value'] = pd.to_numeric(df['value'], errors='coerce')
58
- except:
59
- pass
60
-
61
  # Handle datetime columns
62
  for col in df.columns:
63
  if df[col].dtype == 'object':
@@ -65,18 +61,12 @@ class DataAnalyzer:
65
  df[col] = pd.to_datetime(df[col], errors='ignore')
66
  except:
67
  pass
68
-
69
- datetime_columns = df.select_dtypes(include=['datetime64']).columns
70
- for col in datetime_columns:
71
- df[f'{col}_year'] = df[col].dt.year
72
- df[f'{col}_month'] = df[col].dt.month
73
- df = df.drop(columns=[col])
74
-
75
  # Convert categorical columns with low cardinality
76
  for col in df.select_dtypes(include=['object']).columns:
77
  if df[col].nunique() < 50:
78
  df[col] = df[col].astype('category')
79
-
80
  return df
81
 
82
  def generate_autoviz_report(self, df):
@@ -103,7 +93,11 @@ class DataAnalyzer:
103
 
104
  plt.close('all')
105
 
106
- # Run AutoViz with modified settings
 
 
 
 
107
  dfte = self.AV.AutoViz(
108
  filename='',
109
  sep=',',
@@ -112,64 +106,92 @@ class DataAnalyzer:
112
  header=0,
113
  verbose=1,
114
  lowess=False,
115
- chart_format='html', # Changed back to html
116
  max_rows_analyzed=5000,
117
  max_cols_analyzed=30,
118
- save_plot_dir=viz_temp_dir
119
  )
120
 
121
- # Collect visualizations
122
- html_parts = []
123
- if os.path.exists(viz_temp_dir):
124
- for file in sorted(os.listdir(viz_temp_dir)):
125
- if file.endswith('.html'):
126
- file_path = os.path.join(viz_temp_dir, file)
127
- try:
128
- with open(file_path, 'r', encoding='utf-8') as f:
129
- content = f.read()
130
- if content.strip():
131
- html_parts.append(content)
132
- except Exception as e:
133
- print(f"Error reading file {file}: {str(e)}")
134
-
135
  # Generate summary statistics
136
- numeric_summary = df.describe().to_html() if not df.select_dtypes(include=['number']).empty else ""
137
- categorical_summary = df.describe(include=['category', 'object']).to_html() if not df.select_dtypes(include=['category', 'object']).empty else ""
138
-
139
- if not html_parts:
140
- return f"""
141
- <div style="padding: 20px; border: 1px solid #ddd; border-radius: 5px;">
142
- <h3>Data Summary</h3>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
143
  <p>Total Rows: {len(df)}</p>
144
  <p>Total Columns: {len(df.columns)}</p>
145
- <h4>Numeric Summary:</h4>
146
- {numeric_summary}
147
- <h4>Categorical Summary:</h4>
148
- {categorical_summary}
149
- <hr>
150
- <h3>Column Types:</h3>
 
 
 
 
 
 
151
  <pre>{df.dtypes.to_string()}</pre>
152
  </div>
153
- """
154
-
155
- combined_html = f"""
156
- <div style="padding: 20px; border: 1px solid #ddd; border-radius: 5px;">
157
- <h2 style="text-align: center;">AutoViz Analysis Report</h2>
158
- <div style="margin: 20px;">
159
- <h3>Dataset Summary</h3>
160
- <p>Rows analyzed: {len(df)}</p>
161
- <p>Columns: {len(df.columns)}</p>
162
- <h4>Numeric Summary:</h4>
163
- {numeric_summary}
164
- <h4>Categorical Summary:</h4>
165
- {categorical_summary}
166
- </div>
167
- <hr>
168
- {'<hr>'.join(html_parts)}
169
- </div>
170
  """
171
-
172
- return combined_html
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
173
 
174
  except Exception as e:
175
  import traceback
@@ -192,11 +214,15 @@ class DataAnalyzer:
192
  if os.path.exists(viz_temp_dir):
193
  shutil.rmtree(viz_temp_dir)
194
 
 
195
  def create_interface():
196
  analyzer = DataAnalyzer()
197
 
198
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
199
- gr.Markdown("# Data Analysis Dashboard")
 
 
 
200
 
201
  # Store the dataframe in a state variable
202
  current_df = gr.State(None)
@@ -205,49 +231,82 @@ def create_interface():
205
  # First Tab: Data Upload & Preview
206
  with gr.TabItem("Data Upload & Preview"):
207
  with gr.Row():
208
- file_input = gr.File(label="Upload CSV")
209
- data_preview = gr.Dataframe(label="Data Preview", interactive=False)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
210
  with gr.Row():
211
- gr.Markdown("""
212
- ### Data Preview Info
213
- - Upload a CSV file to begin analysis
214
- - First few rows will be shown here
215
- - Data types and basic statistics will be displayed
216
- """)
217
 
218
  def load_data(file):
219
  if file is None:
220
- return None, None
221
  try:
222
  df = pd.read_csv(file.name)
223
- return df.head(), df
 
 
 
 
 
 
 
224
  except Exception as e:
225
- return None, None
226
 
227
  file_input.change(
228
  fn=load_data,
229
  inputs=[file_input],
230
- outputs=[data_preview, current_df]
231
  )
232
 
233
  # Second Tab: Sweetviz Analysis
234
  with gr.TabItem("Sweetviz Analysis"):
235
  with gr.Row():
236
- sweetviz_button = gr.Button("Generate Sweetviz Report")
237
- sweetviz_output = gr.HTML(label="Sweetviz Report")
 
 
 
 
 
 
 
 
 
 
 
 
238
  with gr.Row():
239
- gr.Markdown("""
240
- ### Sweetviz Analysis Info
241
- - Comprehensive data profiling
242
- - Statistical analysis
243
- - Feature correlations
244
- - Missing value analysis
245
- """)
246
 
247
  def generate_sweetviz(df):
248
  if df is None:
249
  return "Please upload a dataset first"
250
- return analyzer.generate_sweetviz_report(df)
 
 
 
251
 
252
  sweetviz_button.click(
253
  fn=generate_sweetviz,
@@ -258,21 +317,34 @@ def create_interface():
258
  # Third Tab: AutoViz Analysis
259
  with gr.TabItem("AutoViz Analysis"):
260
  with gr.Row():
261
- autoviz_button = gr.Button("Generate AutoViz Report")
262
- autoviz_output = gr.HTML(label="AutoViz Report")
 
 
 
 
 
 
 
 
 
 
 
 
 
263
  with gr.Row():
264
- gr.Markdown("""
265
- ### AutoViz Analysis Info
266
- - Automated visualization generation
267
- - Distribution analysis
268
- - Correlation plots
269
- - Feature relationships
270
- """)
271
 
272
  def generate_autoviz(df):
273
  if df is None:
274
  return "Please upload a dataset first"
275
- return analyzer.generate_autoviz_report(df)
 
 
 
276
 
277
  autoviz_button.click(
278
  fn=generate_autoviz,
@@ -284,4 +356,9 @@ def create_interface():
284
 
285
  if __name__ == "__main__":
286
  demo = create_interface()
287
- demo.launch(show_error=True)
 
 
 
 
 
 
10
  from autoviz.AutoViz_Class import AutoViz_Class
11
  import shutil
12
  import warnings
13
+ import io
14
+ import base64
15
  warnings.filterwarnings('ignore')
16
 
17
  class DataAnalyzer:
 
48
  return html_with_table
49
 
50
  def preprocess_dataframe(self, df):
 
51
  df = df.copy()
52
 
53
  # Convert 'value' column to numeric if possible
54
+ if 'value' in df.columns:
55
+ df['value'] = pd.to_numeric(df['value'].replace('[\$,]', '', regex=True), errors='coerce')
56
+
 
 
 
 
 
57
  # Handle datetime columns
58
  for col in df.columns:
59
  if df[col].dtype == 'object':
 
61
  df[col] = pd.to_datetime(df[col], errors='ignore')
62
  except:
63
  pass
64
+
 
 
 
 
 
 
65
  # Convert categorical columns with low cardinality
66
  for col in df.select_dtypes(include=['object']).columns:
67
  if df[col].nunique() < 50:
68
  df[col] = df[col].astype('category')
69
+
70
  return df
71
 
72
  def generate_autoviz_report(self, df):
 
93
 
94
  plt.close('all')
95
 
96
+ # Create a directory for plots
97
+ plots_dir = os.path.join(viz_temp_dir, "plots")
98
+ os.makedirs(plots_dir, exist_ok=True)
99
+
100
+ # Run AutoViz
101
  dfte = self.AV.AutoViz(
102
  filename='',
103
  sep=',',
 
106
  header=0,
107
  verbose=1,
108
  lowess=False,
109
+ chart_format='html',
110
  max_rows_analyzed=5000,
111
  max_cols_analyzed=30,
112
+ save_plot_dir=plots_dir
113
  )
114
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
115
  # Generate summary statistics
116
+ numeric_cols = df.select_dtypes(include=['number']).columns
117
+ categorical_cols = df.select_dtypes(include=['category', 'object']).columns
118
+
119
+ numeric_stats = df[numeric_cols].describe().round(2) if len(numeric_cols) > 0 else pd.DataFrame()
120
+ categorical_stats = df[categorical_cols].describe() if len(categorical_cols) > 0 else pd.DataFrame()
121
+
122
+ # Create HTML content with styling
123
+ html_content = """
124
+ <style>
125
+ .table {
126
+ width: 100%;
127
+ margin-bottom: 1rem;
128
+ color: #212529;
129
+ border-collapse: collapse;
130
+ }
131
+ .table-striped tbody tr:nth-of-type(odd) {
132
+ background-color: rgba(0,0,0,.05);
133
+ }
134
+ .table td, .table th {
135
+ padding: .75rem;
136
+ border: 1px solid #dee2e6;
137
+ }
138
+ .table th {
139
+ background-color: #f8f9fa;
140
+ }
141
+ pre {
142
+ background-color: #f8f9fa;
143
+ padding: 1rem;
144
+ border-radius: 4px;
145
+ }
146
+ .viz-container {
147
+ margin: 20px 0;
148
+ padding: 20px;
149
+ border: 1px solid #ddd;
150
+ border-radius: 5px;
151
+ }
152
+ </style>
153
+ """
154
+
155
+ html_content += f"""
156
+ <div class="viz-container">
157
+ <h2 style="text-align: center;">Data Analysis Report</h2>
158
+
159
+ <div style="margin: 20px;">
160
+ <h3>Dataset Overview</h3>
161
  <p>Total Rows: {len(df)}</p>
162
  <p>Total Columns: {len(df.columns)}</p>
163
+
164
+ <h3>Numeric Variables Summary</h3>
165
+ <div style="overflow-x: auto;">
166
+ {numeric_stats.to_html(classes='table table-striped')}
167
+ </div>
168
+
169
+ <h3>Categorical Variables Summary</h3>
170
+ <div style="overflow-x: auto;">
171
+ {categorical_stats.to_html(classes='table table-striped')}
172
+ </div>
173
+
174
+ <h3>Column Types</h3>
175
  <pre>{df.dtypes.to_string()}</pre>
176
  </div>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
177
  """
178
+
179
+ # Add plots if they exist
180
+ if os.path.exists(plots_dir):
181
+ for file in sorted(os.listdir(plots_dir)):
182
+ if file.endswith('.html'):
183
+ with open(os.path.join(plots_dir, file), 'r', encoding='utf-8') as f:
184
+ plot_content = f.read()
185
+ if plot_content.strip():
186
+ html_content += f"""
187
+ <div class="viz-container">
188
+ <h3>{file.replace('.html', '').replace('_', ' ').title()}</h3>
189
+ {plot_content}
190
+ </div>
191
+ """
192
+
193
+ html_content += "</div>"
194
+ return html_content
195
 
196
  except Exception as e:
197
  import traceback
 
214
  if os.path.exists(viz_temp_dir):
215
  shutil.rmtree(viz_temp_dir)
216
 
217
+
218
  def create_interface():
219
  analyzer = DataAnalyzer()
220
 
221
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
222
+ gr.Markdown("""
223
+ # Data Analysis Dashboard
224
+ This dashboard provides comprehensive data analysis and visualization capabilities.
225
+ """)
226
 
227
  # Store the dataframe in a state variable
228
  current_df = gr.State(None)
 
231
  # First Tab: Data Upload & Preview
232
  with gr.TabItem("Data Upload & Preview"):
233
  with gr.Row():
234
+ with gr.Column(scale=2):
235
+ file_input = gr.File(
236
+ label="Upload CSV File",
237
+ file_types=[".csv"],
238
+ file_count="single"
239
+ )
240
+ with gr.Column(scale=1):
241
+ gr.Markdown("""
242
+ ### Upload Instructions
243
+ 1. Select a CSV file
244
+ 2. File will be automatically loaded
245
+ 3. Preview will appear below
246
+ """)
247
+
248
+ with gr.Row():
249
+ data_info = gr.Markdown("No data uploaded yet")
250
+
251
  with gr.Row():
252
+ data_preview = gr.Dataframe(
253
+ label="Data Preview",
254
+ interactive=False,
255
+ wrap=True
256
+ )
 
257
 
258
  def load_data(file):
259
  if file is None:
260
+ return "No data uploaded yet", None, None
261
  try:
262
  df = pd.read_csv(file.name)
263
+ info_text = f"""
264
+ ### Dataset Information
265
+ - Rows: {len(df)}
266
+ - Columns: {len(df.columns)}
267
+ - Memory Usage: {df.memory_usage(deep=True).sum() / 1024:.2f} KB
268
+ - Column Types: {dict(df.dtypes.value_counts())}
269
+ """
270
+ return info_text, df.head(10), df
271
  except Exception as e:
272
+ return f"Error loading file: {str(e)}", None, None
273
 
274
  file_input.change(
275
  fn=load_data,
276
  inputs=[file_input],
277
+ outputs=[data_info, data_preview, current_df]
278
  )
279
 
280
  # Second Tab: Sweetviz Analysis
281
  with gr.TabItem("Sweetviz Analysis"):
282
  with gr.Row():
283
+ with gr.Column(scale=2):
284
+ sweetviz_button = gr.Button(
285
+ "Generate Sweetviz Report",
286
+ variant="primary"
287
+ )
288
+ with gr.Column(scale=1):
289
+ gr.Markdown("""
290
+ ### Sweetviz Analysis Features
291
+ - Comprehensive data profiling
292
+ - Statistical analysis
293
+ - Feature correlations
294
+ - Missing value analysis
295
+ """)
296
+
297
  with gr.Row():
298
+ sweetviz_output = gr.HTML(
299
+ label="Sweetviz Report",
300
+ value="Click the button above to generate the report"
301
+ )
 
 
 
302
 
303
  def generate_sweetviz(df):
304
  if df is None:
305
  return "Please upload a dataset first"
306
+ try:
307
+ return analyzer.generate_sweetviz_report(df)
308
+ except Exception as e:
309
+ return f"Error generating Sweetviz report: {str(e)}"
310
 
311
  sweetviz_button.click(
312
  fn=generate_sweetviz,
 
317
  # Third Tab: AutoViz Analysis
318
  with gr.TabItem("AutoViz Analysis"):
319
  with gr.Row():
320
+ with gr.Column(scale=2):
321
+ autoviz_button = gr.Button(
322
+ "Generate AutoViz Report",
323
+ variant="primary"
324
+ )
325
+ with gr.Column(scale=1):
326
+ gr.Markdown("""
327
+ ### AutoViz Analysis Features
328
+ - Automated visualization generation
329
+ - Distribution analysis
330
+ - Correlation plots
331
+ - Feature relationships
332
+ - Time series analysis (if applicable)
333
+ """)
334
+
335
  with gr.Row():
336
+ autoviz_output = gr.HTML(
337
+ label="AutoViz Report",
338
+ value="Click the button above to generate the report"
339
+ )
 
 
 
340
 
341
  def generate_autoviz(df):
342
  if df is None:
343
  return "Please upload a dataset first"
344
+ try:
345
+ return analyzer.generate_autoviz_report(df)
346
+ except Exception as e:
347
+ return f"Error generating AutoViz report: {str(e)}"
348
 
349
  autoviz_button.click(
350
  fn=generate_autoviz,
 
356
 
357
  if __name__ == "__main__":
358
  demo = create_interface()
359
+ demo.launch(
360
+ server_name="0.0.0.0",
361
+ server_port=7860,
362
+ show_error=True,
363
+ share=False # Set to True if you want to create a public link
364
+ )