rameshmoorthy committed on
Commit
68c1008
1 Parent(s): 6a5ab3f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +52 -33
app.py CHANGED
@@ -9,45 +9,64 @@ from autoviz.AutoViz_Class import AutoViz_Class
9
  from traceml.summary.df import DataFrameSummary
10
 
11
  def variable_table(df):
12
- """
13
- Analyzes a DataFrame and categorizes variables with colorful HTML formatting.
14
-
15
- Args:
16
  df (pandas.DataFrame): The DataFrame to analyze.
17
-
18
- Returns:
19
  str: HTML code representing the analysis results with colorful highlights.
20
- """
21
- # Analyze variable types
22
- categorical_vars = df.select_dtypes(include=['category', 'object']).columns.tolist()
23
- numerical_vars = df.select_dtypes(include=['int64', 'float64']).columns.tolist()
24
- text_vars = df.select_dtypes(include=['object']).difference(categorical_vars).tolist()
25
-
26
- # Build HTML table with styles
27
- table_style = 'border: 1px solid #ddd; border-collapse: collapse; text-align: left; font-size: 14px;'
28
- header_style = 'background-color: #f2f2f2; padding: 5px 10px;'
29
- data_style = 'padding: 5px 10px; border-bottom: 1px solid #ddd;'
30
- category_color = '#90ee90' # Light green for categorical
31
- numerical_color = '#add8e6' # Light blue for numerical
32
- text_color = '#ffd9b3' # Light yellow for text
33
-
34
- html = f"<table style='{table_style}'>"
35
- html += f"<tr><th style='{header_style}'>Variable Type</th><th style='{header_style}'>Columns</th></tr>"
36
-
37
- # Add rows for each variable type with coloring
38
- if categorical_vars:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
39
  html += f"<tr style='background-color: {category_color};'><td>Categorical</td><td style='{data_style}'>{', '.join(categorical_vars)}</td></tr>"
40
- if numerical_vars:
41
  html += f"<tr style='background-color: {numerical_color};'><td>Numerical</td><td style='{data_style}'>{', '.join(numerical_vars)}</td></tr>"
42
- if text_vars:
43
  html += f"<tr style='background-color: {text_color};'><td>Text</td><td style='{data_style}'>{', '.join(text_vars)}</td></tr>"
44
-
45
- # Handle cases where no variables are found
46
- if not (categorical_vars or numerical_vars or text_vars):
47
  html += "<tr><td>No variables found!</td></tr>"
48
-
49
- html += "</table>"
50
- return html
51
 
52
  def generate_report(file, type):
53
  df = pd.read_csv(file) if file.name.endswith(".csv") else pd.read_excel(file)
 
9
  from traceml.summary.df import DataFrameSummary
10
 
11
def variable_table(df):
    """
    Classify the columns of a DataFrame and render the result as an HTML table.

    Each column is placed in exactly one bucket, based on its dtype kind and
    on how many distinct non-null values it holds relative to its non-null
    count:

    * Numerical   - integer, unsigned integer or float dtype whose distinct
                    count is at least one tenth of the non-null count.
    * Text        - object or boolean dtype whose distinct count is strictly
                    greater than one tenth of the non-null count.
    * Categorical - everything else (low-cardinality columns and any other
                    dtype kind, e.g. datetimes).

    Args:
        df (pandas.DataFrame): The DataFrame to analyze.

    Returns:
        str: HTML for a two-column table (variable type, comma-separated
        column names) with one colored row per non-empty bucket, or a single
        "No variables found!" row when the DataFrame has no columns.
    """
    # Imported locally so the function does not rely on a module-level import
    # and so the stdlib module name `html` is never shadowed by a local.
    from html import escape

    numerical_vars = []
    text_vars = []
    categorical_vars = []

    # DataFrame.items() yields one (label, Series) pair per column, so
    # duplicated column labels still produce a Series (df[label] would
    # return a DataFrame in that case).
    for label, column in df.items():
        unique_values = column.nunique()
        total_values = column.count()
        cardinality_cutoff = total_values / 10
        dtype_kind = column.dtype.kind

        # Labels come from user-supplied files: coerce to str (labels may be
        # ints or other non-string objects) and escape before embedding in HTML.
        name = escape(str(label))

        if dtype_kind in ('i', 'u', 'f') and unique_values >= cardinality_cutoff:
            numerical_vars.append(name)
        elif dtype_kind in ('O', 'b') and unique_values > cardinality_cutoff:
            text_vars.append(name)
        else:
            categorical_vars.append(name)

    # Inline styles for the generated table
    table_style = 'border: 1px solid #ddd; border-collapse: collapse; text-align: left; font-size: 14px;'
    header_style = 'background-color: #f2f2f2; padding: 5px 10px;'
    data_style = 'padding: 5px 10px; border-bottom: 1px solid #ddd;'
    category_color = '#90ee90'  # Light green for categorical
    numerical_color = '#add8e6'  # Light blue for numerical
    text_color = '#ffd9b3'  # Light yellow for text

    parts = [
        f"<table style='{table_style}'>",
        f"<tr><th style='{header_style}'>Variable Type</th><th style='{header_style}'>Columns</th></tr>",
    ]

    # One colored row per non-empty bucket, in a fixed display order
    buckets = (
        ('Categorical', category_color, categorical_vars),
        ('Numerical', numerical_color, numerical_vars),
        ('Text', text_color, text_vars),
    )
    for title, color, names in buckets:
        if names:
            parts.append(
                f"<tr style='background-color: {color};'><td>{title}</td>"
                f"<td style='{data_style}'>{', '.join(names)}</td></tr>"
            )

    # Handle cases where no variables are found
    if not (categorical_vars or numerical_vars or text_vars):
        parts.append("<tr><td>No variables found!</td></tr>")

    parts.append("</table>")
    return "".join(parts)
70
 
71
  def generate_report(file, type):
72
  df = pd.read_csv(file) if file.name.endswith(".csv") else pd.read_excel(file)