File size: 7,629 Bytes
0a5d39a
7a75289
0a5d39a
71c9026
dd375ae
1065d81
bbe3128
 
 
c335d03
6a5ab3f
68c1008
 
 
 
6a5ab3f
68c1008
 
6a5ab3f
68c1008
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6a5ab3f
68c1008
 
 
 
 
 
 
 
 
 
 
 
 
5bc9631
68c1008
5bc9631
68c1008
5bc9631
68c1008
 
 
5bc9631
68c1008
 
 
b9cae72
 
 
a899a25
d4abba2
 
 
 
3275b57
b9cae72
e7f020c
 
 
 
d4abba2
3275b57
 
 
 
 
 
 
 
88adc6e
0a5d39a
d4abba2
8cc8a10
cb63445
3cd0174
aff03dd
 
 
3cd0174
 
 
 
3275b57
af6630f
3cd0174
d4abba2
0d398ac
d4abba2
390a5c2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4c3e715
390a5c2
 
bbe3128
c335d03
 
 
 
31200cf
9b7bc38
31200cf
ab0b63c
 
 
9b7bc38
ab0b63c
 
 
 
88adc6e
 
ab0b63c
c335d03
6a5ab3f
bd103f5
 
 
 
 
d4abba2
bd103f5
 
 
 
a899a25
a141a8d
4c3e715
a141a8d
 
 
d4abba2
a141a8d
d4abba2
 
4c3e715
d4abba2
4c3e715
d4abba2
bd103f5
 
 
 
 
d25ab29
18d1282
6213803
6a5ab3f
 
18d1282
6213803
88adc6e
719ba38
18d1282
6213803
88adc6e
bd103f5
 
 
 
b9cae72
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
import pandas as pd
import ydata_profiling
import gradio as gr
from pydantic_settings import BaseSettings
from tempfile import NamedTemporaryFile
import sweetviz as sv
#from datatile.summary.df import DataFrameSummary
from autoviz.AutoViz_Class import AutoViz_Class
from traceml.summary.df import DataFrameSummary

def variable_table(df):
    """
    Analyzes a DataFrame and categorizes variables with colorful HTML formatting.
    
    Args:
      df (pandas.DataFrame): The DataFrame to analyze.
    
    Returns:
      str: HTML code representing the analysis results with colorful highlights.
    """
    # Analyze variable types
    numerical_vars=[]
    text_vars=[]
    categorical_vars=[]
    # Identify categorical, numerical, and text columns
    # Iterate over columns
    for col in df.columns:
        print(col )
        unique_values = df[col].nunique()
        total_values = df[col].count()
        dtype_kind = df[col].dtype.kind
        print( 'unique value-',unique_values,' total_values ',total_values,dtype_kind)
        # Check criteria for numerical, text, and categorical variables
        if (dtype_kind == 'f' or dtype_kind == 'i') and unique_values >= total_values / 10:
            numerical_vars.append(col)
            print('Numerical')
        elif (dtype_kind == 'O' or dtype_kind == 'b') and unique_values > total_values / 10:
            text_vars.append(col)
            print('Text')
        else:
            categorical_vars.append(col)
            print('categorical')
    print(categorical_vars,numerical_vars,text_vars)

    # Build HTML table with styles
    table_style = 'border: 1px solid #ddd; border-collapse: collapse; text-align: left; font-size: 14px;'
    header_style = 'background-color: #f2f2f2; padding: 5px 10px;'
    data_style = 'padding: 5px 10px; border-bottom: 1px solid #ddd;'
    category_color = '#90ee90'  # Light green for categorical
    numerical_color = '#add8e6'  # Light blue for numerical
    text_color = '#ffd9b3'  # Light yellow for text
    
    html = f"<table style='{table_style}'>"
    html += f"<tr><th style='{header_style}'>Variable Type</th><th style='{header_style}'>Columns</th></tr>"
    
    # Add rows for each variable type with coloring
    if categorical_vars:
        html += f"<tr style='background-color: {category_color};'><td>Categorical</td><td style='{data_style}'>{', '.join(categorical_vars)}</td></tr>"
    if numerical_vars:
        html += f"<tr style='background-color: {numerical_color};'><td>Numerical</td><td style='{data_style}'>{', '.join(numerical_vars)}</td></tr>"
    if text_vars:
        html += f"<tr style='background-color: {text_color};'><td>Text</td><td style='{data_style}'>{', '.join(text_vars)}</td></tr>"
    
    # Handle cases where no variables are found
    if not (categorical_vars or numerical_vars or text_vars):
        html += "<tr><td>No variables found!</td></tr>"
    
    html += "</table>"
    return html

def generate_report(file, type):
    df = pd.read_csv(file) if file.name.endswith(".csv") else pd.read_excel(file)

    pandas_html_report =ydata_profiling.ProfileReport(df).to_html() 
    temp_file1 = NamedTemporaryFile(delete=False, suffix=".html")
    temp_file1.write(pandas_html_report.encode('utf-8'))
    temp_file1.close()
    print('Pandas completed')
    
    # dataprep_report = create_report(df)
    # temp_file2 = NamedTemporaryFile(delete=False, suffix=".html")
    # temp_file2.write(dataprep_report.encode('utf-8'))
    # temp_file2.close()

    def convert_mixed_to_str(df):
  
      mixed_cols = df.select_dtypes(include=['object']).columns  # Detect object dtype (usually mixed)
      for col in mixed_cols:
        df[col] = df[col].astype(str)  # Convert mixed columns to strings
      return df

    df = convert_mixed_to_str(df.copy())
    df.columns = df.columns.str.upper()

    sweetviz_report = sv.analyze(df)
    #sweetviz_report=sweetviz_report.show_html()
    #print(type(sweetviz_report))
    report=sweetviz_report.show_html(  filepath='SWEETVIZ_REPORT.html', 
            open_browser=False, 
            layout='widescreen', 
            scale=None)
    HTMLFileToBeOpened = open('SWEETVIZ_REPORT.html', "r") 
  
    # Reading the file and storing in a variable 
    contents = HTMLFileToBeOpened.read() 
    print('Pandas completed')
    
  
    temp_file3 = NamedTemporaryFile(delete=False, suffix=".html")
    temp_file3.write(contents.encode('utf-8'))
    temp_file3.close()
    try:
        #AUTOVIZ
        AV = AutoViz_Class()
    
        dfviz = AV.AutoViz(
        "",
        sep=",",
        depVar="",
        dfte=df,
        header=0,
        verbose=2,
        lowess=False,
        chart_format="html",
        max_rows_analyzed=500,
        max_cols_analyzed=20,
        save_plot_dir="plots"
        )
    except Exception as e:
        dfviz=df
        print(e)
        print('Exception during Autoviz')

    
    
   
    dfs = DataFrameSummary(df)
    sd=dfs.summary()
    sd.index.name = 'PARAMETERS'
    sd1= sd.reset_index(drop=False)
    def highlight_cols(x): 
        df = x.copy() 
        df.loc[:, :] = 'color: purple'
        df[['PARAMETERS']] = 'color: red'
        return df 

    # Applying the style function
    sd1 = sd1.style.apply(highlight_cols, axis = None)
    sd1.columns = sd1.columns.str.upper()


    
    return temp_file1.name  ,temp_file3.name ,variable_table(df),dfviz,sd1
def handle_click(file, sheet_name):
            sheet_dropdown.update(disabled=True)  # Disable during report generation
            results = generate_report(file, sheet_name)
            sheet_dropdown.update(disabled=False)  # Re-enable sheet selection
            return results
    
def update_sheets(file):
    sheet_names = pd.ExcelFile(file).sheet_names
    sheet_dropdown.update(options=sheet_names, disabled=False)

with gr.Blocks() as cluster:
    with gr.Column():
        gr.HTML(value="""<h1 style="color: #1999FF; text-shadow: 1px 1px 2px #ddd;">EXCEL ANALYSIS AND INSIGHTS</h1>""")
        with gr.Row():
            file=gr.File(file_types=['.csv', '.xlsx'], label="Upload a CSV or Excel file")
            btn=gr.Button(value="Download Report") 
            
        with gr.Row():
            
            
            gr.HTML(value="""<h2 style="color: #3399FF; text-shadow: 1px 1px 2px #ddd;">PANDAS REPORT</h2>""")
            out1=gr.File(label="Download CSV")
            gr.HTML(value="""<h2 style="color: #3399FF; text-shadow: 1px 1px 2px #ddd;">SWEETVIZ REPORT</h2>""")
            out2=gr.File(label="Download CSV")
        sheet_dropdown = gr.Dropdown(
            label="Select sheet", options=[], disabled=True  # Initially disabled
        )
    
        
        with gr.Column():
            gr.HTML(value="""<h2 style="color: #BA4A00; text-shadow: 1px 1px 2px #ddd;"COLUMNS IN FILE</h2>""")
            #gr.Markdown("***Uploaded File***")
            var=gr.HTML()
        with gr.Column():
            gr.HTML(value="""<h2 style="color: #BA4A00; text-shadow: 1px 1px 2px #ddd;"UPLOADED FILE</h2>""")
            #gr.Markdown("#Uploaded File")
            dataframe1=gr.Dataframe()
        with gr.Column():
            gr.HTML(value="""<h2 style="color: #BA4A00; text-shadow: 1px 1px 2px #ddd;"COLUMNS STATS AND ANALYSIS</h2>""")
            #gr.Markdown("#Columns Analysis")
            dataframe2=gr.Dataframe()
    
    btn.click(handle_click, inputs=[file, sheet_dropdown], outputs=[out1, out2, var, dataframe1, dataframe2])
    file.observe(update_sheets, names=["data"])
    #btn.click(generate_report,inputs=[file],outputs=[out1,out2,var,dataframe1,dataframe2])
cluster.launch()