File size: 10,730 Bytes
0a5d39a
7a75289
0a5d39a
71c9026
dd375ae
1065d81
e8748ad
bbe3128
 
 
24047b3
d250988
c335d03
6a5ab3f
68c1008
 
 
 
6a5ab3f
68c1008
 
6a5ab3f
68c1008
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6a5ab3f
68c1008
 
 
 
 
 
 
 
 
 
 
 
 
5bc9631
68c1008
5bc9631
68c1008
5bc9631
68c1008
 
 
5bc9631
68c1008
 
 
b9cae72
 
 
d250988
a899a25
d4abba2
 
 
 
3275b57
b9cae72
e7f020c
 
 
 
d4abba2
3275b57
 
 
 
 
 
 
 
88adc6e
d250988
d4abba2
8cc8a10
cb63445
3cd0174
aff03dd
 
 
3cd0174
 
 
 
3275b57
af6630f
3cd0174
d4abba2
0d398ac
d4abba2
e9a3347
390a5c2
d21feaf
123707b
 
8c65d6c
 
0435627
031a38c
8c65d6c
 
 
 
 
1e376b8
8c65d6c
1e376b8
123707b
8c65d6c
 
 
031a38c
8c65d6c
 
 
 
 
1e376b8
8c65d6c
390a5c2
4c3e715
390a5c2
 
bbe3128
8c65d6c
 
123707b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c335d03
 
31200cf
9b7bc38
31200cf
ab0b63c
 
 
9b7bc38
ab0b63c
 
 
 
88adc6e
 
ab0b63c
c335d03
9669d6d
d250988
 
 
 
0fe990c
bd103f5
bc3054f
0fe990c
 
d4abba2
bd103f5
 
0fe990c
bc3054f
0fe990c
 
 
 
 
bd103f5
a899a25
a141a8d
4c3e715
a141a8d
 
 
d4abba2
a141a8d
d4abba2
 
4c3e715
d4abba2
4c3e715
d4abba2
bd103f5
24047b3
bd103f5
 
 
d25ab29
18d1282
6213803
6a5ab3f
 
18d1282
6213803
88adc6e
719ba38
18d1282
6213803
88adc6e
d250988
 
 
 
 
 
 
 
 
 
 
 
123707b
d250988
123707b
24047b3
123707b
 
 
 
 
 
 
 
 
79ccabc
123707b
 
 
 
 
 
 
 
 
 
 
b712f21
0fe990c
bd103f5
24047b3
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
import pandas as pd
import ydata_profiling
import gradio as gr
from pydantic_settings import BaseSettings
from tempfile import NamedTemporaryFile
import sweetviz as sv
import sketch
#from datatile.summary.df import DataFrameSummary
from autoviz.AutoViz_Class import AutoViz_Class
from traceml.summary.df import DataFrameSummary
import os
df1=gr.State()

def variable_table(df):
    """
    Analyzes a DataFrame and categorizes variables with colorful HTML formatting.
    
    Args:
      df (pandas.DataFrame): The DataFrame to analyze.
    
    Returns:
      str: HTML code representing the analysis results with colorful highlights.
    """
    # Analyze variable types
    numerical_vars=[]
    text_vars=[]
    categorical_vars=[]
    # Identify categorical, numerical, and text columns
    # Iterate over columns
    for col in df.columns:
        print(col )
        unique_values = df[col].nunique()
        total_values = df[col].count()
        dtype_kind = df[col].dtype.kind
        print( 'unique value-',unique_values,' total_values ',total_values,dtype_kind)
        # Check criteria for numerical, text, and categorical variables
        if (dtype_kind == 'f' or dtype_kind == 'i') and unique_values >= total_values / 10:
            numerical_vars.append(col)
            print('Numerical')
        elif (dtype_kind == 'O' or dtype_kind == 'b') and unique_values > total_values / 10:
            text_vars.append(col)
            print('Text')
        else:
            categorical_vars.append(col)
            print('categorical')
    print(categorical_vars,numerical_vars,text_vars)

    # Build HTML table with styles
    table_style = 'border: 1px solid #ddd; border-collapse: collapse; text-align: left; font-size: 14px;'
    header_style = 'background-color: #f2f2f2; padding: 5px 10px;'
    data_style = 'padding: 5px 10px; border-bottom: 1px solid #ddd;'
    category_color = '#90ee90'  # Light green for categorical
    numerical_color = '#add8e6'  # Light blue for numerical
    text_color = '#ffd9b3'  # Light yellow for text
    
    html = f"<table style='{table_style}'>"
    html += f"<tr><th style='{header_style}'>Variable Type</th><th style='{header_style}'>Columns</th></tr>"
    
    # Add rows for each variable type with coloring
    if categorical_vars:
        html += f"<tr style='background-color: {category_color};'><td>Categorical</td><td style='{data_style}'>{', '.join(categorical_vars)}</td></tr>"
    if numerical_vars:
        html += f"<tr style='background-color: {numerical_color};'><td>Numerical</td><td style='{data_style}'>{', '.join(numerical_vars)}</td></tr>"
    if text_vars:
        html += f"<tr style='background-color: {text_color};'><td>Text</td><td style='{data_style}'>{', '.join(text_vars)}</td></tr>"
    
    # Handle cases where no variables are found
    if not (categorical_vars or numerical_vars or text_vars):
        html += "<tr><td>No variables found!</td></tr>"
    
    html += "</table>"
    return html

def generate_report(file, type):
    df = pd.read_csv(file) if file.name.endswith(".csv") else pd.read_excel(file)
    

    pandas_html_report =ydata_profiling.ProfileReport(df).to_html() 
    temp_file1 = NamedTemporaryFile(delete=False, suffix=".html")
    temp_file1.write(pandas_html_report.encode('utf-8'))
    temp_file1.close()
    print('Pandas completed')
    
    # dataprep_report = create_report(df)
    # temp_file2 = NamedTemporaryFile(delete=False, suffix=".html")
    # temp_file2.write(dataprep_report.encode('utf-8'))
    # temp_file2.close()

    def convert_mixed_to_str(df):
  
      mixed_cols = df.select_dtypes(include=['object']).columns  # Detect object dtype (usually mixed)
      for col in mixed_cols:
        df[col] = df[col].astype(str)  # Convert mixed columns to strings
      return df

    df = convert_mixed_to_str(df.copy())
    df.columns = df.columns.str.upper()
    df1.value=df
    sweetviz_report = sv.analyze(df)
    #sweetviz_report=sweetviz_report.show_html()
    #print(type(sweetviz_report))
    report=sweetviz_report.show_html(  filepath='SWEETVIZ_REPORT.html', 
            open_browser=False, 
            layout='widescreen', 
            scale=None)
    HTMLFileToBeOpened = open('SWEETVIZ_REPORT.html', "r") 
  
    # Reading the file and storing in a variable 
    contents = HTMLFileToBeOpened.read() 
    print('Pandas completed')
    
  
    temp_file3 = NamedTemporaryFile(delete=False, suffix=".html")
    temp_file3.write(contents.encode('utf-8'))
    temp_file3.close()
    dfviz=df
    try:
        AV = AutoViz_Class()
        dft = AV.AutoViz(filename="", sep=',', dfte=df, header=0, verbose=1,
                        lowess=False,chart_format='html',max_rows_analyzed=150000,
                         max_cols_analyzed=30,save_plot_dir=None)
        reports = []
        print(os.listdir("./"))
        output_dir_html="./AutoViz_Plots/AutoViz"
        for filename in os.listdir(output_dir_html):
            if filename.endswith(".html"):
                title = os.path.splitext(filename)[0]  # Extract title from filename (without extension)
                reports.append(title)
                reports.append(output_dir_html+'/'+filename)
                print(reports)
                
        dft = AV.AutoViz(filename='', sep=',', header=0, verbose=2,dfte=df,
                      lowess=False,chart_format='png',max_rows_analyzed=150000,
                      max_cols_analyzed=30,save_plot_dir=None)
        
        imgs = []
        output_dir_img="./AutoViz_Plots/AutoViz"
        for filename in os.listdir(output_dir_img):
            if filename.endswith(".png"):
                title = os.path.splitext(filename)[0]  # Extract title from filename (without extension)
                imgs.append(title)
                imgs.append(output_dir_img+'/'+filename)
                print(imgs)
                
    except Exception as e:
        dfviz=df
        print(e)
        print('Exception during Autoviz')

 
   
    while len(reports)<10:
          # Comparing the lengths and appending t and m to output if needed

          t = gr.Markdown(visible=False)
          m = gr.File(visible=False)
          reports.append(t)
          reports.append(m)
          
    while len(imgs)<10:
          # Comparing the lengths and appending t and m to output if needed
    
          t = gr.Markdown(visible=False)
          m = gr.Image(visible=False)
          imgs.append(t)
          imgs.append(m)   
   
    dfs = DataFrameSummary(df)
    sd=dfs.summary()
    sd.index.name = 'PARAMETERS'
    sd1= sd.reset_index(drop=False)
    def highlight_cols(x): 
        df = x.copy() 
        df.loc[:, :] = 'color: purple'
        df[['PARAMETERS']] = 'color: red'
        return df 

    # Applying the style function
    sd1 = sd1.style.apply(highlight_cols, axis = None)
    sd1.columns = sd1.columns.str.upper()


    
    return [temp_file1.name  ,temp_file3.name ,variable_table(df),dfviz,sd1]+reports+imgs

def query_chat(query):
    df=df1.value
    return df.sketch.ask(query)
    
def handle_click(file, sheet_name):
    print('Handle click')  
    results = generate_report(file, sheet_name)
    return results
    
def update_sheets(file):
    sheet_names = pd.ExcelFile(file).sheet_names
    
    print('Sheet updated')
    sheet_dropdown = gr.Dropdown(
            label="Select sheet", choices=[sheet_names], visible=True  # Initially disabled
        )
    return sheet_dropdown
    

with gr.Blocks() as cluster:
    with gr.Column():
        gr.HTML(value="""<h1 style="color: #1999FF; text-shadow: 1px 1px 2px #ddd;">EXCEL ANALYSIS AND INSIGHTS</h1>""")
        with gr.Row():
            file=gr.File(file_types=['.csv', '.xlsx'], label="Upload a CSV or Excel file")
            btn=gr.Button(value="Download Report") 
            
        with gr.Row():
            
            
            gr.HTML(value="""<h2 style="color: #3399FF; text-shadow: 1px 1px 2px #ddd;">PANDAS REPORT</h2>""")
            out1=gr.File(label="Download CSV")
            gr.HTML(value="""<h2 style="color: #3399FF; text-shadow: 1px 1px 2px #ddd;">SWEETVIZ REPORT</h2>""")
            out2=gr.File(label="Download CSV")
        sheet_dropdown = gr.Dropdown(
            label="Select sheet", choices=[], visible=False  # Initially disabled
        )
    
        
        with gr.Column():
            gr.HTML(value="""<h2 style="color: #BA4A00; text-shadow: 1px 1px 2px #ddd;"COLUMNS IN FILE</h2>""")
            #gr.Markdown("***Uploaded File***")
            var=gr.HTML()
        with gr.Column():
            gr.HTML(value="""<h2 style="color: #BA4A00; text-shadow: 1px 1px 2px #ddd;"UPLOADED FILE</h2>""")
            #gr.Markdown("#Uploaded File")
            dataframe1=gr.Dataframe()
        with gr.Column():
            gr.HTML(value="""<h2 style="color: #BA4A00; text-shadow: 1px 1px 2px #ddd;"COLUMNS STATS AND ANALYSIS</h2>""")
            #gr.Markdown("#Columns Analysis")
            dataframe2=gr.Dataframe()

        with gr.Column():
            gr.HTML(value="""<h2 style="color: #BA4A00; text-shadow: 1px 1px 2px #ddd;"ASK QUESTIONS ON THE COLUMNS </h2>""")
            with gr.Row():
                query=gr.Textbox(label="Enter any question on data columns",
                info="Natural language query",
                lines=1,
                value="What is maximum value of first column")
                outlabel=gr.HighlightedText(label='Query Result ')
            query_btn=gr.Button(value='Submit Query')
            query_btn.click(query_chat,inputs=query,outputs=outlabel)
            
            

        #Autoviz insights graphs
  
        gr.HTML(value="""<h3 style="color: #1999FF; text-shadow: 1px 1px 2px #ddd;">Insight Visualisations</h3>""")
        with gr.Column():
          plots2 = []
          for i in range(5):
              t = gr.Markdown()
              with gr.Column(scale=1):
                m=gr.Image()
                plots2.append(t)
                plots2.append(m)
                print(type(plots2[0]))
        gr.HTML(value="""<h3 style="color: #1999FF; text-shadow: 1px 1px 2px #ddd;">Insight graphs</h3>""")
        with gr.Row():           
          plots1 = []
          for i in range(5):
            with gr.Row():
              t = gr.Markdown()
              m=gr.File()
              plots1.append(t)
              plots1.append(m)
              print(type(plots1[0]))
    
    btn.click(handle_click, inputs=[file, sheet_dropdown], outputs=[out1, out2, var,dataframe1, dataframe2]+plots1+plots2)
    file.change(update_sheets, inputs=file, outputs=sheet_dropdown) 
    #btn.click(generate_report,inputs=[file],outputs=[out1,out2,var,dataframe1,dataframe2])
cluster.launch(debug=True)