File size: 9,994 Bytes
0a5d39a 7a75289 0a5d39a 71c9026 dd375ae 1065d81 bbe3128 24047b3 c335d03 6a5ab3f 68c1008 6a5ab3f 68c1008 6a5ab3f 68c1008 6a5ab3f 68c1008 5bc9631 68c1008 5bc9631 68c1008 5bc9631 68c1008 5bc9631 68c1008 b9cae72 a899a25 d4abba2 3275b57 b9cae72 e7f020c d4abba2 3275b57 88adc6e 0a5d39a d4abba2 8cc8a10 cb63445 3cd0174 aff03dd 3cd0174 3275b57 af6630f 3cd0174 d4abba2 0d398ac d4abba2 e9a3347 390a5c2 d21feaf 123707b 8c65d6c 0435627 cb91710 8c65d6c 1e376b8 8c65d6c 1e376b8 123707b 8c65d6c cb91710 8c65d6c 1e376b8 8c65d6c 390a5c2 4c3e715 390a5c2 bbe3128 8c65d6c 123707b c335d03 31200cf 9b7bc38 31200cf ab0b63c 9b7bc38 ab0b63c 88adc6e ab0b63c c335d03 9669d6d 0fe990c bd103f5 bc3054f 0fe990c d4abba2 bd103f5 0fe990c bc3054f 0fe990c bd103f5 a899a25 a141a8d 4c3e715 a141a8d d4abba2 a141a8d d4abba2 4c3e715 d4abba2 4c3e715 d4abba2 bd103f5 24047b3 bd103f5 d25ab29 18d1282 6213803 6a5ab3f 18d1282 6213803 88adc6e 719ba38 18d1282 6213803 88adc6e 123707b 24047b3 123707b 79ccabc 123707b b712f21 0fe990c bd103f5 24047b3 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 |
import pandas as pd
import ydata_profiling
import gradio as gr
from pydantic_settings import BaseSettings
from tempfile import NamedTemporaryFile
import sweetviz as sv
#from datatile.summary.df import DataFrameSummary
from autoviz.AutoViz_Class import AutoViz_Class
from traceml.summary.df import DataFrameSummary
import os
def variable_table(df):
"""
Analyzes a DataFrame and categorizes variables with colorful HTML formatting.
Args:
df (pandas.DataFrame): The DataFrame to analyze.
Returns:
str: HTML code representing the analysis results with colorful highlights.
"""
# Analyze variable types
numerical_vars=[]
text_vars=[]
categorical_vars=[]
# Identify categorical, numerical, and text columns
# Iterate over columns
for col in df.columns:
print(col )
unique_values = df[col].nunique()
total_values = df[col].count()
dtype_kind = df[col].dtype.kind
print( 'unique value-',unique_values,' total_values ',total_values,dtype_kind)
# Check criteria for numerical, text, and categorical variables
if (dtype_kind == 'f' or dtype_kind == 'i') and unique_values >= total_values / 10:
numerical_vars.append(col)
print('Numerical')
elif (dtype_kind == 'O' or dtype_kind == 'b') and unique_values > total_values / 10:
text_vars.append(col)
print('Text')
else:
categorical_vars.append(col)
print('categorical')
print(categorical_vars,numerical_vars,text_vars)
# Build HTML table with styles
table_style = 'border: 1px solid #ddd; border-collapse: collapse; text-align: left; font-size: 14px;'
header_style = 'background-color: #f2f2f2; padding: 5px 10px;'
data_style = 'padding: 5px 10px; border-bottom: 1px solid #ddd;'
category_color = '#90ee90' # Light green for categorical
numerical_color = '#add8e6' # Light blue for numerical
text_color = '#ffd9b3' # Light yellow for text
html = f"<table style='{table_style}'>"
html += f"<tr><th style='{header_style}'>Variable Type</th><th style='{header_style}'>Columns</th></tr>"
# Add rows for each variable type with coloring
if categorical_vars:
html += f"<tr style='background-color: {category_color};'><td>Categorical</td><td style='{data_style}'>{', '.join(categorical_vars)}</td></tr>"
if numerical_vars:
html += f"<tr style='background-color: {numerical_color};'><td>Numerical</td><td style='{data_style}'>{', '.join(numerical_vars)}</td></tr>"
if text_vars:
html += f"<tr style='background-color: {text_color};'><td>Text</td><td style='{data_style}'>{', '.join(text_vars)}</td></tr>"
# Handle cases where no variables are found
if not (categorical_vars or numerical_vars or text_vars):
html += "<tr><td>No variables found!</td></tr>"
html += "</table>"
return html
def generate_report(file, type):
df = pd.read_csv(file) if file.name.endswith(".csv") else pd.read_excel(file)
pandas_html_report =ydata_profiling.ProfileReport(df).to_html()
temp_file1 = NamedTemporaryFile(delete=False, suffix=".html")
temp_file1.write(pandas_html_report.encode('utf-8'))
temp_file1.close()
print('Pandas completed')
# dataprep_report = create_report(df)
# temp_file2 = NamedTemporaryFile(delete=False, suffix=".html")
# temp_file2.write(dataprep_report.encode('utf-8'))
# temp_file2.close()
def convert_mixed_to_str(df):
mixed_cols = df.select_dtypes(include=['object']).columns # Detect object dtype (usually mixed)
for col in mixed_cols:
df[col] = df[col].astype(str) # Convert mixed columns to strings
return df
df = convert_mixed_to_str(df.copy())
df.columns = df.columns.str.upper()
sweetviz_report = sv.analyze(df)
#sweetviz_report=sweetviz_report.show_html()
#print(type(sweetviz_report))
report=sweetviz_report.show_html( filepath='SWEETVIZ_REPORT.html',
open_browser=False,
layout='widescreen',
scale=None)
HTMLFileToBeOpened = open('SWEETVIZ_REPORT.html', "r")
# Reading the file and storing in a variable
contents = HTMLFileToBeOpened.read()
print('Pandas completed')
temp_file3 = NamedTemporaryFile(delete=False, suffix=".html")
temp_file3.write(contents.encode('utf-8'))
temp_file3.close()
dfviz=df
try:
AV = AutoViz_Class()
dft = AV.AutoViz(filename="", sep=',', dfte=df, header=0, verbose=1,
lowess=False,chart_format='html',max_rows_analyzed=150000,
max_cols_analyzed=30,save_plot_dir=None)
reports = []
print(os.listdir("./"))
output_dir_html="./AutoViz_Plots"
for filename in os.listdir(output_dir_html):
if filename.endswith(".html"):
title = os.path.splitext(filename)[0] # Extract title from filename (without extension)
reports.append(title)
reports.append(output_dir_html+'/'+filename)
print(reports)
dft = AV.AutoViz(filename='', sep=',', header=0, verbose=2,dfte=df,
lowess=False,chart_format='png',max_rows_analyzed=150000,
max_cols_analyzed=30,save_plot_dir=None)
imgs = []
output_dir_img="./AutoViz_Plots"
for filename in os.listdir(output_dir_img):
if filename.endswith(".png"):
title = os.path.splitext(filename)[0] # Extract title from filename (without extension)
imgs.append(title)
imgs.append(output_dir_img+'/'+filename)
print(imgs)
except Exception as e:
dfviz=df
print(e)
print('Exception during Autoviz')
while len(reports)<10:
# Comparing the lengths and appending t and m to output if needed
t = gr.Markdown(visible=False)
m = gr.File(visible=False)
reports.append(t)
reports.append(m)
while len(imgs)<10:
# Comparing the lengths and appending t and m to output if needed
t = gr.Markdown(visible=False)
m = gr.Image(visible=False)
imgs.append(t)
imgs.append(m)
dfs = DataFrameSummary(df)
sd=dfs.summary()
sd.index.name = 'PARAMETERS'
sd1= sd.reset_index(drop=False)
def highlight_cols(x):
df = x.copy()
df.loc[:, :] = 'color: purple'
df[['PARAMETERS']] = 'color: red'
return df
# Applying the style function
sd1 = sd1.style.apply(highlight_cols, axis = None)
sd1.columns = sd1.columns.str.upper()
return [temp_file1.name ,temp_file3.name ,variable_table(df),dfviz,sd1]+reports+imgs
def handle_click(file, sheet_name):
print('Handle click')
results = generate_report(file, sheet_name)
return results
def update_sheets(file):
sheet_names = pd.ExcelFile(file).sheet_names
print('Sheet updated')
sheet_dropdown = gr.Dropdown(
label="Select sheet", choices=[sheet_names], visible=True # Initially disabled
)
return sheet_dropdown
with gr.Blocks() as cluster:
with gr.Column():
gr.HTML(value="""<h1 style="color: #1999FF; text-shadow: 1px 1px 2px #ddd;">EXCEL ANALYSIS AND INSIGHTS</h1>""")
with gr.Row():
file=gr.File(file_types=['.csv', '.xlsx'], label="Upload a CSV or Excel file")
btn=gr.Button(value="Download Report")
with gr.Row():
gr.HTML(value="""<h2 style="color: #3399FF; text-shadow: 1px 1px 2px #ddd;">PANDAS REPORT</h2>""")
out1=gr.File(label="Download CSV")
gr.HTML(value="""<h2 style="color: #3399FF; text-shadow: 1px 1px 2px #ddd;">SWEETVIZ REPORT</h2>""")
out2=gr.File(label="Download CSV")
sheet_dropdown = gr.Dropdown(
label="Select sheet", choices=[], visible=False # Initially disabled
)
with gr.Column():
gr.HTML(value="""<h2 style="color: #BA4A00; text-shadow: 1px 1px 2px #ddd;"COLUMNS IN FILE</h2>""")
#gr.Markdown("***Uploaded File***")
var=gr.HTML()
with gr.Column():
gr.HTML(value="""<h2 style="color: #BA4A00; text-shadow: 1px 1px 2px #ddd;"UPLOADED FILE</h2>""")
#gr.Markdown("#Uploaded File")
dataframe1=gr.Dataframe()
with gr.Column():
gr.HTML(value="""<h2 style="color: #BA4A00; text-shadow: 1px 1px 2px #ddd;"COLUMNS STATS AND ANALYSIS</h2>""")
#gr.Markdown("#Columns Analysis")
dataframe2=gr.Dataframe()
#Autoviz insights graphs
gr.HTML(value="""<h3 style="color: #1999FF; text-shadow: 1px 1px 2px #ddd;">Insight Visualisations</h3>""")
with gr.Column():
plots2 = []
for i in range(5):
t = gr.Markdown()
with gr.Column(scale=1):
m=gr.Image()
plots2.append(t)
plots2.append(m)
print(type(plots2[0]))
gr.HTML(value="""<h3 style="color: #1999FF; text-shadow: 1px 1px 2px #ddd;">Insight graphs</h3>""")
with gr.Row():
plots1 = []
for i in range(5):
with gr.Row():
t = gr.Markdown()
m=gr.File()
plots1.append(t)
plots1.append(m)
print(type(plots1[0]))
btn.click(handle_click, inputs=[file, sheet_dropdown], outputs=[out1, out2, var,dataframe1, dataframe2]+plots1+plots2)
file.change(update_sheets, inputs=file, outputs=sheet_dropdown)
#btn.click(generate_report,inputs=[file],outputs=[out1,out2,var,dataframe1,dataframe2])
cluster.launch(debug=True)
|