File size: 7,629 Bytes
0a5d39a 7a75289 0a5d39a 71c9026 dd375ae 1065d81 bbe3128 c335d03 6a5ab3f 68c1008 6a5ab3f 68c1008 6a5ab3f 68c1008 6a5ab3f 68c1008 5bc9631 68c1008 5bc9631 68c1008 5bc9631 68c1008 5bc9631 68c1008 b9cae72 a899a25 d4abba2 3275b57 b9cae72 e7f020c d4abba2 3275b57 88adc6e 0a5d39a d4abba2 8cc8a10 cb63445 3cd0174 aff03dd 3cd0174 3275b57 af6630f 3cd0174 d4abba2 0d398ac d4abba2 390a5c2 4c3e715 390a5c2 bbe3128 c335d03 31200cf 9b7bc38 31200cf ab0b63c 9b7bc38 ab0b63c 88adc6e ab0b63c c335d03 6a5ab3f bd103f5 d4abba2 bd103f5 a899a25 a141a8d 4c3e715 a141a8d d4abba2 a141a8d d4abba2 4c3e715 d4abba2 4c3e715 d4abba2 bd103f5 d25ab29 18d1282 6213803 6a5ab3f 18d1282 6213803 88adc6e 719ba38 18d1282 6213803 88adc6e bd103f5 b9cae72 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 |
import pandas as pd
import ydata_profiling
import gradio as gr
from pydantic_settings import BaseSettings
from tempfile import NamedTemporaryFile
import sweetviz as sv
#from datatile.summary.df import DataFrameSummary
from autoviz.AutoViz_Class import AutoViz_Class
from traceml.summary.df import DataFrameSummary
def variable_table(df):
"""
Analyzes a DataFrame and categorizes variables with colorful HTML formatting.
Args:
df (pandas.DataFrame): The DataFrame to analyze.
Returns:
str: HTML code representing the analysis results with colorful highlights.
"""
# Analyze variable types
numerical_vars=[]
text_vars=[]
categorical_vars=[]
# Identify categorical, numerical, and text columns
# Iterate over columns
for col in df.columns:
print(col )
unique_values = df[col].nunique()
total_values = df[col].count()
dtype_kind = df[col].dtype.kind
print( 'unique value-',unique_values,' total_values ',total_values,dtype_kind)
# Check criteria for numerical, text, and categorical variables
if (dtype_kind == 'f' or dtype_kind == 'i') and unique_values >= total_values / 10:
numerical_vars.append(col)
print('Numerical')
elif (dtype_kind == 'O' or dtype_kind == 'b') and unique_values > total_values / 10:
text_vars.append(col)
print('Text')
else:
categorical_vars.append(col)
print('categorical')
print(categorical_vars,numerical_vars,text_vars)
# Build HTML table with styles
table_style = 'border: 1px solid #ddd; border-collapse: collapse; text-align: left; font-size: 14px;'
header_style = 'background-color: #f2f2f2; padding: 5px 10px;'
data_style = 'padding: 5px 10px; border-bottom: 1px solid #ddd;'
category_color = '#90ee90' # Light green for categorical
numerical_color = '#add8e6' # Light blue for numerical
text_color = '#ffd9b3' # Light yellow for text
html = f"<table style='{table_style}'>"
html += f"<tr><th style='{header_style}'>Variable Type</th><th style='{header_style}'>Columns</th></tr>"
# Add rows for each variable type with coloring
if categorical_vars:
html += f"<tr style='background-color: {category_color};'><td>Categorical</td><td style='{data_style}'>{', '.join(categorical_vars)}</td></tr>"
if numerical_vars:
html += f"<tr style='background-color: {numerical_color};'><td>Numerical</td><td style='{data_style}'>{', '.join(numerical_vars)}</td></tr>"
if text_vars:
html += f"<tr style='background-color: {text_color};'><td>Text</td><td style='{data_style}'>{', '.join(text_vars)}</td></tr>"
# Handle cases where no variables are found
if not (categorical_vars or numerical_vars or text_vars):
html += "<tr><td>No variables found!</td></tr>"
html += "</table>"
return html
def generate_report(file, type):
df = pd.read_csv(file) if file.name.endswith(".csv") else pd.read_excel(file)
pandas_html_report =ydata_profiling.ProfileReport(df).to_html()
temp_file1 = NamedTemporaryFile(delete=False, suffix=".html")
temp_file1.write(pandas_html_report.encode('utf-8'))
temp_file1.close()
print('Pandas completed')
# dataprep_report = create_report(df)
# temp_file2 = NamedTemporaryFile(delete=False, suffix=".html")
# temp_file2.write(dataprep_report.encode('utf-8'))
# temp_file2.close()
def convert_mixed_to_str(df):
mixed_cols = df.select_dtypes(include=['object']).columns # Detect object dtype (usually mixed)
for col in mixed_cols:
df[col] = df[col].astype(str) # Convert mixed columns to strings
return df
df = convert_mixed_to_str(df.copy())
df.columns = df.columns.str.upper()
sweetviz_report = sv.analyze(df)
#sweetviz_report=sweetviz_report.show_html()
#print(type(sweetviz_report))
report=sweetviz_report.show_html( filepath='SWEETVIZ_REPORT.html',
open_browser=False,
layout='widescreen',
scale=None)
HTMLFileToBeOpened = open('SWEETVIZ_REPORT.html', "r")
# Reading the file and storing in a variable
contents = HTMLFileToBeOpened.read()
print('Pandas completed')
temp_file3 = NamedTemporaryFile(delete=False, suffix=".html")
temp_file3.write(contents.encode('utf-8'))
temp_file3.close()
try:
#AUTOVIZ
AV = AutoViz_Class()
dfviz = AV.AutoViz(
"",
sep=",",
depVar="",
dfte=df,
header=0,
verbose=2,
lowess=False,
chart_format="html",
max_rows_analyzed=500,
max_cols_analyzed=20,
save_plot_dir="plots"
)
except Exception as e:
dfviz=df
print(e)
print('Exception during Autoviz')
dfs = DataFrameSummary(df)
sd=dfs.summary()
sd.index.name = 'PARAMETERS'
sd1= sd.reset_index(drop=False)
def highlight_cols(x):
df = x.copy()
df.loc[:, :] = 'color: purple'
df[['PARAMETERS']] = 'color: red'
return df
# Applying the style function
sd1 = sd1.style.apply(highlight_cols, axis = None)
sd1.columns = sd1.columns.str.upper()
return temp_file1.name ,temp_file3.name ,variable_table(df),dfviz,sd1
def handle_click(file, sheet_name):
sheet_dropdown.update(disabled=True) # Disable during report generation
results = generate_report(file, sheet_name)
sheet_dropdown.update(disabled=False) # Re-enable sheet selection
return results
def update_sheets(file):
sheet_names = pd.ExcelFile(file).sheet_names
sheet_dropdown.update(options=sheet_names, disabled=False)
with gr.Blocks() as cluster:
with gr.Column():
gr.HTML(value="""<h1 style="color: #1999FF; text-shadow: 1px 1px 2px #ddd;">EXCEL ANALYSIS AND INSIGHTS</h1>""")
with gr.Row():
file=gr.File(file_types=['.csv', '.xlsx'], label="Upload a CSV or Excel file")
btn=gr.Button(value="Download Report")
with gr.Row():
gr.HTML(value="""<h2 style="color: #3399FF; text-shadow: 1px 1px 2px #ddd;">PANDAS REPORT</h2>""")
out1=gr.File(label="Download CSV")
gr.HTML(value="""<h2 style="color: #3399FF; text-shadow: 1px 1px 2px #ddd;">SWEETVIZ REPORT</h2>""")
out2=gr.File(label="Download CSV")
sheet_dropdown = gr.Dropdown(
label="Select sheet", options=[], disabled=True # Initially disabled
)
with gr.Column():
gr.HTML(value="""<h2 style="color: #BA4A00; text-shadow: 1px 1px 2px #ddd;"COLUMNS IN FILE</h2>""")
#gr.Markdown("***Uploaded File***")
var=gr.HTML()
with gr.Column():
gr.HTML(value="""<h2 style="color: #BA4A00; text-shadow: 1px 1px 2px #ddd;"UPLOADED FILE</h2>""")
#gr.Markdown("#Uploaded File")
dataframe1=gr.Dataframe()
with gr.Column():
gr.HTML(value="""<h2 style="color: #BA4A00; text-shadow: 1px 1px 2px #ddd;"COLUMNS STATS AND ANALYSIS</h2>""")
#gr.Markdown("#Columns Analysis")
dataframe2=gr.Dataframe()
btn.click(handle_click, inputs=[file, sheet_dropdown], outputs=[out1, out2, var, dataframe1, dataframe2])
file.observe(update_sheets, names=["data"])
#btn.click(generate_report,inputs=[file],outputs=[out1,out2,var,dataframe1,dataframe2])
cluster.launch()
|