|
import pandas as pd |
|
import ydata_profiling |
|
import gradio as gr |
|
from pydantic_settings import BaseSettings |
|
from tempfile import NamedTemporaryFile |
|
import sweetviz as sv |
|
|
|
from autoviz.AutoViz_Class import AutoViz_Class |
|
from traceml.summary.df import DataFrameSummary |
|
|
|
|
|
def _stringify_object_columns(df):
    """Cast every object-dtype column of *df* to ``str`` and return *df*.

    The frame is modified in place; pass a copy if the caller's frame must
    stay untouched.
    """
    for column in df.select_dtypes(include=["object"]).columns:
        df[column] = df[column].astype(str)
    return df


def _write_temp_html(payload):
    """Write *payload* (bytes) to a new ``.html`` temp file and return its path.

    The file is created with ``delete=False`` so it still exists after this
    function returns and can be handed to the UI as a download.
    """
    with NamedTemporaryFile(delete=False, suffix=".html") as handle:
        handle.write(payload)
    return handle.name


def generate_report(file, type=None):
    """Build the profiling reports for an uploaded CSV or Excel file.

    Args:
        file: The uploaded file. Either an object exposing the on-disk path
            as ``.name`` or a plain path string.
        type: Unused. Kept (and defaulted) for backward compatibility; the
            click handler in this file passes only ``file``, so a required
            second parameter would make every call fail. The name shadows
            the builtin but cannot be renamed without breaking keyword
            callers.

    Returns:
        A 4-tuple ``(profile_path, sweetviz_path, autoviz_result, summary)``:
        the path of the ydata-profiling HTML report, the path of the Sweetviz
        HTML report, the value returned by ``AutoViz_Class.AutoViz``, and the
        ``DataFrameSummary`` summary with its index exposed as a
        ``Parameters`` column.

    Side effects:
        Writes ``SWEETVIZ_REPORT.html`` to the current working directory,
        passes ``save_plot_dir="plots"`` to AutoViz, creates two temp files
        that are never deleted here, and prints progress messages.
    """
    # Read by path: works for both file wrappers and plain path strings.
    path = getattr(file, "name", file)
    if str(path).lower().endswith(".csv"):
        df = pd.read_csv(path)
    else:
        df = pd.read_excel(path)

    # --- ydata-profiling report (runs on the data exactly as loaded) ---
    profile_html = ydata_profiling.ProfileReport(df).to_html()
    profile_path = _write_temp_html(profile_html.encode('utf-8'))
    print('Pandas completed')

    # The remaining tools get a copy whose object columns are plain strings.
    df = _stringify_object_columns(df.copy())

    # --- Sweetviz report ---
    sweetviz_report = sv.analyze(df)
    sweetviz_report.show_html(
        filepath='SWEETVIZ_REPORT.html',
        open_browser=False,
        layout='widescreen',
        scale=None,
    )
    # Copy raw bytes so the result does not depend on the platform's default
    # text encoding, and close the handle deterministically.
    with open('SWEETVIZ_REPORT.html', "rb") as sweetviz_file:
        sweetviz_path = _write_temp_html(sweetviz_file.read())
    print('Sweetviz completed')

    # --- AutoViz ---
    AV = AutoViz_Class()
    dfviz = AV.AutoViz(
        "",
        sep=",",
        depVar="",
        dfte=df,
        header=0,
        verbose=2,
        lowess=False,
        chart_format="html",
        max_rows_analyzed=500,
        max_cols_analyzed=20,
        save_plot_dir="plots",
    )

    # --- Column summary ---
    summary = DataFrameSummary(df).summary()
    summary.index.name = 'Parameters'
    summary = summary.reset_index(drop=False)

    return profile_path, sweetviz_path, dfviz, summary
|
|
|
|
|
# Gradio UI: upload a CSV/Excel file, then build the reports on button click.
# NOTE(review): the nesting below is reconstructed from statement order because
# the original indentation was lost; confirm the intended layout.
with gr.Blocks() as cluster:
    with gr.Column():

        # Input row: file picker plus the trigger button.
        with gr.Row():
            file = gr.File(file_types=['.csv', '.xlsx'], label="Upload a CSV or Excel file")
            btn = gr.Button(value="Download Report")

        # Report row: one heading and one download slot per report.
        with gr.Row():
            gr.HTML(value="""<h1 style="color: #3399FF; text-shadow: 1px 1px 2px #ddd;">PANDAS REPORT</h1>""")
            # The files delivered here are .html reports, not CSVs.
            out1 = gr.File(label="Download HTML report")
            gr.HTML(value="""<h1 style="color: #3399FF; text-shadow: 1px 1px 2px #ddd;">DATAPREP REPORT</h1>""")
            # NOTE(review): out2 is not listed in the click outputs below, so
            # this slot is never populated; left unchanged pending a decision.
            out2 = gr.File(label="Download CSV")
            gr.HTML(value="""<h1 style="color: #3399FF; text-shadow: 1px 1px 2px #ddd;">SWEETVIZ REPORT</h1>""")
            out3 = gr.File(label="Download HTML report")

        # Table row: two side-by-side dataframes.
        with gr.Row():
            with gr.Column():
                gr.Markdown("Uploaded File")
                dataframe1 = gr.Dataframe()
            with gr.Column():
                gr.Markdown("Columns Analysis")
                dataframe2 = gr.Dataframe()

        # The four outputs are filled, in order, from generate_report's
        # 4-tuple return value; only the uploaded file is passed as input.
        btn.click(generate_report, inputs=[file], outputs=[out1, out3, dataframe1, dataframe2])

# Launch stays at module level so running or importing the script behaves as before.
cluster.launch()
|
|