# Source: Hugging Face Space file "app.py", uploaded by rameshmoorthy.
# Last commit: "Update app.py" (31200cf, verified). File size: 3.39 kB.
import pandas as pd
import ydata_profiling
import gradio as gr
from pydantic_settings import BaseSettings
from tempfile import NamedTemporaryFile
import sweetviz as sv
#from datatile.summary.df import DataFrameSummary
from autoviz.AutoViz_Class import AutoViz_Class
from traceml.summary.df import DataFrameSummary
def _convert_mixed_to_str(df):
    """Return a copy of *df* with every object-dtype column cast to ``str``.

    Object columns may hold mixed Python types; casting them to strings gives
    the downstream report generators one uniform type per column.
    """
    converted = df.copy()
    for col in converted.select_dtypes(include=['object']).columns:
        converted[col] = converted[col].astype(str)
    return converted


def generate_report(file, type=None):
    """Build exploratory-data-analysis reports for an uploaded CSV/Excel file.

    Args:
        file: The uploaded file. Either a path string or an object exposing
            the path as ``.name``. Files whose name ends in ``.csv``
            (case-insensitive) are read as CSV; anything else is read with
            ``pandas.read_excel``.
        type: Unused. It now defaults to ``None`` because the click handler
            wired to this function supplies only ``file``. The name shadows
            the builtin ``type`` and is kept only for backward compatibility.

    Returns:
        A 4-tuple of:
        - path of a temporary HTML file holding the ydata-profiling report,
        - path of a temporary HTML file holding the Sweetviz report,
        - the value returned by ``AutoViz_Class().AutoViz(...)``,
        - the ``DataFrameSummary(df).summary()`` table with its index moved
          into a ``Parameters`` column.

    Raises:
        ValueError: If no file was supplied.
    """
    if file is None:
        raise ValueError("No file supplied: upload a CSV or Excel file first.")

    # Accept both a plain path and an upload object carrying its path in .name.
    path = getattr(file, "name", file)
    if str(path).lower().endswith(".csv"):
        df = pd.read_csv(path)
    else:
        df = pd.read_excel(path)

    # --- ydata-profiling ("pandas") report ---------------------------------
    pandas_html_report = ydata_profiling.ProfileReport(df).to_html()
    # delete=False keeps the file on disk after the handle is closed so the
    # caller can serve it by path.
    with NamedTemporaryFile(delete=False, suffix=".html") as temp_file1:
        temp_file1.write(pandas_html_report.encode('utf-8'))
    print('Pandas completed')

    # NOTE: the dataprep report is currently disabled; no file is produced
    # for it.

    # --- Sweetviz report -----------------------------------------------------
    df = _convert_mixed_to_str(df)
    sweetviz_report = sv.analyze(df)
    # Reserve a unique path and let Sweetviz write straight to it. A fixed
    # filename in the working directory would be overwritten by concurrent
    # requests, and re-reading it needed an extra (previously unclosed) handle.
    with NamedTemporaryFile(delete=False, suffix=".html") as temp_file3:
        pass
    sweetviz_report.show_html(
        filepath=temp_file3.name,
        open_browser=False,
        layout='widescreen',
        scale=None,
    )
    print('Sweetviz completed')

    # --- AutoViz ---------------------------------------------------------------
    AV = AutoViz_Class()
    dfviz = AV.AutoViz(
        "",
        sep=",",
        depVar="",
        dfte=df,
        header=0,
        verbose=2,
        lowess=False,
        chart_format="html",
        max_rows_analyzed=500,
        max_cols_analyzed=20,
        save_plot_dir="plots",
    )

    # --- Column summary table --------------------------------------------------
    sd = DataFrameSummary(df).summary()
    sd.index.name = 'Parameters'
    # Move the index into a regular column so it is part of the returned table.
    sd1 = sd.reset_index(drop=False)

    return temp_file1.name, temp_file3.name, dfviz, sd1
# Gradio UI: upload a file, click the button, receive the generated reports.
with gr.Blocks() as cluster:
    with gr.Column():
        # Input row: file upload plus the button that triggers generation.
        with gr.Row():
            file = gr.File(file_types=['.csv', '.xlsx'], label="Upload a CSV or Excel file")
            btn = gr.Button(value="Download Report")
        # Output row: one heading and one download slot per report.
        with gr.Row():
            gr.HTML(value="""<h1 style="color: #3399FF; text-shadow: 1px 1px 2px #ddd;">PANDAS REPORT</h1>""")
            out1 = gr.File(label="Download CSV")
            gr.HTML(value="""<h1 style="color: #3399FF; text-shadow: 1px 1px 2px #ddd;">DATAPREP REPORT</h1>""")
            # NOTE: out2 is not listed in the click outputs below, so this
            # slot is never filled.
            out2 = gr.File(label="Download CSV")
            gr.HTML(value="""<h1 style="color: #3399FF; text-shadow: 1px 1px 2px #ddd;">SWEETVIZ REPORT</h1>""")
            out3 = gr.File(label="Download CSV")
        # Table row: two dataframes side by side, each under a caption.
        with gr.Row():
            with gr.Column():
                # gr.Markdown renders the caption text; Gradio has no
                # "Marker" component, so the previous call failed at import.
                gr.Markdown("Uploaded File")
                dataframe1 = gr.Dataframe()
            with gr.Column():
                gr.Markdown("Columns Analysis")
                dataframe2 = gr.Dataframe()
    # generate_report's four return values map, in order, onto these outputs.
    btn.click(generate_report, inputs=[file], outputs=[out1, out3, dataframe1, dataframe2])

cluster.launch()