Spaces:

rameshmoorthy
/

NAC_Product_analysis

Sleeping

File size: 7,629 Bytes

0a5d39a
7a75289
0a5d39a
71c9026
dd375ae
1065d81
bbe3128
 
 
c335d03
6a5ab3f
68c1008
 
 
 
6a5ab3f
68c1008
 
6a5ab3f
68c1008
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6a5ab3f
68c1008
 
 
 
 
 
 
 
 
 
 
 
 
5bc9631
68c1008
5bc9631
68c1008
5bc9631
68c1008
 
 
5bc9631
68c1008
 
 
b9cae72
 
 
a899a25
d4abba2
 
 
 
3275b57
b9cae72
e7f020c
 
 
 
d4abba2
3275b57
 
 
 
 
 
 
 
88adc6e
0a5d39a
d4abba2
8cc8a10
cb63445
3cd0174
aff03dd
 
 
3cd0174
 
 
 
3275b57
af6630f
3cd0174
d4abba2
0d398ac
d4abba2
390a5c2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4c3e715
390a5c2
 
bbe3128
c335d03
 
 
 
31200cf
9b7bc38
31200cf
ab0b63c
 
 
9b7bc38
ab0b63c
 
 
 
88adc6e
 
ab0b63c
c335d03
6a5ab3f
bd103f5
 
 
 
 
d4abba2
bd103f5
 
 
 
a899a25
a141a8d
4c3e715
a141a8d
 
 
d4abba2
a141a8d
d4abba2
 
4c3e715
d4abba2
4c3e715
d4abba2
bd103f5
 
 
 
 
d25ab29
18d1282
6213803
6a5ab3f
 
18d1282
6213803
88adc6e
719ba38
18d1282
6213803
88adc6e
bd103f5
 
 
 
b9cae72

import pandas as pd
import ydata_profiling
import gradio as gr
from pydantic_settings import BaseSettings
from tempfile import NamedTemporaryFile
import sweetviz as sv
#from datatile.summary.df import DataFrameSummary
from autoviz.AutoViz_Class import AutoViz_Class
from traceml.summary.df import DataFrameSummary

def variable_table(df):
    """
    Analyzes a DataFrame and categorizes variables with colorful HTML formatting.
    
    Args:
      df (pandas.DataFrame): The DataFrame to analyze.
    
    Returns:
      str: HTML code representing the analysis results with colorful highlights.
    """
    # Analyze variable types
    numerical_vars=[]
    text_vars=[]
    categorical_vars=[]
    # Identify categorical, numerical, and text columns
    # Iterate over columns
    for col in df.columns:
        print(col )
        unique_values = df[col].nunique()
        total_values = df[col].count()
        dtype_kind = df[col].dtype.kind
        print( 'unique value-',unique_values,' total_values ',total_values,dtype_kind)
        # Check criteria for numerical, text, and categorical variables
        if (dtype_kind == 'f' or dtype_kind == 'i') and unique_values >= total_values / 10:
            numerical_vars.append(col)
            print('Numerical')
        elif (dtype_kind == 'O' or dtype_kind == 'b') and unique_values > total_values / 10:
            text_vars.append(col)
            print('Text')
        else:
            categorical_vars.append(col)
            print('categorical')
    print(categorical_vars,numerical_vars,text_vars)

    # Build HTML table with styles
    table_style = 'border: 1px solid #ddd; border-collapse: collapse; text-align: left; font-size: 14px;'
    header_style = 'background-color: #f2f2f2; padding: 5px 10px;'
    data_style = 'padding: 5px 10px; border-bottom: 1px solid #ddd;'
    category_color = '#90ee90'  # Light green for categorical
    numerical_color = '#add8e6'  # Light blue for numerical
    text_color = '#ffd9b3'  # Light yellow for text
    
    html = f"<table style='{table_style}'>"
    html += f"<tr><th style='{header_style}'>Variable Type</th><th style='{header_style}'>Columns</th></tr>"
    
    # Add rows for each variable type with coloring
    if categorical_vars:
        html += f"<tr style='background-color: {category_color};'><td>Categorical</td><td style='{data_style}'>{', '.join(categorical_vars)}</td></tr>"
    if numerical_vars:
        html += f"<tr style='background-color: {numerical_color};'><td>Numerical</td><td style='{data_style}'>{', '.join(numerical_vars)}</td></tr>"
    if text_vars:
        html += f"<tr style='background-color: {text_color};'><td>Text</td><td style='{data_style}'>{', '.join(text_vars)}</td></tr>"
    
    # Handle cases where no variables are found
    if not (categorical_vars or numerical_vars or text_vars):
        html += "<tr><td>No variables found!</td></tr>"
    
    html += "</table>"
    return html

def generate_report(file, type):
    df = pd.read_csv(file) if file.name.endswith(".csv") else pd.read_excel(file)

    pandas_html_report =ydata_profiling.ProfileReport(df).to_html() 
    temp_file1 = NamedTemporaryFile(delete=False, suffix=".html")
    temp_file1.write(pandas_html_report.encode('utf-8'))
    temp_file1.close()
    print('Pandas completed')
    
    # dataprep_report = create_report(df)
    # temp_file2 = NamedTemporaryFile(delete=False, suffix=".html")
    # temp_file2.write(dataprep_report.encode('utf-8'))
    # temp_file2.close()

    def convert_mixed_to_str(df):
  
      mixed_cols = df.select_dtypes(include=['object']).columns  # Detect object dtype (usually mixed)
      for col in mixed_cols:
        df[col] = df[col].astype(str)  # Convert mixed columns to strings
      return df

    df = convert_mixed_to_str(df.copy())
    df.columns = df.columns.str.upper()

    sweetviz_report = sv.analyze(df)
    #sweetviz_report=sweetviz_report.show_html()
    #print(type(sweetviz_report))
    report=sweetviz_report.show_html(  filepath='SWEETVIZ_REPORT.html', 
            open_browser=False, 
            layout='widescreen', 
            scale=None)
    HTMLFileToBeOpened = open('SWEETVIZ_REPORT.html', "r") 
  
    # Reading the file and storing in a variable 
    contents = HTMLFileToBeOpened.read() 
    print('Pandas completed')
    
  
    temp_file3 = NamedTemporaryFile(delete=False, suffix=".html")
    temp_file3.write(contents.encode('utf-8'))
    temp_file3.close()
    try:
        #AUTOVIZ
        AV = AutoViz_Class()
    
        dfviz = AV.AutoViz(
        "",
        sep=",",
        depVar="",
        dfte=df,
        header=0,
        verbose=2,
        lowess=False,
        chart_format="html",
        max_rows_analyzed=500,
        max_cols_analyzed=20,
        save_plot_dir="plots"
        )
    except Exception as e:
        dfviz=df
        print(e)
        print('Exception during Autoviz')

    
    
   
    dfs = DataFrameSummary(df)
    sd=dfs.summary()
    sd.index.name = 'PARAMETERS'
    sd1= sd.reset_index(drop=False)
    def highlight_cols(x): 
        df = x.copy() 
        df.loc[:, :] = 'color: purple'
        df[['PARAMETERS']] = 'color: red'
        return df 

    # Applying the style function
    sd1 = sd1.style.apply(highlight_cols, axis = None)
    sd1.columns = sd1.columns.str.upper()


    
    return temp_file1.name  ,temp_file3.name ,variable_table(df),dfviz,sd1
def handle_click(file, sheet_name):
            sheet_dropdown.update(disabled=True)  # Disable during report generation
            results = generate_report(file, sheet_name)
            sheet_dropdown.update(disabled=False)  # Re-enable sheet selection
            return results
    
def update_sheets(file):
    sheet_names = pd.ExcelFile(file).sheet_names
    sheet_dropdown.update(options=sheet_names, disabled=False)

with gr.Blocks() as cluster:
    with gr.Column():
        gr.HTML(value="""<h1 style="color: #1999FF; text-shadow: 1px 1px 2px #ddd;">EXCEL ANALYSIS AND INSIGHTS</h1>""")
        with gr.Row():
            file=gr.File(file_types=['.csv', '.xlsx'], label="Upload a CSV or Excel file")
            btn=gr.Button(value="Download Report") 
            
        with gr.Row():
            
            
            gr.HTML(value="""<h2 style="color: #3399FF; text-shadow: 1px 1px 2px #ddd;">PANDAS REPORT</h2>""")
            out1=gr.File(label="Download CSV")
            gr.HTML(value="""<h2 style="color: #3399FF; text-shadow: 1px 1px 2px #ddd;">SWEETVIZ REPORT</h2>""")
            out2=gr.File(label="Download CSV")
        sheet_dropdown = gr.Dropdown(
            label="Select sheet", options=[], disabled=True  # Initially disabled
        )
    
        
        with gr.Column():
            gr.HTML(value="""<h2 style="color: #BA4A00; text-shadow: 1px 1px 2px #ddd;"COLUMNS IN FILE</h2>""")
            #gr.Markdown("***Uploaded File***")
            var=gr.HTML()
        with gr.Column():
            gr.HTML(value="""<h2 style="color: #BA4A00; text-shadow: 1px 1px 2px #ddd;"UPLOADED FILE</h2>""")
            #gr.Markdown("#Uploaded File")
            dataframe1=gr.Dataframe()
        with gr.Column():
            gr.HTML(value="""<h2 style="color: #BA4A00; text-shadow: 1px 1px 2px #ddd;"COLUMNS STATS AND ANALYSIS</h2>""")
            #gr.Markdown("#Columns Analysis")
            dataframe2=gr.Dataframe()
    
    btn.click(handle_click, inputs=[file, sheet_dropdown], outputs=[out1, out2, var, dataframe1, dataframe2])
    file.observe(update_sheets, names=["data"])
    #btn.click(generate_report,inputs=[file],outputs=[out1,out2,var,dataframe1,dataframe2])
cluster.launch()