Spaces:

rameshmoorthy
/

NAC_Product_analysis

Sleeping

File size: 9,994 Bytes

0a5d39a
7a75289
0a5d39a
71c9026
dd375ae
1065d81
bbe3128
 
 
24047b3
c335d03
6a5ab3f
68c1008
 
 
 
6a5ab3f
68c1008
 
6a5ab3f
68c1008
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6a5ab3f
68c1008
 
 
 
 
 
 
 
 
 
 
 
 
5bc9631
68c1008
5bc9631
68c1008
5bc9631
68c1008
 
 
5bc9631
68c1008
 
 
b9cae72
 
 
a899a25
d4abba2
 
 
 
3275b57
b9cae72
e7f020c
 
 
 
d4abba2
3275b57
 
 
 
 
 
 
 
88adc6e
0a5d39a
d4abba2
8cc8a10
cb63445
3cd0174
aff03dd
 
 
3cd0174
 
 
 
3275b57
af6630f
3cd0174
d4abba2
0d398ac
d4abba2
e9a3347
390a5c2
d21feaf
123707b
 
8c65d6c
 
0435627
cb91710
8c65d6c
 
 
 
 
1e376b8
8c65d6c
1e376b8
123707b
8c65d6c
 
 
cb91710
8c65d6c
 
 
 
 
1e376b8
8c65d6c
390a5c2
4c3e715
390a5c2
 
bbe3128
8c65d6c
 
123707b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c335d03
 
31200cf
9b7bc38
31200cf
ab0b63c
 
 
9b7bc38
ab0b63c
 
 
 
88adc6e
 
ab0b63c
c335d03
9669d6d
0fe990c
bd103f5
bc3054f
0fe990c
 
d4abba2
bd103f5
 
0fe990c
bc3054f
0fe990c
 
 
 
 
bd103f5
a899a25
a141a8d
4c3e715
a141a8d
 
 
d4abba2
a141a8d
d4abba2
 
4c3e715
d4abba2
4c3e715
d4abba2
bd103f5
24047b3
bd103f5
 
 
d25ab29
18d1282
6213803
6a5ab3f
 
18d1282
6213803
88adc6e
719ba38
18d1282
6213803
88adc6e
123707b
 
24047b3
123707b
 
 
 
 
 
 
 
 
79ccabc
123707b
 
 
 
 
 
 
 
 
 
 
b712f21
0fe990c
bd103f5
24047b3

import pandas as pd
import ydata_profiling
import gradio as gr
from pydantic_settings import BaseSettings
from tempfile import NamedTemporaryFile
import sweetviz as sv
#from datatile.summary.df import DataFrameSummary
from autoviz.AutoViz_Class import AutoViz_Class
from traceml.summary.df import DataFrameSummary
import os

def variable_table(df):
    """
    Analyzes a DataFrame and categorizes variables with colorful HTML formatting.
    
    Args:
      df (pandas.DataFrame): The DataFrame to analyze.
    
    Returns:
      str: HTML code representing the analysis results with colorful highlights.
    """
    # Analyze variable types
    numerical_vars=[]
    text_vars=[]
    categorical_vars=[]
    # Identify categorical, numerical, and text columns
    # Iterate over columns
    for col in df.columns:
        print(col )
        unique_values = df[col].nunique()
        total_values = df[col].count()
        dtype_kind = df[col].dtype.kind
        print( 'unique value-',unique_values,' total_values ',total_values,dtype_kind)
        # Check criteria for numerical, text, and categorical variables
        if (dtype_kind == 'f' or dtype_kind == 'i') and unique_values >= total_values / 10:
            numerical_vars.append(col)
            print('Numerical')
        elif (dtype_kind == 'O' or dtype_kind == 'b') and unique_values > total_values / 10:
            text_vars.append(col)
            print('Text')
        else:
            categorical_vars.append(col)
            print('categorical')
    print(categorical_vars,numerical_vars,text_vars)

    # Build HTML table with styles
    table_style = 'border: 1px solid #ddd; border-collapse: collapse; text-align: left; font-size: 14px;'
    header_style = 'background-color: #f2f2f2; padding: 5px 10px;'
    data_style = 'padding: 5px 10px; border-bottom: 1px solid #ddd;'
    category_color = '#90ee90'  # Light green for categorical
    numerical_color = '#add8e6'  # Light blue for numerical
    text_color = '#ffd9b3'  # Light yellow for text
    
    html = f"<table style='{table_style}'>"
    html += f"<tr><th style='{header_style}'>Variable Type</th><th style='{header_style}'>Columns</th></tr>"
    
    # Add rows for each variable type with coloring
    if categorical_vars:
        html += f"<tr style='background-color: {category_color};'><td>Categorical</td><td style='{data_style}'>{', '.join(categorical_vars)}</td></tr>"
    if numerical_vars:
        html += f"<tr style='background-color: {numerical_color};'><td>Numerical</td><td style='{data_style}'>{', '.join(numerical_vars)}</td></tr>"
    if text_vars:
        html += f"<tr style='background-color: {text_color};'><td>Text</td><td style='{data_style}'>{', '.join(text_vars)}</td></tr>"
    
    # Handle cases where no variables are found
    if not (categorical_vars or numerical_vars or text_vars):
        html += "<tr><td>No variables found!</td></tr>"
    
    html += "</table>"
    return html

def generate_report(file, type):
    df = pd.read_csv(file) if file.name.endswith(".csv") else pd.read_excel(file)

    pandas_html_report =ydata_profiling.ProfileReport(df).to_html() 
    temp_file1 = NamedTemporaryFile(delete=False, suffix=".html")
    temp_file1.write(pandas_html_report.encode('utf-8'))
    temp_file1.close()
    print('Pandas completed')
    
    # dataprep_report = create_report(df)
    # temp_file2 = NamedTemporaryFile(delete=False, suffix=".html")
    # temp_file2.write(dataprep_report.encode('utf-8'))
    # temp_file2.close()

    def convert_mixed_to_str(df):
  
      mixed_cols = df.select_dtypes(include=['object']).columns  # Detect object dtype (usually mixed)
      for col in mixed_cols:
        df[col] = df[col].astype(str)  # Convert mixed columns to strings
      return df

    df = convert_mixed_to_str(df.copy())
    df.columns = df.columns.str.upper()

    sweetviz_report = sv.analyze(df)
    #sweetviz_report=sweetviz_report.show_html()
    #print(type(sweetviz_report))
    report=sweetviz_report.show_html(  filepath='SWEETVIZ_REPORT.html', 
            open_browser=False, 
            layout='widescreen', 
            scale=None)
    HTMLFileToBeOpened = open('SWEETVIZ_REPORT.html', "r") 
  
    # Reading the file and storing in a variable 
    contents = HTMLFileToBeOpened.read() 
    print('Pandas completed')
    
  
    temp_file3 = NamedTemporaryFile(delete=False, suffix=".html")
    temp_file3.write(contents.encode('utf-8'))
    temp_file3.close()
    dfviz=df
    try:
        AV = AutoViz_Class()
        dft = AV.AutoViz(filename="", sep=',', dfte=df, header=0, verbose=1,
                        lowess=False,chart_format='html',max_rows_analyzed=150000,
                         max_cols_analyzed=30,save_plot_dir=None)
        reports = []
        print(os.listdir("./"))
        output_dir_html="./AutoViz_Plots"
        for filename in os.listdir(output_dir_html):
            if filename.endswith(".html"):
                title = os.path.splitext(filename)[0]  # Extract title from filename (without extension)
                reports.append(title)
                reports.append(output_dir_html+'/'+filename)
                print(reports)
                
        dft = AV.AutoViz(filename='', sep=',', header=0, verbose=2,dfte=df,
                      lowess=False,chart_format='png',max_rows_analyzed=150000,
                      max_cols_analyzed=30,save_plot_dir=None)
        
        imgs = []
        output_dir_img="./AutoViz_Plots"
        for filename in os.listdir(output_dir_img):
            if filename.endswith(".png"):
                title = os.path.splitext(filename)[0]  # Extract title from filename (without extension)
                imgs.append(title)
                imgs.append(output_dir_img+'/'+filename)
                print(imgs)
                
    except Exception as e:
        dfviz=df
        print(e)
        print('Exception during Autoviz')

 
   
    while len(reports)<10:
          # Comparing the lengths and appending t and m to output if needed

          t = gr.Markdown(visible=False)
          m = gr.File(visible=False)
          reports.append(t)
          reports.append(m)
          
    while len(imgs)<10:
          # Comparing the lengths and appending t and m to output if needed
    
          t = gr.Markdown(visible=False)
          m = gr.Image(visible=False)
          imgs.append(t)
          imgs.append(m)   
   
    dfs = DataFrameSummary(df)
    sd=dfs.summary()
    sd.index.name = 'PARAMETERS'
    sd1= sd.reset_index(drop=False)
    def highlight_cols(x): 
        df = x.copy() 
        df.loc[:, :] = 'color: purple'
        df[['PARAMETERS']] = 'color: red'
        return df 

    # Applying the style function
    sd1 = sd1.style.apply(highlight_cols, axis = None)
    sd1.columns = sd1.columns.str.upper()


    
    return [temp_file1.name  ,temp_file3.name ,variable_table(df),dfviz,sd1]+reports+imgs
    
def handle_click(file, sheet_name):
    print('Handle click')  
    results = generate_report(file, sheet_name)
    return results
    
def update_sheets(file):
    sheet_names = pd.ExcelFile(file).sheet_names
    
    print('Sheet updated')
    sheet_dropdown = gr.Dropdown(
            label="Select sheet", choices=[sheet_names], visible=True  # Initially disabled
        )
    return sheet_dropdown
    

with gr.Blocks() as cluster:
    with gr.Column():
        gr.HTML(value="""<h1 style="color: #1999FF; text-shadow: 1px 1px 2px #ddd;">EXCEL ANALYSIS AND INSIGHTS</h1>""")
        with gr.Row():
            file=gr.File(file_types=['.csv', '.xlsx'], label="Upload a CSV or Excel file")
            btn=gr.Button(value="Download Report") 
            
        with gr.Row():
            
            
            gr.HTML(value="""<h2 style="color: #3399FF; text-shadow: 1px 1px 2px #ddd;">PANDAS REPORT</h2>""")
            out1=gr.File(label="Download CSV")
            gr.HTML(value="""<h2 style="color: #3399FF; text-shadow: 1px 1px 2px #ddd;">SWEETVIZ REPORT</h2>""")
            out2=gr.File(label="Download CSV")
        sheet_dropdown = gr.Dropdown(
            label="Select sheet", choices=[], visible=False  # Initially disabled
        )
    
        
        with gr.Column():
            gr.HTML(value="""<h2 style="color: #BA4A00; text-shadow: 1px 1px 2px #ddd;"COLUMNS IN FILE</h2>""")
            #gr.Markdown("***Uploaded File***")
            var=gr.HTML()
        with gr.Column():
            gr.HTML(value="""<h2 style="color: #BA4A00; text-shadow: 1px 1px 2px #ddd;"UPLOADED FILE</h2>""")
            #gr.Markdown("#Uploaded File")
            dataframe1=gr.Dataframe()
        with gr.Column():
            gr.HTML(value="""<h2 style="color: #BA4A00; text-shadow: 1px 1px 2px #ddd;"COLUMNS STATS AND ANALYSIS</h2>""")
            #gr.Markdown("#Columns Analysis")
            dataframe2=gr.Dataframe()
            
        #Autoviz insights graphs
  
        gr.HTML(value="""<h3 style="color: #1999FF; text-shadow: 1px 1px 2px #ddd;">Insight Visualisations</h3>""")
        with gr.Column():
          plots2 = []
          for i in range(5):
              t = gr.Markdown()
              with gr.Column(scale=1):
                m=gr.Image()
                plots2.append(t)
                plots2.append(m)
                print(type(plots2[0]))
        gr.HTML(value="""<h3 style="color: #1999FF; text-shadow: 1px 1px 2px #ddd;">Insight graphs</h3>""")
        with gr.Row():           
          plots1 = []
          for i in range(5):
            with gr.Row():
              t = gr.Markdown()
              m=gr.File()
              plots1.append(t)
              plots1.append(m)
              print(type(plots1[0]))
    
    btn.click(handle_click, inputs=[file, sheet_dropdown], outputs=[out1, out2, var,dataframe1, dataframe2]+plots1+plots2)
    file.change(update_sheets, inputs=file, outputs=sheet_dropdown) 
    #btn.click(generate_report,inputs=[file],outputs=[out1,out2,var,dataframe1,dataframe2])
cluster.launch(debug=True)