hackathon / data_processor.py
Ashar086's picture
Create data_processor.py
93fbf36 verified
raw
history blame
548 Bytes
import pandas as pd
import numpy as np
class DataProcessor:
    """Thin wrapper around a pandas DataFrame offering basic cleaning helpers.

    The wrapped frame is stored by reference in ``self.df``; none of the
    methods below modify it in place.
    """

    def __init__(self, df: pd.DataFrame) -> None:
        """Store *df* for later inspection and cleaning.

        Args:
            df: The DataFrame to operate on. It is kept as-is (no copy).
        """
        self.df = df

    def get_columns_with_missing_values(self) -> list:
        """Return the labels of all columns containing at least one null.

        Returns:
            A list of column labels, in the frame's column order, for which
            ``isnull()`` is true in at least one row.
        """
        # Boolean mask with one entry per column: True where any cell is null.
        has_missing = self.df.isnull().any()
        return self.df.columns[has_missing].tolist()

    def clean_data(self) -> pd.DataFrame:
        """Return a cleaned copy of the wrapped DataFrame.

        Cleaning drops every row that has any missing value, then drops
        duplicate rows (keeping the first occurrence, the pandas default),
        and finally renumbers the index from 0. ``self.df`` itself is left
        unchanged because each step returns a new frame.

        Returns:
            The cleaned DataFrame with a fresh default integer index.
        """
        # Remove rows with any missing values
        df_cleaned = self.df.dropna()
        # Remove duplicate rows
        df_cleaned = df_cleaned.drop_duplicates()
        # Reset the index so it is contiguous after rows were removed
        df_cleaned = df_cleaned.reset_index(drop=True)
        return df_cleaned