# Canstralian's picture
# Update app.py
# 653b423 verified
import pandas as pd
import os
def load_dataset(file_path: str) -> pd.DataFrame:
    """
    Load a dataset from a file path into a Pandas DataFrame.

    The reader is chosen from the extension of the final path component
    (case-insensitive): ``csv`` -> ``pd.read_csv``, ``xlsx``/``xls`` ->
    ``pd.read_excel``, ``json`` -> ``pd.read_json``.

    Parameters:
    - file_path (str): The path to the dataset file on the filesystem.

    Returns:
    pd.DataFrame: A DataFrame containing the loaded dataset.

    Raises:
    - TypeError: If ``file_path`` is not a string.
    - FileNotFoundError: If ``file_path`` does not exist.
    - ValueError: If the extension is not one of the supported formats, or if
      the pandas reader itself raises a ``ValueError`` while loading.
    - pd.errors.EmptyDataError: If the reader finds nothing to parse, or the
      resulting DataFrame is empty (``DataFrame.empty``).
    - pd.errors.ParserError: If the reader fails to parse the file.
    - Exception: Any other failure raised by the reader is re-raised as a
      plain ``Exception`` with the original error chained as its cause.

    Note:
    ``EmptyDataError`` and ``ParserError`` are subclasses of ``ValueError``,
    so callers that catch ``ValueError`` also catch them.

    Examples:
    >>> df = load_dataset('data/my_dataset.csv')  # doctest: +SKIP
    >>> print(df.head())  # doctest: +SKIP
    """
    # Validate the argument type before touching the filesystem.
    if not isinstance(file_path, str):
        raise TypeError(f"Expected file path to be a string, but got {type(file_path).__name__}.")

    # Fail early with a clear message when the path is missing.
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}. Please check the path and try again.")

    # Take the extension from the final path component only, so a dot in a
    # directory name (e.g. "v1.2/records") is never mistaken for a suffix.
    _, dot, suffix = os.path.basename(file_path).rpartition('.')
    file_extension = suffix.lower() if dot else ''

    readers = {
        'csv': pd.read_csv,
        'xlsx': pd.read_excel,
        'xls': pd.read_excel,
        'json': pd.read_json,
    }
    reader = readers.get(file_extension)
    if reader is None:
        # Raised outside the try block so this specific message reaches the
        # caller instead of being replaced by the generic ValueError below.
        shown_extension = file_extension or '<none>'
        raise ValueError(f"Unsupported file format: {shown_extension}. Supported formats are CSV, Excel, and JSON.")

    # Only the read itself is guarded. The pandas-specific errors subclass
    # ValueError, so they must be handled before the generic ValueError clause.
    try:
        dataset = reader(file_path)
    except pd.errors.EmptyDataError as empty_data_error:
        raise pd.errors.EmptyDataError(f"The file at {file_path} is empty and cannot be loaded into a DataFrame.") from empty_data_error
    except pd.errors.ParserError as parser_error:
        raise pd.errors.ParserError(f"Error parsing the file at {file_path}. Please check the file format and contents.") from parser_error
    except ValueError as value_error:
        raise ValueError(f"Error loading the dataset from {file_path}. Please ensure the file is in a supported format and not empty.") from value_error
    except Exception as e:
        raise Exception(f"An error occurred while loading the file: {file_path}. Error details: {str(e)}") from e

    # A file that parses but yields an empty DataFrame (for example a CSV with
    # only a header row) is reported the same way as a file with no content.
    if dataset.empty:
        raise pd.errors.EmptyDataError(f"The file at {file_path} is empty and cannot be loaded into a DataFrame.")

    return dataset
# Demonstration of load_dataset.
# NOTE: there is no __main__ guard, so this also runs when the module is imported.
# The first failure stops the demo: the remaining examples are skipped and the
# error is printed.
try:
    # Examples 1 and 2: load a CSV file, then an Excel file, and preview each.
    for _example_path in ('data/my_dataset.csv', 'data/my_dataset.xlsx'):
        dataset = load_dataset(_example_path)
        print("Dataset loaded successfully!")
        print(dataset.head())  # First few rows of the loaded dataset

    # Example 3: a path that does not exist (expected to raise an error).
    dataset = load_dataset('data/non_existent_file.csv')
except Exception as e:
    print(f"An error occurred: {e}")