# -*- coding: utf-8 -*-
# Copyright (c) Louis Brulé Naudet. All Rights Reserved.
# This software may be used and distributed according to the terms of the License Agreement.
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import datasets
import polars as pl


class Dataset:
    """
    Static helpers for loading, saving and converting `datasets` objects.

    All methods are static; the class only serves as a namespace and holds
    no state.
    """

    @staticmethod
    def load(
        dataset_path: str
    ) -> "datasets.Dataset | datasets.DatasetDict":
        """
        Load a dataset from disk.

        Parameters
        ----------
        dataset_path : str
            The path to the dataset on disk.

        Returns
        -------
        datasets.Dataset or datasets.DatasetDict
            Whatever `datasets.load_from_disk` returns for that path: a
            `Dataset` when a single dataset was saved there, a `DatasetDict`
            when a dictionary of splits was saved.

        Notes
        -----
        This method delegates to `datasets.load_from_disk`, so the directory
        is expected to have been written by the matching `save_to_disk`
        method of the `datasets` library.

        Example
        -------
        >>> dataset_path = "/path/to/dataset"
        >>> dataset = Dataset.load(dataset_path)
        """
        return datasets.load_from_disk(dataset_path=dataset_path)

    @staticmethod
    def save(
        dataset: datasets.Dataset,
        dataset_path: str
    ) -> None:
        """
        Save a dataset to disk.

        Parameters
        ----------
        dataset : datasets.Dataset
            The dataset to be saved.

        dataset_path : str
            The path where the dataset will be saved on disk.

        Returns
        -------
        None

        Notes
        -----
        `save_to_disk` is a method of the dataset object, not a function of
        the `datasets` module, so it is called on `dataset` itself. The
        result can be read back with `Dataset.load`.

        Example
        -------
        >>> dataset = datasets.load_dataset("my_dataset", split="train")
        >>> dataset_path = "/path/to/save/dataset"
        >>> Dataset.save(dataset, dataset_path)
        """
        dataset.save_to_disk(dataset_path)

    @staticmethod
    def convert_to_polars(
        dataset: datasets.Dataset
    ) -> pl.DataFrame:
        """
        Convert a dataset to a Polars DataFrame with a leading row index.

        Parameters
        ----------
        dataset : datasets.Dataset
            The dataset to be converted to a Polars DataFrame.

        Returns
        -------
        pl.DataFrame
            A Polars DataFrame built from the dataset's underlying Arrow
            table, with an additional row-number column named "index".

        Notes
        -----
        The Arrow table is read from `dataset.data.table` and handed to
        `pl.from_arrow`. The row-number column is added with
        `with_row_index` when the installed Polars provides it, and with
        the older `with_row_count(name="index")` otherwise, so the column
        is called "index" in both cases.

        Raises
        ------
        Exception
            Any error raised by `pl.from_arrow` or by the row-index call is
            propagated unchanged.

        Examples
        --------
        >>> dataset = Dataset.load("/path/to/dataset")
        >>> dataframe = Dataset.convert_to_polars(dataset)
        """
        # Convert once; the choice below only concerns the row-index API.
        frame = pl.from_arrow(dataset.data.table)

        # Newer Polars exposes `with_row_index` (default column name
        # "index"); older releases only have `with_row_count`, whose default
        # name differs, hence the explicit name on the fallback.
        if hasattr(frame, "with_row_index"):
            return frame.with_row_index()

        return frame.with_row_count(name="index")