|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import datasets
import polars as pl
|
|
|
|
|
class Dataset:
    """
    Static helpers around the `datasets` library.

    Groups three stateless operations: loading a dataset from disk, saving
    one back to disk, and converting one to a Polars DataFrame.
    """

    @staticmethod
    def load(
        dataset_path: str
    ):
        """
        Load a dataset from disk.

        Parameters
        ----------
        dataset_path : str
            The path to the dataset on disk.

        Returns
        -------
        datasets.Dataset or datasets.DatasetDict
            The object returned by `datasets.load_from_disk` for the given
            path (a `DatasetDict` when the directory holds several splits).

        Notes
        -----
        This method delegates to the `load_from_disk` function provided by
        the `datasets` module. The directory is expected to have been
        written by the matching `save_to_disk` method.

        Example
        -------
        >>> dataset_path = "/path/to/dataset"
        >>> dataset = Dataset.load(dataset_path)
        """
        return datasets.load_from_disk(dataset_path=dataset_path)

    @staticmethod
    def save(
        dataset: datasets.Dataset,
        dataset_path: str
    ) -> None:
        """
        Save a dataset to disk.

        Parameters
        ----------
        dataset : datasets.Dataset
            The dataset to be saved.

        dataset_path : str
            The path where the dataset will be saved on disk.

        Returns
        -------
        None

        Notes
        -----
        The `datasets` package has no module-level `save_to_disk` function;
        saving is a method of the dataset object itself, so this helper
        calls `dataset.save_to_disk(dataset_path)`.

        Example
        -------
        >>> dataset = datasets.load_dataset("my_dataset", split="train")
        >>> dataset_path = "/path/to/save/dataset"
        >>> Dataset.save(dataset, dataset_path)
        """
        # Pass the path positionally: `Dataset` and `DatasetDict` name this
        # parameter differently, so a keyword would only work for one of them.
        dataset.save_to_disk(dataset_path)

    @staticmethod
    def convert_to_polars(
        dataset: datasets.Dataset
    ) -> pl.DataFrame:
        """
        Convert a dataset to a Polars DataFrame with an `index` column.

        Parameters
        ----------
        dataset : datasets.Dataset
            The dataset to be converted to a Polars DataFrame.

        Returns
        -------
        pl.DataFrame
            A Polars DataFrame built from the dataset's underlying Arrow
            table, with a leading row-number column named ``index``.

        Notes
        -----
        The Arrow table is read from `dataset.data.table` and handed to
        `pl.from_arrow`. The row-number column is added with
        `with_row_index`; on Polars releases that predate that method the
        older `with_row_count(name="index")` is used instead, so the column
        name is the same either way.

        Raises
        ------
        Exception
            Any error raised by `pl.from_arrow` or by adding the index
            column (other than the missing-method case above) propagates
            to the caller.

        Examples
        --------
        >>> dataset = datasets.Dataset.from_dict({"text": ["a", "b"]})
        >>> dataframe = Dataset.convert_to_polars(dataset)
        """
        # Convert once; only the row-index call differs between versions.
        frame = pl.from_arrow(dataset.data.table)

        try:
            return frame.with_row_index()
        except AttributeError:
            # Older Polars: `with_row_index` does not exist yet.
            return frame.with_row_count(name="index")
|
|
|
|