{ "cells": [ { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "import os\n", "import jsonlines\n", "from uuid import uuid4\n", "import pandas as pd\n", "\n", "from datasets import load_dataset\n", "import subprocess\n", "\n", "from dotenv import load_dotenv,find_dotenv\n", "load_dotenv(find_dotenv(),override=True)\n" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "ename": "ValueError", "evalue": "Invalid pattern: '**' can only be an entire path component", "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", "Cell \u001b[0;32mIn[6], line 2\u001b[0m\n\u001b[1;32m 1\u001b[0m dataset_name \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mai-aerospace/ams_data_train_generic_v0.1_100\u001b[39m\u001b[38;5;124m'\u001b[39m\n\u001b[0;32m----> 2\u001b[0m dataset\u001b[38;5;241m=\u001b[39m\u001b[43mload_dataset\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdataset_name\u001b[49m\u001b[43m)\u001b[49m\n", "File \u001b[0;32m~/Documents/HuggingFace/autotrain-playground/.venv/lib/python3.11/site-packages/datasets/load.py:2112\u001b[0m, in \u001b[0;36mload_dataset\u001b[0;34m(path, name, data_dir, data_files, split, cache_dir, features, download_config, download_mode, verification_mode, ignore_verifications, keep_in_memory, save_infos, revision, token, use_auth_token, task, streaming, num_proc, storage_options, **config_kwargs)\u001b[0m\n\u001b[1;32m 2107\u001b[0m verification_mode \u001b[38;5;241m=\u001b[39m VerificationMode(\n\u001b[1;32m 2108\u001b[0m (verification_mode \u001b[38;5;129;01mor\u001b[39;00m VerificationMode\u001b[38;5;241m.\u001b[39mBASIC_CHECKS) \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m save_infos \u001b[38;5;28;01melse\u001b[39;00m VerificationMode\u001b[38;5;241m.\u001b[39mALL_CHECKS\n\u001b[1;32m 2109\u001b[0m )\n\u001b[1;32m 2111\u001b[0m \u001b[38;5;66;03m# Create a dataset builder\u001b[39;00m\n\u001b[0;32m-> 2112\u001b[0m builder_instance \u001b[38;5;241m=\u001b[39m \u001b[43mload_dataset_builder\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 2113\u001b[0m \u001b[43m \u001b[49m\u001b[43mpath\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mpath\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 2114\u001b[0m \u001b[43m \u001b[49m\u001b[43mname\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mname\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 2115\u001b[0m \u001b[43m \u001b[49m\u001b[43mdata_dir\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdata_dir\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 2116\u001b[0m \u001b[43m \u001b[49m\u001b[43mdata_files\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdata_files\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 2117\u001b[0m \u001b[43m \u001b[49m\u001b[43mcache_dir\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcache_dir\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 2118\u001b[0m \u001b[43m \u001b[49m\u001b[43mfeatures\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfeatures\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 2119\u001b[0m \u001b[43m \u001b[49m\u001b[43mdownload_config\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdownload_config\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 2120\u001b[0m \u001b[43m \u001b[49m\u001b[43mdownload_mode\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdownload_mode\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 2121\u001b[0m \u001b[43m \u001b[49m\u001b[43mrevision\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrevision\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 2122\u001b[0m \u001b[43m \u001b[49m\u001b[43mtoken\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtoken\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 2123\u001b[0m \u001b[43m \u001b[49m\u001b[43mstorage_options\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstorage_options\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 2124\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mconfig_kwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 2125\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 2127\u001b[0m \u001b[38;5;66;03m# Return iterable dataset in case of streaming\u001b[39;00m\n\u001b[1;32m 2128\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m streaming:\n", "File \u001b[0;32m~/Documents/HuggingFace/autotrain-playground/.venv/lib/python3.11/site-packages/datasets/load.py:1798\u001b[0m, in \u001b[0;36mload_dataset_builder\u001b[0;34m(path, name, data_dir, data_files, cache_dir, features, download_config, download_mode, revision, token, use_auth_token, storage_options, **config_kwargs)\u001b[0m\n\u001b[1;32m 1796\u001b[0m download_config \u001b[38;5;241m=\u001b[39m download_config\u001b[38;5;241m.\u001b[39mcopy() \u001b[38;5;28;01mif\u001b[39;00m download_config \u001b[38;5;28;01melse\u001b[39;00m DownloadConfig()\n\u001b[1;32m 1797\u001b[0m download_config\u001b[38;5;241m.\u001b[39mstorage_options\u001b[38;5;241m.\u001b[39mupdate(storage_options)\n\u001b[0;32m-> 1798\u001b[0m dataset_module \u001b[38;5;241m=\u001b[39m \u001b[43mdataset_module_factory\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1799\u001b[0m \u001b[43m \u001b[49m\u001b[43mpath\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1800\u001b[0m \u001b[43m \u001b[49m\u001b[43mrevision\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrevision\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1801\u001b[0m \u001b[43m \u001b[49m\u001b[43mdownload_config\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdownload_config\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1802\u001b[0m \u001b[43m \u001b[49m\u001b[43mdownload_mode\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdownload_mode\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1803\u001b[0m \u001b[43m \u001b[49m\u001b[43mdata_dir\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdata_dir\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1804\u001b[0m \u001b[43m \u001b[49m\u001b[43mdata_files\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdata_files\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1805\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1806\u001b[0m \u001b[38;5;66;03m# Get dataset builder class from the processing script\u001b[39;00m\n\u001b[1;32m 1807\u001b[0m builder_kwargs \u001b[38;5;241m=\u001b[39m dataset_module\u001b[38;5;241m.\u001b[39mbuilder_kwargs\n", "File \u001b[0;32m~/Documents/HuggingFace/autotrain-playground/.venv/lib/python3.11/site-packages/datasets/load.py:1495\u001b[0m, in \u001b[0;36mdataset_module_factory\u001b[0;34m(path, revision, download_config, download_mode, dynamic_modules_path, data_dir, data_files, **download_kwargs)\u001b[0m\n\u001b[1;32m 1490\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(e1, \u001b[38;5;167;01mFileNotFoundError\u001b[39;00m):\n\u001b[1;32m 1491\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mFileNotFoundError\u001b[39;00m(\n\u001b[1;32m 1492\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mCouldn\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mt find a dataset script at \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mrelative_to_absolute_path(combined_path)\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m or any data file in the same directory. \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 1493\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mCouldn\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mt find \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mpath\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m on the Hugging Face Hub either: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mtype\u001b[39m(e1)\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00me1\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 1494\u001b[0m ) \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[0;32m-> 1495\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m e1 \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 1496\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 1497\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mFileNotFoundError\u001b[39;00m(\n\u001b[1;32m 1498\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mCouldn\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mt find a dataset script at \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mrelative_to_absolute_path(combined_path)\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m or any data file in the same directory.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 1499\u001b[0m )\n", "File \u001b[0;32m~/Documents/HuggingFace/autotrain-playground/.venv/lib/python3.11/site-packages/datasets/load.py:1479\u001b[0m, in \u001b[0;36mdataset_module_factory\u001b[0;34m(path, revision, download_config, download_mode, dynamic_modules_path, data_dir, data_files, **download_kwargs)\u001b[0m\n\u001b[1;32m 1464\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m HubDatasetModuleFactoryWithScript(\n\u001b[1;32m 1465\u001b[0m path,\n\u001b[1;32m 1466\u001b[0m revision\u001b[38;5;241m=\u001b[39mrevision,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 1469\u001b[0m dynamic_modules_path\u001b[38;5;241m=\u001b[39mdynamic_modules_path,\n\u001b[1;32m 1470\u001b[0m )\u001b[38;5;241m.\u001b[39mget_module()\n\u001b[1;32m 1471\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 1472\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mHubDatasetModuleFactoryWithoutScript\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1473\u001b[0m \u001b[43m \u001b[49m\u001b[43mpath\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1474\u001b[0m \u001b[43m \u001b[49m\u001b[43mrevision\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrevision\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1475\u001b[0m \u001b[43m \u001b[49m\u001b[43mdata_dir\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdata_dir\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1476\u001b[0m \u001b[43m \u001b[49m\u001b[43mdata_files\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdata_files\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1477\u001b[0m \u001b[43m \u001b[49m\u001b[43mdownload_config\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdownload_config\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1478\u001b[0m \u001b[43m \u001b[49m\u001b[43mdownload_mode\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdownload_mode\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m-> 1479\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget_module\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1480\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m (\n\u001b[1;32m 1481\u001b[0m \u001b[38;5;167;01mException\u001b[39;00m\n\u001b[1;32m 1482\u001b[0m ) \u001b[38;5;28;01mas\u001b[39;00m e1: \u001b[38;5;66;03m# noqa all the attempts failed, before raising the error we should check if the module is already cached.\u001b[39;00m\n\u001b[1;32m 1483\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n", "File \u001b[0;32m~/Documents/HuggingFace/autotrain-playground/.venv/lib/python3.11/site-packages/datasets/load.py:1034\u001b[0m, in \u001b[0;36mHubDatasetModuleFactoryWithoutScript.get_module\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 1029\u001b[0m metadata_configs \u001b[38;5;241m=\u001b[39m MetadataConfigs\u001b[38;5;241m.\u001b[39mfrom_dataset_card_data(dataset_card_data)\n\u001b[1;32m 1030\u001b[0m dataset_infos \u001b[38;5;241m=\u001b[39m DatasetInfosDict\u001b[38;5;241m.\u001b[39mfrom_dataset_card_data(dataset_card_data)\n\u001b[1;32m 1031\u001b[0m patterns \u001b[38;5;241m=\u001b[39m (\n\u001b[1;32m 1032\u001b[0m sanitize_patterns(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdata_files)\n\u001b[1;32m 1033\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdata_files \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[0;32m-> 1034\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m \u001b[43mget_data_patterns\u001b[49m\u001b[43m(\u001b[49m\u001b[43mbase_path\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdownload_config\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdownload_config\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1035\u001b[0m )\n\u001b[1;32m 1036\u001b[0m data_files \u001b[38;5;241m=\u001b[39m DataFilesDict\u001b[38;5;241m.\u001b[39mfrom_patterns(\n\u001b[1;32m 1037\u001b[0m patterns,\n\u001b[1;32m 1038\u001b[0m base_path\u001b[38;5;241m=\u001b[39mbase_path,\n\u001b[1;32m 1039\u001b[0m allowed_extensions\u001b[38;5;241m=\u001b[39mALL_ALLOWED_EXTENSIONS,\n\u001b[1;32m 1040\u001b[0m download_config\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdownload_config,\n\u001b[1;32m 1041\u001b[0m )\n\u001b[1;32m 1042\u001b[0m module_name, default_builder_kwargs \u001b[38;5;241m=\u001b[39m infer_module_for_data_files(\n\u001b[1;32m 1043\u001b[0m data_files\u001b[38;5;241m=\u001b[39mdata_files,\n\u001b[1;32m 1044\u001b[0m path\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mname,\n\u001b[1;32m 1045\u001b[0m download_config\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdownload_config,\n\u001b[1;32m 1046\u001b[0m )\n", "File \u001b[0;32m~/Documents/HuggingFace/autotrain-playground/.venv/lib/python3.11/site-packages/datasets/data_files.py:457\u001b[0m, in \u001b[0;36mget_data_patterns\u001b[0;34m(base_path, download_config)\u001b[0m\n\u001b[1;32m 455\u001b[0m resolver \u001b[38;5;241m=\u001b[39m partial(resolve_pattern, base_path\u001b[38;5;241m=\u001b[39mbase_path, download_config\u001b[38;5;241m=\u001b[39mdownload_config)\n\u001b[1;32m 456\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 457\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_get_data_files_patterns\u001b[49m\u001b[43m(\u001b[49m\u001b[43mresolver\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 458\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mFileNotFoundError\u001b[39;00m:\n\u001b[1;32m 459\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m EmptyDatasetError(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mThe directory at \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mbase_path\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m doesn\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mt contain any data files\u001b[39m\u001b[38;5;124m\"\u001b[39m) \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n", "File \u001b[0;32m~/Documents/HuggingFace/autotrain-playground/.venv/lib/python3.11/site-packages/datasets/data_files.py:248\u001b[0m, in \u001b[0;36m_get_data_files_patterns\u001b[0;34m(pattern_resolver)\u001b[0m\n\u001b[1;32m 246\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m pattern \u001b[38;5;129;01min\u001b[39;00m patterns:\n\u001b[1;32m 247\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 248\u001b[0m data_files \u001b[38;5;241m=\u001b[39m \u001b[43mpattern_resolver\u001b[49m\u001b[43m(\u001b[49m\u001b[43mpattern\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 249\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mFileNotFoundError\u001b[39;00m:\n\u001b[1;32m 250\u001b[0m \u001b[38;5;28;01mcontinue\u001b[39;00m\n", "File \u001b[0;32m~/Documents/HuggingFace/autotrain-playground/.venv/lib/python3.11/site-packages/datasets/data_files.py:332\u001b[0m, in \u001b[0;36mresolve_pattern\u001b[0;34m(pattern, base_path, allowed_extensions, download_config)\u001b[0m\n\u001b[1;32m 330\u001b[0m base_path \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 331\u001b[0m pattern, storage_options \u001b[38;5;241m=\u001b[39m _prepare_path_and_storage_options(pattern, download_config\u001b[38;5;241m=\u001b[39mdownload_config)\n\u001b[0;32m--> 332\u001b[0m fs, _, _ \u001b[38;5;241m=\u001b[39m \u001b[43mget_fs_token_paths\u001b[49m\u001b[43m(\u001b[49m\u001b[43mpattern\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstorage_options\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstorage_options\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 333\u001b[0m fs_base_path \u001b[38;5;241m=\u001b[39m base_path\u001b[38;5;241m.\u001b[39msplit(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m::\u001b[39m\u001b[38;5;124m\"\u001b[39m)[\u001b[38;5;241m0\u001b[39m]\u001b[38;5;241m.\u001b[39msplit(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m://\u001b[39m\u001b[38;5;124m\"\u001b[39m)[\u001b[38;5;241m-\u001b[39m\u001b[38;5;241m1\u001b[39m] \u001b[38;5;129;01mor\u001b[39;00m fs\u001b[38;5;241m.\u001b[39mroot_marker\n\u001b[1;32m 334\u001b[0m fs_pattern \u001b[38;5;241m=\u001b[39m pattern\u001b[38;5;241m.\u001b[39msplit(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m::\u001b[39m\u001b[38;5;124m\"\u001b[39m)[\u001b[38;5;241m0\u001b[39m]\u001b[38;5;241m.\u001b[39msplit(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m://\u001b[39m\u001b[38;5;124m\"\u001b[39m)[\u001b[38;5;241m-\u001b[39m\u001b[38;5;241m1\u001b[39m]\n", "File \u001b[0;32m~/Documents/HuggingFace/autotrain-playground/.venv/lib/python3.11/site-packages/fsspec/core.py:653\u001b[0m, in \u001b[0;36mget_fs_token_paths\u001b[0;34m(urlpath, mode, num, name_function, storage_options, protocol, expand)\u001b[0m\n\u001b[1;32m 651\u001b[0m paths \u001b[38;5;241m=\u001b[39m _expand_paths(paths, name_function, num)\n\u001b[1;32m 652\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m*\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01min\u001b[39;00m paths:\n\u001b[0;32m--> 653\u001b[0m paths \u001b[38;5;241m=\u001b[39m [f \u001b[38;5;28;01mfor\u001b[39;00m f \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28msorted\u001b[39m(\u001b[43mfs\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mglob\u001b[49m\u001b[43m(\u001b[49m\u001b[43mpaths\u001b[49m\u001b[43m)\u001b[49m) \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m fs\u001b[38;5;241m.\u001b[39misdir(f)]\n\u001b[1;32m 654\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 655\u001b[0m paths \u001b[38;5;241m=\u001b[39m [paths]\n", "File \u001b[0;32m~/Documents/HuggingFace/autotrain-playground/.venv/lib/python3.11/site-packages/fsspec/spec.py:606\u001b[0m, in \u001b[0;36mAbstractFileSystem.glob\u001b[0;34m(self, path, maxdepth, **kwargs)\u001b[0m\n\u001b[1;32m 602\u001b[0m depth \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 604\u001b[0m allpaths \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mfind(root, maxdepth\u001b[38;5;241m=\u001b[39mdepth, withdirs\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m, detail\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[0;32m--> 606\u001b[0m pattern \u001b[38;5;241m=\u001b[39m \u001b[43mglob_translate\u001b[49m\u001b[43m(\u001b[49m\u001b[43mpath\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m+\u001b[39;49m\u001b[43m \u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43m/\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mif\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mends_with_sep\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01melse\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 607\u001b[0m pattern \u001b[38;5;241m=\u001b[39m re\u001b[38;5;241m.\u001b[39mcompile(pattern)\n\u001b[1;32m 609\u001b[0m out \u001b[38;5;241m=\u001b[39m {\n\u001b[1;32m 610\u001b[0m p: info\n\u001b[1;32m 611\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m p, info \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28msorted\u001b[39m(allpaths\u001b[38;5;241m.\u001b[39mitems())\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 618\u001b[0m )\n\u001b[1;32m 619\u001b[0m }\n", "File \u001b[0;32m~/Documents/HuggingFace/autotrain-playground/.venv/lib/python3.11/site-packages/fsspec/utils.py:734\u001b[0m, in \u001b[0;36mglob_translate\u001b[0;34m(pat)\u001b[0m\n\u001b[1;32m 732\u001b[0m \u001b[38;5;28;01mcontinue\u001b[39;00m\n\u001b[1;32m 733\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m**\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01min\u001b[39;00m part:\n\u001b[0;32m--> 734\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[1;32m 735\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mInvalid pattern: \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m**\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m can only be an entire path component\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 736\u001b[0m )\n\u001b[1;32m 737\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m part:\n\u001b[1;32m 738\u001b[0m results\u001b[38;5;241m.\u001b[39mextend(_translate(part, \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mnot_sep\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m*\u001b[39m\u001b[38;5;124m\"\u001b[39m, not_sep))\n", "\u001b[0;31mValueError\u001b[0m: Invalid pattern: '**' can only be an entire path component" ] } ], "source": [ "dataset_name = 'ai-aerospace/ams_data_train_generic_v0.1_100'\n", "dataset=load_dataset(dataset_name)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": ".venv", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.1" } }, "nbformat": 4, "nbformat_minor": 2 }