diff --git "a/Untitled1.ipynb" "b/Untitled1.ipynb" new file mode 100644--- /dev/null +++ "b/Untitled1.ipynb" @@ -0,0 +1,2319 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "3cb8b683-840f-4981-af83-018f067c5c94", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Defaulting to user installation because normal site-packages is not writeable\n", + "Collecting transformers\n", + " Downloading transformers-4.47.0-py3-none-any.whl (10.1 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m10.1/10.1 MB\u001b[0m \u001b[31m59.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m0:01\u001b[0m\n", + "\u001b[?25hCollecting huggingface-hub<1.0,>=0.24.0\n", + " Downloading huggingface_hub-0.27.0-py3-none-any.whl (450 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m450.5/450.5 KB\u001b[0m \u001b[31m36.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting tqdm>=4.27\n", + " Downloading tqdm-4.67.1-py3-none-any.whl (78 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m78.5/78.5 KB\u001b[0m \u001b[31m12.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: numpy>=1.17 in /usr/lib/python3/dist-packages (from transformers) (1.21.5)\n", + "Requirement already satisfied: filelock in /usr/lib/python3/dist-packages (from transformers) (3.6.0)\n", + "Requirement already satisfied: packaging>=20.0 in /usr/lib/python3/dist-packages (from transformers) (21.3)\n", + "Collecting safetensors>=0.4.1\n", + " Downloading safetensors-0.4.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (435 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m435.0/435.0 KB\u001b[0m \u001b[31m49.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting tokenizers<0.22,>=0.21\n", + " Downloading tokenizers-0.21.0-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.0 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.0/3.0 MB\u001b[0m \u001b[31m71.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m:00:01\u001b[0m\n", + "\u001b[?25hCollecting regex!=2019.12.17\n", + " Downloading regex-2024.11.6-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (781 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m781.7/781.7 KB\u001b[0m \u001b[31m59.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: requests in /usr/lib/python3/dist-packages (from transformers) (2.25.1)\n", + "Requirement already satisfied: pyyaml>=5.1 in /usr/lib/python3/dist-packages (from transformers) (5.4.1)\n", + "Requirement already satisfied: fsspec>=2023.5.0 in /usr/lib/python3/dist-packages (from huggingface-hub<1.0,>=0.24.0->transformers) (2024.3.1)\n", + "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/lib/python3/dist-packages (from huggingface-hub<1.0,>=0.24.0->transformers) (4.9.0)\n", + "Installing collected packages: tqdm, safetensors, regex, huggingface-hub, tokenizers, transformers\n", + "Successfully installed huggingface-hub-0.27.0 regex-2024.11.6 safetensors-0.4.5 tokenizers-0.21.0 tqdm-4.67.1 transformers-4.47.0\n" + ] + } + ], + "source": [ + "!pip install transformers" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "e3672a9c-c8ab-4d13-9498-5450ca3d95c3", + 
"metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "README.md\t\t\t model-00002-of-00004.safetensors\n", + "Untitled.ipynb\t\t\t model-00003-of-00004.safetensors\n", + "Untitled1.ipynb\t\t\t model-00004-of-00004.safetensors\n", + "added_tokens.json\t\t model.safetensors.index.json\n", + "checkpoint-120\t\t\t pytorch_model-00001-of-00002.bin\n", + "checkpoint-40\t\t\t pytorch_model-00002-of-00002.bin\n", + "checkpoint-80\t\t\t pytorch_model.bin.index.json\n", + "checkpoint-90\t\t\t special_tokens_map.json\n", + "config.json\t\t\t tokenizer.json\n", + "generation_config.json\t\t tokenizer_config.json\n", + "merges.txt\t\t\t training_args.bin\n", + "model-00001-of-00004.safetensors vocab.json\n" + ] + } + ], + "source": [ + "!ls" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "4a7a57e8-6bc3-4d15-88ac-942e8347b7cd", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Loading checkpoint shards: 100%|██████████| 4/4 [00:31<00:00, 7.82s/it]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Model and tokenizer loaded successfully!\n" + ] + } + ], + "source": [ + "from transformers import AutoModelForCausalLM, AutoTokenizer\n", + "\n", + "# Path to your local checkpoint directory\n", + "checkpoint_path = \"checkpoint-80\"\n", + "\n", + "# Load the model and tokenizer\n", + "model = AutoModelForCausalLM.from_pretrained(checkpoint_path)\n", + "tokenizer = AutoTokenizer.from_pretrained(checkpoint_path)\n", + "\n", + "# Verify loading\n", + "print(\"Model and tokenizer loaded successfully!\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "c3b9fcde-a4f3-4217-ba9a-cf799800cd63", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "RepoUrl('https://huggingface.co/neginashz/checkpoint-80', endpoint='https://huggingface.co', repo_type='model', repo_id='neginashz/checkpoint-80')" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from huggingface_hub import create_repo\n", + "\n", + "create_repo(repo_id=\"neginashz/checkpoint-80\", private=False, exist_ok=True)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6e19721a-b69c-42e6-a740-04e4b8ded4fa", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Upload 7 LFS files: 0%| | 0/7 [00:00