{
 "cells": [
  {
   "cell_type": "code",
   "source": [
    "# Versions are pinned to the ones recorded when this notebook was first run\n",
    "# (Colab, Python 3.10) so that Restart & Run All resolves the same packages.\n",
    "# %pip (rather than !pip) installs into the environment of the running kernel.\n",
    "%pip install -q transformers==4.32.1 accelerate==0.22.0 bitsandbytes==0.41.1 sentencepiece==0.1.99 einops==0.6.1"
   ],
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "tJQbBFX7iZnj",
    "outputId": "a0c34c5b-887c-4c68-e5f9-144d499f7926"
   },
   "execution_count": 1,
   "outputs": []
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "id": "yfuda1Wn5XT9"
   },
   "outputs": [],
   "source": [
    "import torch\n",
    "from transformers import AutoModelForCausalLM, AutoTokenizer\n",
    "\n",
    "# Identifiers for the tokenizer and for the model weights. Note that they name\n",
    "# two different repositories (presumably Hugging Face Hub repo ids -- confirm\n",
    "# against the cell that loads them).\n",
    "TOKENIZER_ID = \"AIBunCho/japanese-novel-gpt-j-6b\"\n",
    "MODEL_ID = \"tsukemono/japanese-novel-gpt-j-6b-f16-marisa\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "id": "cDq4-8Z7QriO",
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 217,
     "referenced_widgets": [ "dcdfae31b60c4c8ab34014069c73b581", "3bed92250355445ab425e8f9331eac2c", "39367ae27338432da007d74ce664001f", "af54f36c787743fb97c04c140aa7e82f", "df0152b57de74abe9be344b05322e0ee", "6361b4f44430442c9c0e37db6296be88", "58641328a8434b3e80ee0751f749b236", "7c11f9b355594e46ad053a8c658b12b0", "d24b84da0263409aac9dd62efbb0c364", "9b1b96653c0b4dc9b603c5be54f075c0", "08dbc71a644c41c6960ddf0a315a564b", "6264d9f755d84eee89807e185a416cb0", "46c08e00c80a4140ace2dec8ddf84339", "9f1ff860ed52400899bbb6376a379733", "66e62659f39d4c6a882fc5b1a1b358bd", "c3e622d21e834ab1b505edffbf8e60bf", "040c302248414d2aaec40e6efdd2b9e4", "b06aa4ea76d34ccb9b48a03902ba1529", "124bb7a81e4c4fd983ac30b0e6e1df78", "aeea67ecc5c14cedb73393b30216d8b7", "0c4c344cc5e54317ac36da0fcc78b620", "91093e8523aa42219e0fc3212b0ca4dd", "1b235823fb48425c8703a9ebc39f2e2e", "f2a0bb521e984253aa4203a6310097ef", "50b46e9f9d904fbc9fa6c5a1040144be", "8ebaaf7c95d94812a487c5a039a4b3e9", "6e587d039b524c8ea79f22bd0f5a9259", "f090e1f2141e4d03b5777ec9ea8cb6b2", "d77c368e556b499e8dfd091346b3d275", "63115e818ffc4b5597c309b074a1be59", "3122028754d2448fbc3ed80cab7ac244", "499e5e75c52441c7b2bc2323d80e1172", "34b1d4c17d8047adaee8ae1a21862625", "ba249a1a32d64d3382e1a59a7735842e", "673549470c834a6d8cac56623ea7c86b", "2a9bf34de29d4744a28c76770454e8ee", "d061e301a9cb4a0dadd67b6a43b37757", "18a1ec5991494fb6ae9d58ab34e2dc8a", "2556bee8cfc047a684130b6ddddee552",
"a9f58e079b66402a99c7424edab9a010", "fb7e64aae9024282ac26aa1fe9d395c2", "7f37584d4f1c45e2b3137d36577dc624", "8ec7a9770f5846a985884edcacc2ae3e", "d7751471eb464633ad44596fd17969e7" ] }, "outputId": "98f6a8ef-10b0-4322-b701-48f08cf22169" }, "outputs": [ { "output_type": "display_data", "data": { "text/plain": [ "Downloading (…)okenizer_config.json: 0%| | 0.00/548 [00:00