{ "cells": [ { "cell_type": "markdown", "metadata": { "id": "28e4c4d1-a73f-437b-a1bd-c2cc3874924a" }, "source": [ "# 강의 11주차: midm-food-order-understanding\n", "\n", "1. KT-AI/midm-bitext-S-7B-inst-v1 를 주문 문장 이해에 미세 튜닝\n", "\n", "- food-order-understanding-small-3200.json (학습)\n", "- food-order-understanding-small-800.json (검증)\n", "\n", "\n", "종속적인 필요 내용\n", "- huggingface 계정 설정 및 llama-2 사용 승인\n", "- 로깅을 위한 wandb (log 기록됨)" ], "id": "28e4c4d1-a73f-437b-a1bd-c2cc3874924a" }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "nDZe_wqKU6J3", "outputId": "184c46fb-9706-4e52-9193-a73c9e8eac50" }, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Requirement already satisfied: transformers in /usr/local/lib/python3.10/dist-packages (4.35.2)\n", "Requirement already satisfied: peft in /usr/local/lib/python3.10/dist-packages (0.7.0)\n", "Requirement already satisfied: accelerate in /usr/local/lib/python3.10/dist-packages (0.25.0)\n", "Requirement already satisfied: optimum in /usr/local/lib/python3.10/dist-packages (1.15.0)\n", "Requirement already satisfied: bitsandbytes in /usr/local/lib/python3.10/dist-packages (0.41.3)\n", "Requirement already satisfied: trl in /usr/local/lib/python3.10/dist-packages (0.7.4)\n", "Requirement already satisfied: wandb in /usr/local/lib/python3.10/dist-packages (0.16.1)\n", "Requirement already satisfied: einops in /usr/local/lib/python3.10/dist-packages (0.7.0)\n", "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from transformers) (3.13.1)\n", "Requirement already satisfied: huggingface-hub<1.0,>=0.16.4 in /usr/local/lib/python3.10/dist-packages (from transformers) (0.19.4)\n", "Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.10/dist-packages (from transformers) (1.23.5)\n", "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from transformers) (23.2)\n", "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from transformers) (6.0.1)\n", "Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.10/dist-packages (from transformers) (2023.6.3)\n", "Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from transformers) (2.31.0)\n", "Requirement already satisfied: tokenizers<0.19,>=0.14 in /usr/local/lib/python3.10/dist-packages (from transformers) (0.15.0)\n", "Requirement already satisfied: safetensors>=0.3.1 in /usr/local/lib/python3.10/dist-packages (from transformers) (0.4.1)\n", "Requirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.10/dist-packages (from transformers) (4.66.1)\n", "Requirement already satisfied: psutil in /usr/local/lib/python3.10/dist-packages (from peft) (5.9.5)\n", "Requirement already satisfied: torch>=1.13.0 in /usr/local/lib/python3.10/dist-packages (from peft) (2.1.0+cu118)\n", "Requirement already satisfied: coloredlogs in /usr/local/lib/python3.10/dist-packages (from optimum) (15.0.1)\n", "Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from optimum) (1.12)\n", "Requirement already satisfied: datasets in /usr/local/lib/python3.10/dist-packages (from optimum) (2.15.0)\n", "Requirement already satisfied: tyro>=0.5.11 in /usr/local/lib/python3.10/dist-packages (from trl) (0.6.0)\n", "Requirement already satisfied: Click!=8.0.0,>=7.1 in /usr/local/lib/python3.10/dist-packages (from wandb) (8.1.7)\n", "Requirement already 
satisfied: GitPython!=3.1.29,>=1.0.0 in /usr/local/lib/python3.10/dist-packages (from wandb) (3.1.40)\n", "Requirement already satisfied: sentry-sdk>=1.0.0 in /usr/local/lib/python3.10/dist-packages (from wandb) (1.38.0)\n", "Requirement already satisfied: docker-pycreds>=0.4.0 in /usr/local/lib/python3.10/dist-packages (from wandb) (0.4.0)\n", "Requirement already satisfied: setproctitle in /usr/local/lib/python3.10/dist-packages (from wandb) (1.3.3)\n", "Requirement already satisfied: setuptools in /usr/local/lib/python3.10/dist-packages (from wandb) (67.7.2)\n", "Requirement already satisfied: appdirs>=1.4.3 in /usr/local/lib/python3.10/dist-packages (from wandb) (1.4.4)\n", "Requirement already satisfied: protobuf!=4.21.0,<5,>=3.19.0 in /usr/local/lib/python3.10/dist-packages (from wandb) (3.20.3)\n", "Requirement already satisfied: six>=1.4.0 in /usr/local/lib/python3.10/dist-packages (from docker-pycreds>=0.4.0->wandb) (1.16.0)\n", "Requirement already satisfied: gitdb<5,>=4.0.1 in /usr/local/lib/python3.10/dist-packages (from GitPython!=3.1.29,>=1.0.0->wandb) (4.0.11)\n", "Requirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub<1.0,>=0.16.4->transformers) (2023.6.0)\n", "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub<1.0,>=0.16.4->transformers) (4.5.0)\n", "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (3.3.2)\n", "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (3.6)\n", "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (2.0.7)\n", "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (2023.11.17)\n", "Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch>=1.13.0->peft) (3.2.1)\n", "Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch>=1.13.0->peft) (3.1.2)\n", "Requirement already satisfied: triton==2.1.0 in /usr/local/lib/python3.10/dist-packages (from torch>=1.13.0->peft) (2.1.0)\n", "Requirement already satisfied: sentencepiece!=0.1.92,>=0.1.91 in /usr/local/lib/python3.10/dist-packages (from transformers) (0.1.99)\n", "Requirement already satisfied: docstring-parser>=0.14.1 in /usr/local/lib/python3.10/dist-packages (from tyro>=0.5.11->trl) (0.15)\n", "Requirement already satisfied: rich>=11.1.0 in /usr/local/lib/python3.10/dist-packages (from tyro>=0.5.11->trl) (13.7.0)\n", "Requirement already satisfied: shtab>=1.5.6 in /usr/local/lib/python3.10/dist-packages (from tyro>=0.5.11->trl) (1.6.5)\n", "Requirement already satisfied: humanfriendly>=9.1 in /usr/local/lib/python3.10/dist-packages (from coloredlogs->optimum) (10.0)\n", "Requirement already satisfied: pyarrow>=8.0.0 in /usr/local/lib/python3.10/dist-packages (from datasets->optimum) (9.0.0)\n", "Requirement already satisfied: pyarrow-hotfix in /usr/local/lib/python3.10/dist-packages (from datasets->optimum) (0.6)\n", "Requirement already satisfied: dill<0.3.8,>=0.3.0 in /usr/local/lib/python3.10/dist-packages (from datasets->optimum) (0.3.7)\n", "Requirement already satisfied: pandas in /usr/local/lib/python3.10/dist-packages (from datasets->optimum) (1.5.3)\n", "Requirement already satisfied: xxhash in 
/usr/local/lib/python3.10/dist-packages (from datasets->optimum) (3.4.1)\n", "Requirement already satisfied: multiprocess in /usr/local/lib/python3.10/dist-packages (from datasets->optimum) (0.70.15)\n", "Requirement already satisfied: aiohttp in /usr/local/lib/python3.10/dist-packages (from datasets->optimum) (3.9.1)\n", "Requirement already satisfied: mpmath>=0.19 in /usr/local/lib/python3.10/dist-packages (from sympy->optimum) (1.3.0)\n", "Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets->optimum) (23.1.0)\n", "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets->optimum) (6.0.4)\n", "Requirement already satisfied: yarl<2.0,>=1.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets->optimum) (1.9.3)\n", "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets->optimum) (1.4.0)\n", "Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets->optimum) (1.3.1)\n", "Requirement already satisfied: async-timeout<5.0,>=4.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets->optimum) (4.0.3)\n", "Requirement already satisfied: smmap<6,>=3.0.1 in /usr/local/lib/python3.10/dist-packages (from gitdb<5,>=4.0.1->GitPython!=3.1.29,>=1.0.0->wandb) (5.0.1)\n", "Requirement already satisfied: markdown-it-py>=2.2.0 in /usr/local/lib/python3.10/dist-packages (from rich>=11.1.0->tyro>=0.5.11->trl) (3.0.0)\n", "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /usr/local/lib/python3.10/dist-packages (from rich>=11.1.0->tyro>=0.5.11->trl) (2.16.1)\n", "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch>=1.13.0->peft) (2.1.3)\n", "Requirement already satisfied: python-dateutil>=2.8.1 in /usr/local/lib/python3.10/dist-packages (from pandas->datasets->optimum) (2.8.2)\n", "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas->datasets->optimum) (2023.3.post1)\n", "Requirement already satisfied: mdurl~=0.1 in /usr/local/lib/python3.10/dist-packages (from markdown-it-py>=2.2.0->rich>=11.1.0->tyro>=0.5.11->trl) (0.1.2)\n" ] } ], "source": [ "pip install transformers peft accelerate optimum bitsandbytes trl wandb einops" ], "id": "nDZe_wqKU6J3" }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "51eb00d7-2928-41ad-9ae9-7f0da7d64d6d", "outputId": "480d3d43-f54e-45eb-b1d9-3659324d55f9" }, "outputs": [ { "output_type": "stream", "name": "stderr", "text": [ "/usr/local/lib/python3.10/dist-packages/trl/trainer/ppo_config.py:141: UserWarning: The `optimize_cuda_cache` arguement will be deprecated soon, please use `optimize_device_cache` instead.\n", " warnings.warn(\n" ] } ], "source": [ "import os\n", "from dataclasses import dataclass, field\n", "from typing import Optional\n", "import re\n", "\n", "import torch\n", "import tyro\n", "from accelerate import Accelerator\n", "from datasets import load_dataset, Dataset\n", "from peft import AutoPeftModelForCausalLM, LoraConfig\n", "from tqdm import tqdm\n", "from transformers import (\n", " AutoModelForCausalLM,\n", " AutoTokenizer,\n", " BitsAndBytesConfig,\n", " TrainingArguments,\n", ")\n", "\n", "from trl import SFTTrainer\n", "\n", "from trl.trainer import ConstantLengthDataset" ], "id": 
"51eb00d7-2928-41ad-9ae9-7f0da7d64d6d" }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 145, "referenced_widgets": [ "39f60d5965554427af7d777ddfdb5c6e", "830abf25e3284b83b50259838de25461", "e3be9c1a41484139826db7c7e8bad684", "1b19e49a7fbb474c82f1608ec5cf01be", "47e6f26a412641d4bd7aee815075d817", "9c9ef3925e754ae2bd2948517c87ae8d", "2122301e2a8f481ebb22f718d5d38576", "84b344d92503448bafad168a2bcf70a3", "4225ce6045154131b73498b1b7b55d6d", "ce039ec952f14f35957b9b1f67242e87", "de82245c43a84ac992b7d99fcc360bdc", "1c379cd72c454368b2d7fa8f13d81c14", "f1960c78e0b9417a9d1dbcfac32be626", "14245abecc01427bb58bb10ae83df2e1", "ff827de069434301a4c7510d90d1ff5f", "6103bae35e6843ae93c45b4ac1433f3a", "105de6d562674a28a0817bdc4c933551", "155bef4539784268a616235b016d5309", "f50840f61e1e414094a9d7951e7d3ce8", "16821a4867544d84bcf301a95a56e83f", "868228753b36406daa2dee41b0dd3874", "452c00fbce3b425f940a72707a8792cc", "7dbdbc659e17416ab15171fe0379184f", "72654bb34fb04051bd9dcce3abf6188d", "353d5817b86c4d61b58956bb50da83b6", "dc992e598e364004b435d0e7dd01f7c2", "b4f83ff147854a7b81fd9510981adc36", "08bb43115500409b99b4710dab7c5237", "c8b962a661a641a1bc6b633f5ca1fbe8", "5a240d0c09664383975038ac835f4f86", "b6c7a0c22fa2426bb9d3b9adcc465e67", "cd2ce3e0aead44ca9ffd2c8d00f473e6" ] }, "id": "tX7gYxZaVhYL", "outputId": "5cff0d05-0481-48ab-fe2a-6b34b917d198" }, "outputs": [ { "output_type": "display_data", "data": { "text/plain": [ "VBox(children=(HTML(value='
로깅정보가 여기에 담김" ], "id": "372c64be-8fc0-4cc8-bda8-92ecf3632cc3" }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "bac62c01-21ef-491e-a686-cf4988186c58", "outputId": "b236645b-14b6-4968-aad7-63ff47d03396" }, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "ScriptArguments(cache_dir=None, model_name='jangmin/midm-7b-safetensors-only', dataset_name='/gdrive/MyDrive/food-order-understanding-small-3200.json', seq_length=64, num_workers=2, training_args=TrainingArguments(\n", "_n_gpu=1,\n", "adafactor=False,\n", "adam_beta1=0.9,\n", "adam_beta2=0.999,\n", "adam_epsilon=1e-08,\n", "auto_find_batch_size=False,\n", "bf16=False,\n", "bf16_full_eval=False,\n", "data_seed=None,\n", "dataloader_drop_last=False,\n", "dataloader_num_workers=0,\n", "dataloader_pin_memory=True,\n", "ddp_backend=None,\n", "ddp_broadcast_buffers=None,\n", "ddp_bucket_cap_mb=None,\n", "ddp_find_unused_parameters=None,\n", "ddp_timeout=1800,\n", "debug=[],\n", "deepspeed=None,\n", "disable_tqdm=False,\n", "dispatch_batches=None,\n", "do_eval=False,\n", "do_predict=False,\n", "do_train=False,\n", "eval_accumulation_steps=None,\n", "eval_delay=0,\n", "eval_steps=None,\n", "evaluation_strategy=no,\n", "fp16=False,\n", "fp16_backend=auto,\n", "fp16_full_eval=False,\n", "fp16_opt_level=O1,\n", "fsdp=[],\n", "fsdp_config={'min_num_params': 0, 'xla': False, 'xla_fsdp_grad_ckpt': False},\n", "fsdp_min_num_params=0,\n", "fsdp_transformer_layer_cls_to_wrap=None,\n", "full_determinism=False,\n", "gradient_accumulation_steps=2,\n", "gradient_checkpointing=False,\n", "gradient_checkpointing_kwargs=None,\n", "greater_is_better=None,\n", "group_by_length=False,\n", "half_precision_backend=auto,\n", "hub_always_push=False,\n", "hub_model_id=None,\n", "hub_private_repo=False,\n", "hub_strategy=every_save,\n", "hub_token=,\n", "ignore_data_skip=False,\n", "include_inputs_for_metrics=False,\n", "include_tokens_per_second=False,\n", "jit_mode_eval=False,\n", "label_names=None,\n", "label_smoothing_factor=0.0,\n", "learning_rate=0.0001,\n", "length_column_name=length,\n", "load_best_model_at_end=False,\n", "local_rank=0,\n", "log_level=passive,\n", "log_level_replica=warning,\n", "log_on_each_node=True,\n", "logging_dir=./results/runs/Dec11_09-14-48_26095bbc49ee,\n", "logging_first_step=False,\n", "logging_nan_inf_filter=True,\n", "logging_steps=50,\n", "logging_strategy=steps,\n", "lr_scheduler_type=cosine,\n", "max_grad_norm=0.3,\n", "max_steps=300,\n", "metric_for_best_model=None,\n", "mp_parameters=,\n", "neftune_noise_alpha=None,\n", "no_cuda=False,\n", "num_train_epochs=1,\n", "optim=paged_adamw_32bit,\n", "optim_args=None,\n", "output_dir=/gdrive/MyDrive/lora-midm-7b-food-order-understanding,\n", "overwrite_output_dir=False,\n", "past_index=-1,\n", "per_device_eval_batch_size=1,\n", "per_device_train_batch_size=1,\n", "prediction_loss_only=False,\n", "push_to_hub=False,\n", "push_to_hub_model_id=None,\n", "push_to_hub_organization=None,\n", "push_to_hub_token=,\n", "ray_scope=last,\n", "remove_unused_columns=False,\n", "report_to=['wandb'],\n", "resume_from_checkpoint=None,\n", "run_name=midm-7b-food-order-understanding,\n", "save_on_each_node=False,\n", "save_safetensors=True,\n", "save_steps=500,\n", "save_strategy=epoch,\n", "save_total_limit=20,\n", "seed=42,\n", "skip_memory_metrics=True,\n", "split_batches=False,\n", "tf32=None,\n", "torch_compile=False,\n", "torch_compile_backend=None,\n", "torch_compile_mode=None,\n", "torchdynamo=None,\n", 
"tpu_metrics_debug=False,\n", "tpu_num_cores=None,\n", "use_cpu=False,\n", "use_ipex=False,\n", "use_legacy_prediction_loop=False,\n", "use_mps_device=False,\n", "warmup_ratio=0.03,\n", "warmup_steps=0,\n", "weight_decay=0.05,\n", "), packing=True, peft_config=LoraConfig(peft_type=, auto_mapping=None, base_model_name_or_path=None, revision=None, task_type='CAUSAL_LM', inference_mode=False, r=8, target_modules={'c_attn', 'c_proj', 'c_fc'}, lora_alpha=16, lora_dropout=0.05, fan_in_fan_out=False, bias='none', modules_to_save=None, init_lora_weights=True, layers_to_transform=None, layers_pattern=None, rank_pattern={}, alpha_pattern={}, megatron_config=None, megatron_core='megatron.core', loftq_config={}), merge_with_final_checkpoint=False)\n" ] } ], "source": [ "print(script_args)" ], "id": "bac62c01-21ef-491e-a686-cf4988186c58" }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "1ff1422e-184d-4438-b033-40ae8bdaa5fd" }, "outputs": [], "source": [ "bnb_config = BitsAndBytesConfig(\n", " load_in_4bit=True,\n", " bnb_4bit_quant_type=\"nf4\",\n", " bnb_4bit_compute_dtype=torch.bfloat16,\n", ") #원본모델 4비트로 양자화" ], "id": "1ff1422e-184d-4438-b033-40ae8bdaa5fd" }, { "cell_type": "markdown", "metadata": { "id": "elg7gcB-5zb7" }, "source": [ "원본인 'KT-AI/midm-bitext-S-7B-inst-v1' 는 *.bin 형태로 모델을 제공한다.\n", "- 코랩에서 CPU 메모리 부족 발생\n", "\n", "해결책\n", "- safetensors로 변환한 모델을 업로드 하고 이를 사용하기로 한다." ], "id": "elg7gcB-5zb7" }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 122, "referenced_widgets": [ "767e90f44f384e0e8b6c4d1706493c58", "5b10b584afb34c1492ce0de08fa79144", "d9be240b803e43ca9bdb3475f21982cd", "0ad2469f323a49daafecbc14d3b23c65", "335fe5e5da6c448eafdb2e33fc85ca40", "1949ff59aa09450a902ae2de8c21f573", "be78758cc7cd47bcb87da6ed79379fc1", "7dd8a76b2eb242cbb88b78cad4093040", "12b1baa57d574403bc532a31535ef487", "9896c7957d134c7baa78e15d3d3d5711", "226720cf06b445adb4ed67ffa5888667" ] }, "id": "15c8425e-bb0b-40c5-bfe8-385bac699b9d", "outputId": "0cd996ae-c109-4011-bfb4-3515aeb337e5" }, "outputs": [ { "output_type": "stream", "name": "stderr", "text": [ "/usr/local/lib/python3.10/dist-packages/transformers/models/auto/auto_factory.py:472: FutureWarning: The `use_auth_token` argument is deprecated and will be removed in v5 of Transformers. 
Please use `token` instead.\n", " warnings.warn(\n" ] }, { "output_type": "display_data", "data": { "text/plain": [ "Loading checkpoint shards: 0%| | 0/5 [00:00 어차피 코랩이 빠르니까 그냥 로딩해서 쓰자" ], "id": "15c8425e-bb0b-40c5-bfe8-385bac699b9d" }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "E9D239NqbDba", "outputId": "717e7ce1-9b44-49c3-d578-7794230f061a" }, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "MidmLMHeadModel(\n", " (transformer): MidmModel(\n", " (wte): Embedding(72192, 4096)\n", " (rotary_pos_emb): RotaryEmbedding()\n", " (drop): Dropout(p=0.0, inplace=False)\n", " (h): ModuleList(\n", " (0-31): 32 x MidmBlock(\n", " (ln_1): LayerNorm((4096,), eps=1e-05, elementwise_affine=True)\n", " (attn): MidmAttention(\n", " (c_attn): Linear4bit(in_features=4096, out_features=12288, bias=False)\n", " (c_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)\n", " (attn_dropout): Dropout(p=0.0, inplace=False)\n", " (resid_dropout): Dropout(p=0.0, inplace=False)\n", " )\n", " (ln_2): LayerNorm((4096,), eps=1e-05, elementwise_affine=True)\n", " (mlp): MidmMLP(\n", " (c_fc): Linear4bit(in_features=4096, out_features=21760, bias=False)\n", " (c_proj): Linear4bit(in_features=10880, out_features=4096, bias=False)\n", " (dropout): Dropout(p=0.0, inplace=False)\n", " )\n", " )\n", " )\n", " (ln_f): LayerNorm((4096,), eps=1e-05, elementwise_affine=True)\n", " )\n", " (lm_head): Linear(in_features=4096, out_features=72192, bias=False)\n", ")" ] }, "metadata": {}, "execution_count": 15 } ], "source": [ "base_model\n", "#linear모듈들이 다 4비트로 바껴있음, 아직 peft모델은 붙이지않음\n", "#토큰의사이즈도 크고 컨택스트의 길이도 길어서 메모리를 현재 많이 차지하고있음" ], "id": "E9D239NqbDba" }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "d37b485f-4fd3-404f-ab02-2bf3e93b3fc2" }, "outputs": [], "source": [ "peft_config = script_args.peft_config" ], "id": "d37b485f-4fd3-404f-ab02-2bf3e93b3fc2" }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "4420fcc4-2bac-413d-b7aa-89455c512419", "outputId": "e0556947-f27c-441a-d028-60f4d91f706f" }, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "LoraConfig(peft_type=, auto_mapping=None, base_model_name_or_path=None, revision=None, task_type='CAUSAL_LM', inference_mode=False, r=8, target_modules={'c_attn', 'c_proj', 'c_fc'}, lora_alpha=16, lora_dropout=0.05, fan_in_fan_out=False, bias='none', modules_to_save=None, init_lora_weights=True, layers_to_transform=None, layers_pattern=None, rank_pattern={}, alpha_pattern={}, megatron_config=None, megatron_core='megatron.core', loftq_config={})" ] }, "metadata": {}, "execution_count": 17 } ], "source": [ "peft_config" ], "id": "4420fcc4-2bac-413d-b7aa-89455c512419" }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "f47f9584-3988-46b8-a062-29dcde75a0e2" }, "outputs": [], "source": [ "tokenizer = AutoTokenizer.from_pretrained(\n", " 'KT-AI/midm-bitext-S-7B-inst-v1',\n", " # script_args.model_name,\n", " trust_remote_code=True,\n", " cache_dir=script_args.cache_dir,\n", ")\n", "\n", "#미묘한부분\n", "if getattr(tokenizer, \"pad_token\", None) is None:\n", " tokenizer.pad_token = tokenizer.eos_token #패드토큰을 eos token으로 사용\n", "tokenizer.padding_side = \"right\" # Fix weird overflow issue with fp16 training\n", "\n", "tokenizer.add_special_tokens(dict(bos_token=''))\n", "\n", "base_model.config.pad_token_id = tokenizer.pad_token_id\n", 
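"# Mirror the tokenizer's special-token ids into the model config so padded batches and the\n", "# BOS-prefixed prompts are handled consistently during SFT and later generation.\n",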
"base_model.config.bos_token_id = tokenizer.bos_token_id" ], "id": "f47f9584-3988-46b8-a062-29dcde75a0e2" }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "abd17c83-ab8d-44cb-b69b-fc0936c2cec5" }, "outputs": [], "source": [ "training_args = script_args.training_args" ], "id": "abd17c83-ab8d-44cb-b69b-fc0936c2cec5" }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "62e8139f-5179-4c75-84a7-0c818ab0a35a", "outputId": "b9bb0168-1519-48d5-99d2-332103c52883" }, "outputs": [ { "output_type": "stream", "name": "stderr", "text": [ "100%|██████████| 400/400 [00:00<00:00, 2392.09it/s]" ] }, { "output_type": "stream", "name": "stdout", "text": [ "The character to token ratio of the dataset is: 1.48\n" ] }, { "output_type": "stream", "name": "stderr", "text": [ "\n", "/usr/local/lib/python3.10/dist-packages/trl/trainer/utils.py:548: UserWarning: The passed formatting_func has more than one argument. Usually that function should have a single argument `example` which corresponds to the dictionary returned by each element of the dataset. Make sure you know what you are doing.\n", " warnings.warn(\n" ] } ], "source": [ "train_dataset = create_datasets(tokenizer, script_args)" ], "id": "62e8139f-5179-4c75-84a7-0c818ab0a35a" }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "000314e9-f10b-4685-8da6-0511494a9eb4", "outputId": "6be8df61-7d91-4e94-c0bc-7e04fee2705c" }, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "2000" ] }, "metadata": {}, "execution_count": 21 } ], "source": [ "len(train_dataset)" ], "id": "000314e9-f10b-4685-8da6-0511494a9eb4" }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "4ba80a64-0ec7-4b29-ac95-7b3d34549f17", "outputId": "c09d1f11-220b-4966-9c03-8fc03708283c" }, "outputs": [ { "output_type": "stream", "name": "stderr", "text": [ "/usr/local/lib/python3.10/dist-packages/trl/trainer/sft_trainer.py:267: UserWarning: You passed `packing=True` to the SFTTrainer, and you are training your model with `max_steps` strategy. 
The dataset will be iterated until the `max_steps` are reached.\n", " warnings.warn(\n" ] } ], "source": [ "trainer = SFTTrainer(\n", " model=base_model,\n", " train_dataset=train_dataset,\n", " eval_dataset=None,\n", " peft_config=peft_config, #중요\n", " packing=script_args.packing,\n", " max_seq_length=script_args.seq_length, #청크사이즈 설정\n", " tokenizer=tokenizer,\n", " args=training_args,\n", ")" ], "id": "4ba80a64-0ec7-4b29-ac95-7b3d34549f17" }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "Q9GywqcxvN_9", "outputId": "b3bde7a2-f2bc-41af-d7e2-a736a6205bf5" }, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "MidmLMHeadModel(\n", " (transformer): MidmModel(\n", " (wte): Embedding(72192, 4096)\n", " (rotary_pos_emb): RotaryEmbedding()\n", " (drop): Dropout(p=0.0, inplace=False)\n", " (h): ModuleList(\n", " (0-31): 32 x MidmBlock(\n", " (ln_1): LayerNorm((4096,), eps=1e-05, elementwise_affine=True)\n", " (attn): MidmAttention(\n", " (c_attn): lora.Linear4bit(\n", " (base_layer): Linear4bit(in_features=4096, out_features=12288, bias=False)\n", " (lora_dropout): ModuleDict(\n", " (default): Dropout(p=0.05, inplace=False)\n", " )\n", " (lora_A): ModuleDict(\n", " (default): Linear(in_features=4096, out_features=8, bias=False)\n", " )\n", " (lora_B): ModuleDict(\n", " (default): Linear(in_features=8, out_features=12288, bias=False)\n", " )\n", " (lora_embedding_A): ParameterDict()\n", " (lora_embedding_B): ParameterDict()\n", " )\n", " (c_proj): lora.Linear4bit(\n", " (base_layer): Linear4bit(in_features=4096, out_features=4096, bias=False)\n", " (lora_dropout): ModuleDict(\n", " (default): Dropout(p=0.05, inplace=False)\n", " )\n", " (lora_A): ModuleDict(\n", " (default): Linear(in_features=4096, out_features=8, bias=False)\n", " )\n", " (lora_B): ModuleDict(\n", " (default): Linear(in_features=8, out_features=4096, bias=False)\n", " )\n", " (lora_embedding_A): ParameterDict()\n", " (lora_embedding_B): ParameterDict()\n", " )\n", " (attn_dropout): Dropout(p=0.0, inplace=False)\n", " (resid_dropout): Dropout(p=0.0, inplace=False)\n", " )\n", " (ln_2): LayerNorm((4096,), eps=1e-05, elementwise_affine=True)\n", " (mlp): MidmMLP(\n", " (c_fc): lora.Linear4bit(\n", " (base_layer): Linear4bit(in_features=4096, out_features=21760, bias=False)\n", " (lora_dropout): ModuleDict(\n", " (default): Dropout(p=0.05, inplace=False)\n", " )\n", " (lora_A): ModuleDict(\n", " (default): Linear(in_features=4096, out_features=8, bias=False)\n", " )\n", " (lora_B): ModuleDict(\n", " (default): Linear(in_features=8, out_features=21760, bias=False)\n", " )\n", " (lora_embedding_A): ParameterDict()\n", " (lora_embedding_B): ParameterDict()\n", " )\n", " (c_proj): lora.Linear4bit(\n", " (base_layer): Linear4bit(in_features=10880, out_features=4096, bias=False)\n", " (lora_dropout): ModuleDict(\n", " (default): Dropout(p=0.05, inplace=False)\n", " )\n", " (lora_A): ModuleDict(\n", " (default): Linear(in_features=10880, out_features=8, bias=False)\n", " )\n", " (lora_B): ModuleDict(\n", " (default): Linear(in_features=8, out_features=4096, bias=False)\n", " )\n", " (lora_embedding_A): ParameterDict()\n", " (lora_embedding_B): ParameterDict()\n", " )\n", " (dropout): Dropout(p=0.0, inplace=False)\n", " )\n", " )\n", " )\n", " (ln_f): LayerNorm((4096,), eps=1e-05, elementwise_affine=True)\n", " )\n", " (lm_head): Linear(in_features=4096, out_features=72192, bias=False)\n", ")" ] }, "metadata": {}, "execution_count": 23 } ], 
"source": [ "base_model #내부에 peft_config 사용 => 로라 어댑터 붙여짐" ], "id": "Q9GywqcxvN_9" }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "gw9xbeUgbZEo", "outputId": "1da139b2-38ec-4bca-c0c1-d8ec28f21332" }, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "device(type='cuda', index=0)" ] }, "metadata": {}, "execution_count": 24 } ], "source": [ "base_model.device" ], "id": "gw9xbeUgbZEo" }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "edb204be-ec15-4800-af49-6cfbad2f7f9a", "outputId": "0fe99032-dc26-45bf-abfb-da6ee02c37af" }, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "trainable params: 16744448 || all params: 3821510656 || trainable%: 0.4381630592527648\n" ] } ], "source": [ "print_trainable_parameters(base_model)" ], "id": "edb204be-ec15-4800-af49-6cfbad2f7f9a" }, { "cell_type": "markdown", "metadata": { "id": "76sRe172fGlm" }, "source": [ "midm 모델을 주문 문장 이해에 적용시 특징\n", "- 모델 로딩 과정에서 CPU도 5.1기가, 디스크 42.4기가, GPU 메모리: 7,4 기가\n", "\n", "구글 코랩 T-4 GPU: 300스텝 (13:47초 예상)\n", "\n", "시퀀스 길이 384의 경우\n", "- 14.7 G / 15.0 G 사용\n", "- 메모리 오버플로우 발생시 이보다 줄일 것" ], "id": "76sRe172fGlm" }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 388 }, "id": "14019fa9-0c6f-4729-ac99-0d407af375b8", "outputId": "e6664d11-f651-462c-bbe5-bc881dfd6eba" }, "outputs": [ { "output_type": "stream", "name": "stderr", "text": [ "\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33m20211397\u001b[0m. Use \u001b[1m`wandb login --relogin`\u001b[0m to force relogin\n" ] }, { "output_type": "display_data", "data": { "text/plain": [ "" ], "text/html": [ "Tracking run with wandb version 0.16.1" ] }, "metadata": {} }, { "output_type": "display_data", "data": { "text/plain": [ "" ], "text/html": [ "Run data is saved locally in /content/wandb/run-20231211_091655-4b6dc5p8" ] }, "metadata": {} }, { "output_type": "display_data", "data": { "text/plain": [ "" ], "text/html": [ "Syncing run midm-7b-food-order-understanding to Weights & Biases (docs)
" ] }, "metadata": {} }, { "output_type": "display_data", "data": { "text/plain": [ "" ], "text/html": [ " View project at https://wandb.ai/20211397/huggingface" ] }, "metadata": {} }, { "output_type": "display_data", "data": { "text/plain": [ "" ], "text/html": [ " View run at https://wandb.ai/20211397/huggingface/runs/4b6dc5p8" ] }, "metadata": {} }, { "output_type": "display_data", "data": { "text/plain": [ "" ], "text/html": [ "\n", "
\n", " \n", " \n", " [300/300 12:47, Epoch 0/1]\n", "
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
    Step | Training Loss\n", "    -----|--------------\n", "      50 | 2.166900\n", "     100 | 1.059100\n", "     150 | 0.992300\n", "     200 | 0.981700\n", "     250 | 0.934600\n", "     300 | 0.889800\n", "
" ] }, "metadata": {} }, { "output_type": "execute_result", "data": { "text/plain": [ "TrainOutput(global_step=300, training_loss=1.1707455444335937, metrics={'train_runtime': 778.6233, 'train_samples_per_second': 0.771, 'train_steps_per_second': 0.385, 'total_flos': 1552584749875200.0, 'train_loss': 1.1707455444335937, 'epoch': 0.3})" ] }, "metadata": {}, "execution_count": 26 } ], "source": [ "trainer.train() # wandb 가입해야함" ], "id": "14019fa9-0c6f-4729-ac99-0d407af375b8" }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 35 }, "id": "3Y4FQSyRghQt", "outputId": "281bc4b2-077b-44ba-c27d-cc7017e2ffc6" }, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "'/gdrive/MyDrive/lora-midm-7b-food-order-understanding'" ], "application/vnd.google.colaboratory.intrinsic+json": { "type": "string" } }, "metadata": {}, "execution_count": 27 } ], "source": [ "script_args.training_args.output_dir" ], "id": "3Y4FQSyRghQt" }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "49f05450-da2a-4edd-9db2-63836a0ec73a" }, "outputs": [], "source": [ "trainer.save_model(script_args.training_args.output_dir)" ], "id": "49f05450-da2a-4edd-9db2-63836a0ec73a" }, { "cell_type": "markdown", "metadata": { "id": "652f307e-e1d7-43ae-b083-dba2d94c2296" }, "source": [ "# 추론 테스트" ], "id": "652f307e-e1d7-43ae-b083-dba2d94c2296" }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "ea8a1fea-7499-4386-9dea-0509110f61af" }, "outputs": [], "source": [ "from transformers import pipeline, TextStreamer" ], "id": "ea8a1fea-7499-4386-9dea-0509110f61af" }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "52626888-1f6e-46b6-a8dd-836622149ff5" }, "outputs": [], "source": [ "instruction_prompt_template = \"\"\"\n", "###System;너는 사용자의 리뷰를 긍정,부정 중 하나로만 판단해야 한다.\n", "### 리뷰 문장: 진짜 재밌다 ### 분류 결과: 긍정\n", "###System;너는 사용자의 리뷰를 긍정,부정 중 하나로만 판단해야 한다.\n", "### 리뷰 문장: 나 잘 뻔 했잖아 영화보고 지루해서 ### 분류 결과: 부정\n", "###System;너는 사용자의 리뷰를 긍정,부정 중 하나로만 판단해야 한다.\n", "### 리뷰 문장: 어떻게 이렇게까지 재미없을 수가 있지 ### 분류 결과: 부정\n", "###System;너는 사용자의 리뷰를 긍정,부정 중 하나로만 판단해야 한다.\n", "### 리뷰 문장: 열린결말 영화 좋아하는데 이 영화가 열린결말이야 ### 분류 결과: 긍정\n", "\n", "\"\"\"\n", "prompt_template = \"\"\"###System;{System}\n", "###User;{User}\n", "###Midm;\"\"\"\n", "\n", "default_system_msg = (\n", " \"너는 사용자의 리뷰를 긍정,부정 중 하나로만 판단해야 한다.\"\n", ")" ], "id": "52626888-1f6e-46b6-a8dd-836622149ff5" }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "46e844fa-8f63-4359-a4fb-df66e8171796" }, "outputs": [], "source": [ "evaluation_queries = [\n", " \"이게 재밌다는 사람들이 이해가 안가\"\n", " \"너무 흥미로워요 시즌2도 나왔으면 좋겠어요\"\n", " \"이게 무슨 영화야 지루하기짝이없네\"\n", " \"배울점이 많은 영화네요\"\n", "]" ], "id": "46e844fa-8f63-4359-a4fb-df66e8171796" }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "1919cf1f-482e-4185-9d06-e3cea1918416" }, "outputs": [], "source": [ "def wrapper_generate(model, input_prompt, do_stream=False):\n", " data = tokenizer(input_prompt, return_tensors=\"pt\")\n", " streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)\n", " input_ids = data.input_ids[..., :-1]\n", " with torch.no_grad():\n", " pred = model.generate(\n", " input_ids=input_ids.cuda(),\n", " streamer=streamer if do_stream else None,\n", " use_cache=True,\n", " max_new_tokens=float('inf'),\n", " do_sample=False\n", " )\n", " decoded_text = tokenizer.batch_decode(pred, skip_special_tokens=True)\n", " decoded_text = 
decoded_text[0].replace(\"<[!newline]>\", \"\\n\")\n", " return (decoded_text[len(input_prompt):])" ], "id": "1919cf1f-482e-4185-9d06-e3cea1918416" }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "eaac1f6f-c823-4488-8edb-2f931ddf0daa", "outputId": "ecf6fcc2-79fb-48bd-facc-e07bf1106116" }, "outputs": [ { "output_type": "stream", "name": "stderr", "text": [ "/usr/local/lib/python3.10/dist-packages/transformers/generation/utils.py:1473: UserWarning: You have modified the pretrained model configuration to control generation. This is a deprecated strategy to control generation and will be removed soon, in a future version. Please use and modify the model generation configuration (see https://huggingface.co/docs/transformers/generation_strategies#default-text-generation-configuration )\n", " warnings.warn(\n" ] } ], "source": [ "eval_dic = {i:wrapper_generate(model=base_model, input_prompt=prompt_template.format(System=default_system_msg, User=evaluation_queries[i]))for i, query in enumerate(evaluation_queries)}" ], "id": "eaac1f6f-c823-4488-8edb-2f931ddf0daa" }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "fefd04ba-2ed8-4f84-bdd0-86d52b3f39f6", "outputId": "e3b123dc-7e05-407e-9313-fe5fb3d6ab3a" }, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "- 분석 결과 0: 음식명:치즈돈까스, 수량:한 판\n", "- 분석 결과 1: 음식명:아메리카노, 옵션:아이스, 수량:한 잔\n" ] } ], "source": [ "print(eval_dic[0])" ], "id": "fefd04ba-2ed8-4f84-bdd0-86d52b3f39f6" }, { "cell_type": "markdown", "metadata": { "id": "3f471e3a-723b-4df5-aa72-46f571f6bab6" }, "source": [ "# 미세튜닝된 모델 로딩 후 테스트" ], "id": "3f471e3a-723b-4df5-aa72-46f571f6bab6" }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "a43bdd07-7555-42b2-9888-a614afec892f" }, "outputs": [], "source": [ "bnb_config = BitsAndBytesConfig(\n", " load_in_4bit=True,\n", " bnb_4bit_quant_type=\"nf4\",\n", " bnb_4bit_compute_dtype=torch.bfloat16,\n", ") # save_model한 모델 체크포인트가져와서 로딩" ], "id": "a43bdd07-7555-42b2-9888-a614afec892f" }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 557 }, "id": "39db2ee4-23c8-471f-89b2-bca34964bf81", "outputId": "36d043d6-c590-4d3f-adb0-6a67f2d5fc95" }, "outputs": [ { "output_type": "error", "ename": "ValueError", "evalue": "ignored", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m trained_model = AutoPeftModelForCausalLM.from_pretrained(\n\u001b[0m\u001b[1;32m 2\u001b[0m \u001b[0mscript_args\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtraining_args\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0moutput_dir\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0mquantization_config\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mbnb_config\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0mdevice_map\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m\"auto\"\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m 
\u001b[0mcache_dir\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mscript_args\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcache_dir\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/peft/auto.py\u001b[0m in \u001b[0;36mfrom_pretrained\u001b[0;34m(cls, pretrained_model_name_or_path, adapter_name, is_trainable, config, **kwargs)\u001b[0m\n\u001b[1;32m 99\u001b[0m )\n\u001b[1;32m 100\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 101\u001b[0;31m \u001b[0mbase_model\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtarget_class\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfrom_pretrained\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mbase_model_path\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 102\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 103\u001b[0m return cls._target_peft_class.from_pretrained(\n", "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/transformers/models/auto/auto_factory.py\u001b[0m in \u001b[0;36mfrom_pretrained\u001b[0;34m(cls, pretrained_model_name_or_path, *model_args, **kwargs)\u001b[0m\n\u001b[1;32m 559\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 560\u001b[0m \u001b[0mcls\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mregister\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mconfig\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__class__\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmodel_class\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mexist_ok\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 561\u001b[0;31m return model_class.from_pretrained(\n\u001b[0m\u001b[1;32m 562\u001b[0m \u001b[0mpretrained_model_name_or_path\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0mmodel_args\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mconfig\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mconfig\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mhub_kwargs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 563\u001b[0m )\n", "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/transformers/modeling_utils.py\u001b[0m in \u001b[0;36mfrom_pretrained\u001b[0;34m(cls, pretrained_model_name_or_path, config, cache_dir, ignore_mismatched_sizes, force_download, local_files_only, token, revision, use_safetensors, *model_args, **kwargs)\u001b[0m\n\u001b[1;32m 3418\u001b[0m }\n\u001b[1;32m 3419\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;34m\"cpu\"\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mdevice_map_without_lm_head\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mvalues\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0;34m\"disk\"\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mdevice_map_without_lm_head\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mvalues\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 3420\u001b[0;31m raise ValueError(\n\u001b[0m\u001b[1;32m 3421\u001b[0m \"\"\"\n\u001b[1;32m 3422\u001b[0m \u001b[0mSome\u001b[0m \u001b[0mmodules\u001b[0m \u001b[0mare\u001b[0m \u001b[0mdispatched\u001b[0m \u001b[0mon\u001b[0m \u001b[0mthe\u001b[0m \u001b[0mCPU\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0mthe\u001b[0m 
\u001b[0mdisk\u001b[0m\u001b[0;34m.\u001b[0m \u001b[0mMake\u001b[0m \u001b[0msure\u001b[0m \u001b[0myou\u001b[0m \u001b[0mhave\u001b[0m \u001b[0menough\u001b[0m \u001b[0mGPU\u001b[0m \u001b[0mRAM\u001b[0m \u001b[0mto\u001b[0m \u001b[0mfit\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;31mValueError\u001b[0m: \n Some modules are dispatched on the CPU or the disk. Make sure you have enough GPU RAM to fit\n the quantized model. If you want to dispatch the model on the CPU or the disk while keeping\n these modules in 32-bit, you need to set `load_in_8bit_fp32_cpu_offload=True` and pass a custom\n `device_map` to `from_pretrained`. Check\n https://huggingface.co/docs/transformers/main/en/main_classes/quantization#offload-between-cpu-and-gpu\n for more details.\n " ] } ], "source": [ "\n", "trained_model = AutoPeftModelForCausalLM.from_pretrained(\n", " script_args.training_args.output_dir,\n", " quantization_config=bnb_config,\n", " device_map=\"auto\",\n", " cache_dir=script_args.cache_dir,\n", " trust_remote_code=True,\n", "\n", ")" ], "id": "39db2ee4-23c8-471f-89b2-bca34964bf81" }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 562 }, "id": "b0b75ca4-730d-4bde-88bb-a86462a76d52", "outputId": "f9aea033-b273-4ccc-8ebb-dfe092a90413" }, "outputs": [ { "ename": "ValueError", "evalue": "ignored", "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m tokenizer = AutoTokenizer.from_pretrained(\n\u001b[0m\u001b[1;32m 2\u001b[0m \u001b[0mscript_args\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmodel_name\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0mtrust_remote_code\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0mcache_dir\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mscript_args\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcache_dir\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m )\n", "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/transformers/models/auto/tokenization_auto.py\u001b[0m in \u001b[0;36mfrom_pretrained\u001b[0;34m(cls, pretrained_model_name_or_path, *inputs, **kwargs)\u001b[0m\n\u001b[1;32m 794\u001b[0m )\n\u001b[1;32m 795\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 796\u001b[0;31m raise ValueError(\n\u001b[0m\u001b[1;32m 797\u001b[0m \u001b[0;34mf\"Unrecognized configuration class {config.__class__} to build an AutoTokenizer.\\n\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 798\u001b[0m \u001b[0;34mf\"Model type should be one of {', '.join(c.__name__ for c in TOKENIZER_MAPPING.keys())}.\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;31mValueError\u001b[0m: Unrecognized configuration class to build an AutoTokenizer.\nModel type should be one of AlbertConfig, AlignConfig, BarkConfig, BartConfig, BertConfig, BertGenerationConfig, BigBirdConfig, BigBirdPegasusConfig, BioGptConfig, BlenderbotConfig, BlenderbotSmallConfig, BlipConfig, Blip2Config, BloomConfig, BridgeTowerConfig, BrosConfig, CamembertConfig, CanineConfig, ChineseCLIPConfig, 
ClapConfig, CLIPConfig, CLIPSegConfig, LlamaConfig, CodeGenConfig, ConvBertConfig, CpmAntConfig, CTRLConfig, Data2VecAudioConfig, Data2VecTextConfig, DebertaConfig, DebertaV2Config, DistilBertConfig, DPRConfig, ElectraConfig, ErnieConfig, ErnieMConfig, EsmConfig, FlaubertConfig, FNetConfig, FSMTConfig, FunnelConfig, GitConfig, GPT2Config, GPT2Config, GPTBigCodeConfig, GPTNeoConfig, GPTNeoXConfig, GPTNeoXJapaneseConfig, GPTJConfig, GPTSanJapaneseConfig, GroupViTConfig, HubertConfig, IBertConfig, IdeficsConfig, InstructBlipConfig, JukeboxConfig, Kosmos2Config, LayoutLMConfig, LayoutLMv2Config, LayoutLMv3Config, LEDConfig, LiltConfig, LlamaConfig, LongformerConfig, LongT5Config, LukeConfig, LxmertConfig, M2M100Config, MarianConfig, MBartConfig, MegaConfig, MegatronBertConfig, MgpstrConfig, MistralConfig, MobileBertConfig, MPNetConfig, MptConfig, MraConfig, MT5Config, MusicgenConfig, MvpConfig, NezhaConfig, NllbMoeConfig, NystromformerConfig, OneFormerConfig, OpenAIGPTConfig, OPTConfig, Owlv2Config, OwlViTConfig, PegasusConfig, PegasusXConfig, PerceiverConfig, PersimmonConfig, Pix2StructConfig, PLBartConfig, ProphetNetConfig, QDQBertConfig, RagConfig, RealmCo..." ] } ], "source": [ "tokenizer = AutoTokenizer.from_pretrained(\n", " script_args.model_name,\n", " trust_remote_code=True,\n", " cache_dir=script_args.cache_dir,\n", ")\n", "\n", "if getattr(tokenizer, \"pad_token\", None) is None:\n", " tokenizer.pad_token = tokenizer.eos_token\n", "tokenizer.padding_side = \"right\" # Fix weird overflow issue with fp16 training\n", "trained_model.config.pad_token_id = tokenizer.pad_token_id" ], "id": "b0b75ca4-730d-4bde-88bb-a86462a76d52" }, { "cell_type": "markdown", "metadata": { "id": "X1tRCa4EiYXp" }, "source": [ "추론 과정에서는 GPU 메모리를 약 5.5 GB 활용" ], "id": "X1tRCa4EiYXp" }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "e374555b-9f8a-4617-8ea7-c1e6ee1b2999" }, "outputs": [], "source": [ "eval_dic = {i:wrapper_generate(model=trained_model, input_prompt=prompt_template.format(System=default_system_msg, User=evaluation_queries[i]))for i, query in enumerate(evaluation_queries)}" ], "id": "e374555b-9f8a-4617-8ea7-c1e6ee1b2999" }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "5d055bb0-5e5f-4221-a634-45d903c0f3b5" }, "outputs": [], "source": [ "print(eval_dic[0])" ], "id": "5d055bb0-5e5f-4221-a634-45d903c0f3b5" }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "Z6tBUSgQiU1i" }, "outputs": [], "source": [], "id": "Z6tBUSgQiU1i" } ], "metadata": { "accelerator": "GPU", "colab": { "provenance": [] }, "kernelspec": { "display_name": "Python 3", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.13" }, "widgets": { "application/vnd.jupyter.widget-state+json": { "39f60d5965554427af7d777ddfdb5c6e": { "model_module": "@jupyter-widgets/controls", "model_name": "VBoxModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "VBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "VBoxView", "box_style": "", "children": [ "IPY_MODEL_868228753b36406daa2dee41b0dd3874", "IPY_MODEL_452c00fbce3b425f940a72707a8792cc", "IPY_MODEL_7dbdbc659e17416ab15171fe0379184f", 
"IPY_MODEL_72654bb34fb04051bd9dcce3abf6188d" ], "layout": "IPY_MODEL_2122301e2a8f481ebb22f718d5d38576" } }, "830abf25e3284b83b50259838de25461": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_84b344d92503448bafad168a2bcf70a3", "placeholder": "​", "style": "IPY_MODEL_4225ce6045154131b73498b1b7b55d6d", "value": "


Copy a token from your Hugging Face\ntokens page and paste it below.
Immediately click login after copying\nyour token or it might be stored in plain text in this notebook file.
" } }, "e3be9c1a41484139826db7c7e8bad684": { "model_module": "@jupyter-widgets/controls", "model_name": "PasswordModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "PasswordModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "PasswordView", "continuous_update": true, "description": "Token:", "description_tooltip": null, "disabled": false, "layout": "IPY_MODEL_ce039ec952f14f35957b9b1f67242e87", "placeholder": "​", "style": "IPY_MODEL_de82245c43a84ac992b7d99fcc360bdc", "value": "" } }, "1b19e49a7fbb474c82f1608ec5cf01be": { "model_module": "@jupyter-widgets/controls", "model_name": "CheckboxModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "CheckboxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "CheckboxView", "description": "Add token as git credential?", "description_tooltip": null, "disabled": false, "indent": true, "layout": "IPY_MODEL_1c379cd72c454368b2d7fa8f13d81c14", "style": "IPY_MODEL_f1960c78e0b9417a9d1dbcfac32be626", "value": true } }, "47e6f26a412641d4bd7aee815075d817": { "model_module": "@jupyter-widgets/controls", "model_name": "ButtonModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ButtonModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ButtonView", "button_style": "", "description": "Login", "disabled": false, "icon": "", "layout": "IPY_MODEL_14245abecc01427bb58bb10ae83df2e1", "style": "IPY_MODEL_ff827de069434301a4c7510d90d1ff5f", "tooltip": "" } }, "9c9ef3925e754ae2bd2948517c87ae8d": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_6103bae35e6843ae93c45b4ac1433f3a", "placeholder": "​", "style": "IPY_MODEL_105de6d562674a28a0817bdc4c933551", "value": "\nPro Tip: If you don't already have one, you can create a dedicated\n'notebooks' token with 'write' access, that you can then easily reuse for all\nnotebooks.
" } }, "2122301e2a8f481ebb22f718d5d38576": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": "center", "align_self": null, "border": null, "bottom": null, "display": "flex", "flex": null, "flex_flow": "column", "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": "50%" } }, "84b344d92503448bafad168a2bcf70a3": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "4225ce6045154131b73498b1b7b55d6d": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "ce039ec952f14f35957b9b1f67242e87": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, 
"justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "de82245c43a84ac992b7d99fcc360bdc": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "1c379cd72c454368b2d7fa8f13d81c14": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "f1960c78e0b9417a9d1dbcfac32be626": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "14245abecc01427bb58bb10ae83df2e1": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "ff827de069434301a4c7510d90d1ff5f": { "model_module": 
"@jupyter-widgets/controls", "model_name": "ButtonStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ButtonStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "button_color": null, "font_weight": "" } }, "6103bae35e6843ae93c45b4ac1433f3a": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "105de6d562674a28a0817bdc4c933551": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "155bef4539784268a616235b016d5309": { "model_module": "@jupyter-widgets/controls", "model_name": "LabelModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "LabelModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "LabelView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_f50840f61e1e414094a9d7951e7d3ce8", "placeholder": "​", "style": "IPY_MODEL_16821a4867544d84bcf301a95a56e83f", "value": "Connecting..." 
} }, "f50840f61e1e414094a9d7951e7d3ce8": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "16821a4867544d84bcf301a95a56e83f": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "868228753b36406daa2dee41b0dd3874": { "model_module": "@jupyter-widgets/controls", "model_name": "LabelModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "LabelModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "LabelView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_353d5817b86c4d61b58956bb50da83b6", "placeholder": "​", "style": "IPY_MODEL_dc992e598e364004b435d0e7dd01f7c2", "value": "Token is valid (permission: write)." } }, "452c00fbce3b425f940a72707a8792cc": { "model_module": "@jupyter-widgets/controls", "model_name": "LabelModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "LabelModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "LabelView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_b4f83ff147854a7b81fd9510981adc36", "placeholder": "​", "style": "IPY_MODEL_08bb43115500409b99b4710dab7c5237", "value": "Your token has been saved in your configured git credential helpers (store)." 
} }, "7dbdbc659e17416ab15171fe0379184f": { "model_module": "@jupyter-widgets/controls", "model_name": "LabelModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "LabelModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "LabelView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_c8b962a661a641a1bc6b633f5ca1fbe8", "placeholder": "​", "style": "IPY_MODEL_5a240d0c09664383975038ac835f4f86", "value": "Your token has been saved to /root/.cache/huggingface/token" } }, "72654bb34fb04051bd9dcce3abf6188d": { "model_module": "@jupyter-widgets/controls", "model_name": "LabelModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "LabelModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "LabelView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_b6c7a0c22fa2426bb9d3b9adcc465e67", "placeholder": "​", "style": "IPY_MODEL_cd2ce3e0aead44ca9ffd2c8d00f473e6", "value": "Login successful" } }, "353d5817b86c4d61b58956bb50da83b6": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "dc992e598e364004b435d0e7dd01f7c2": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "b4f83ff147854a7b81fd9510981adc36": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, 
"grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "08bb43115500409b99b4710dab7c5237": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "c8b962a661a641a1bc6b633f5ca1fbe8": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "5a240d0c09664383975038ac835f4f86": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "b6c7a0c22fa2426bb9d3b9adcc465e67": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, 
"right": null, "top": null, "visibility": null, "width": null } }, "cd2ce3e0aead44ca9ffd2c8d00f473e6": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "767e90f44f384e0e8b6c4d1706493c58": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_5b10b584afb34c1492ce0de08fa79144", "IPY_MODEL_d9be240b803e43ca9bdb3475f21982cd", "IPY_MODEL_0ad2469f323a49daafecbc14d3b23c65" ], "layout": "IPY_MODEL_335fe5e5da6c448eafdb2e33fc85ca40" } }, "5b10b584afb34c1492ce0de08fa79144": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_1949ff59aa09450a902ae2de8c21f573", "placeholder": "​", "style": "IPY_MODEL_be78758cc7cd47bcb87da6ed79379fc1", "value": "Loading checkpoint shards: 100%" } }, "d9be240b803e43ca9bdb3475f21982cd": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_7dd8a76b2eb242cbb88b78cad4093040", "max": 5, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_12b1baa57d574403bc532a31535ef487", "value": 5 } }, "0ad2469f323a49daafecbc14d3b23c65": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_9896c7957d134c7baa78e15d3d3d5711", "placeholder": "​", "style": "IPY_MODEL_226720cf06b445adb4ed67ffa5888667", "value": " 5/5 [01:15<00:00, 12.59s/it]" } }, "335fe5e5da6c448eafdb2e33fc85ca40": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, 
"bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "1949ff59aa09450a902ae2de8c21f573": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "be78758cc7cd47bcb87da6ed79379fc1": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "7dd8a76b2eb242cbb88b78cad4093040": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "12b1baa57d574403bc532a31535ef487": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", 
"state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "9896c7957d134c7baa78e15d3d3d5711": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "226720cf06b445adb4ed67ffa5888667": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } } } } }, "nbformat": 4, "nbformat_minor": 5 }