{ "cells": [ { "cell_type": "markdown", "id": "f0eba489", "metadata": { "papermill": { "duration": 0.006601, "end_time": "2023-11-05T14:36:13.276878", "exception": false, "start_time": "2023-11-05T14:36:13.270277", "status": "completed" }, "tags": [] }, "source": [ "# Entrainement inspiré par mosaicml/mpt-7b-instruct" ] }, { "cell_type": "markdown", "id": "db789528", "metadata": { "papermill": { "duration": 0.005729, "end_time": "2023-11-05T14:36:13.288764", "exception": false, "start_time": "2023-11-05T14:36:13.283035", "status": "completed" }, "tags": [] }, "source": [ "## Installation des librairies manquantes" ] }, { "cell_type": "code", "execution_count": 1, "id": "84f6191b", "metadata": { "execution": { "iopub.execute_input": "2023-11-05T14:36:13.302534Z", "iopub.status.busy": "2023-11-05T14:36:13.302000Z", "iopub.status.idle": "2023-11-05T14:36:13.307788Z", "shell.execute_reply": "2023-11-05T14:36:13.306836Z" }, "papermill": { "duration": 0.015307, "end_time": "2023-11-05T14:36:13.310035", "exception": false, "start_time": "2023-11-05T14:36:13.294728", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "#! pip install --upgrade git+https://github.com/huggingface/transformers" ] }, { "cell_type": "code", "execution_count": 2, "id": "fc66184e", "metadata": { "execution": { "iopub.execute_input": "2023-11-05T14:36:13.323905Z", "iopub.status.busy": "2023-11-05T14:36:13.323187Z", "iopub.status.idle": "2023-11-05T14:36:37.870611Z", "shell.execute_reply": "2023-11-05T14:36:37.869380Z" }, "papermill": { "duration": 24.557026, "end_time": "2023-11-05T14:36:37.873205", "exception": false, "start_time": "2023-11-05T14:36:13.316179", "status": "completed" }, "tags": [] }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Found existing installation: transformers 4.33.0\r\n", "Uninstalling transformers-4.33.0:\r\n", " Successfully uninstalled transformers-4.33.0\r\n", "Collecting transformers\r\n", " Downloading transformers-4.35.0-py3-none-any.whl (7.9 MB)\r\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.9/7.9 MB\u001b[0m \u001b[31m53.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\r\n", "\u001b[?25hRequirement already satisfied: filelock in /opt/conda/lib/python3.10/site-packages (from transformers) (3.12.2)\r\n", "Requirement already satisfied: huggingface-hub<1.0,>=0.16.4 in /opt/conda/lib/python3.10/site-packages (from transformers) (0.16.4)\r\n", "Requirement already satisfied: numpy>=1.17 in /opt/conda/lib/python3.10/site-packages (from transformers) (1.23.5)\r\n", "Requirement already satisfied: packaging>=20.0 in /opt/conda/lib/python3.10/site-packages (from transformers) (21.3)\r\n", "Requirement already satisfied: pyyaml>=5.1 in /opt/conda/lib/python3.10/site-packages (from transformers) (6.0)\r\n", "Requirement already satisfied: regex!=2019.12.17 in /opt/conda/lib/python3.10/site-packages (from transformers) (2023.6.3)\r\n", "Requirement already satisfied: requests in /opt/conda/lib/python3.10/site-packages (from transformers) (2.31.0)\r\n", "Collecting tokenizers<0.15,>=0.14 (from transformers)\r\n", " Downloading tokenizers-0.14.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.8 MB)\r\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.8/3.8 MB\u001b[0m \u001b[31m80.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\r\n", "\u001b[?25hRequirement already satisfied: safetensors>=0.3.1 in /opt/conda/lib/python3.10/site-packages (from transformers) (0.3.3)\r\n", 
"Requirement already satisfied: tqdm>=4.27 in /opt/conda/lib/python3.10/site-packages (from transformers) (4.66.1)\r\n", "Requirement already satisfied: fsspec in /opt/conda/lib/python3.10/site-packages (from huggingface-hub<1.0,>=0.16.4->transformers) (2023.9.0)\r\n", "Requirement already satisfied: typing-extensions>=3.7.4.3 in /opt/conda/lib/python3.10/site-packages (from huggingface-hub<1.0,>=0.16.4->transformers) (4.6.3)\r\n", "Requirement already satisfied: pyparsing!=3.0.5,>=2.0.2 in /opt/conda/lib/python3.10/site-packages (from packaging>=20.0->transformers) (3.0.9)\r\n", "Requirement already satisfied: charset-normalizer<4,>=2 in /opt/conda/lib/python3.10/site-packages (from requests->transformers) (3.1.0)\r\n", "Requirement already satisfied: idna<4,>=2.5 in /opt/conda/lib/python3.10/site-packages (from requests->transformers) (3.4)\r\n", "Requirement already satisfied: urllib3<3,>=1.21.1 in /opt/conda/lib/python3.10/site-packages (from requests->transformers) (1.26.15)\r\n", "Requirement already satisfied: certifi>=2017.4.17 in /opt/conda/lib/python3.10/site-packages (from requests->transformers) (2023.7.22)\r\n", "Installing collected packages: tokenizers, transformers\r\n", " Attempting uninstall: tokenizers\r\n", " Found existing installation: tokenizers 0.13.3\r\n", " Uninstalling tokenizers-0.13.3:\r\n", " Successfully uninstalled tokenizers-0.13.3\r\n", "Successfully installed tokenizers-0.14.1 transformers-4.35.0\r\n" ] } ], "source": [ "! pip uninstall transformers -y\n", "! pip install transformers" ] }, { "cell_type": "code", "execution_count": 3, "id": "7c0806c6", "metadata": { "execution": { "iopub.execute_input": "2023-11-05T14:36:37.890240Z", "iopub.status.busy": "2023-11-05T14:36:37.889545Z", "iopub.status.idle": "2023-11-05T14:36:42.780381Z", "shell.execute_reply": "2023-11-05T14:36:42.779237Z" }, "papermill": { "duration": 4.901823, "end_time": "2023-11-05T14:36:42.782503", "exception": false, "start_time": "2023-11-05T14:36:37.880680", "status": "completed" }, "tags": [] }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "4.35.0\n" ] } ], "source": [ "import transformers\n", "print(transformers.__version__)" ] }, { "cell_type": "code", "execution_count": 4, "id": "62564dae", "metadata": { "execution": { "iopub.execute_input": "2023-11-05T14:36:42.798744Z", "iopub.status.busy": "2023-11-05T14:36:42.798132Z", "iopub.status.idle": "2023-11-05T14:37:50.726556Z", "shell.execute_reply": "2023-11-05T14:37:50.725590Z" }, "papermill": { "duration": 67.939168, "end_time": "2023-11-05T14:37:50.728935", "exception": false, "start_time": "2023-11-05T14:36:42.789767", "status": "completed" }, "tags": [] }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Collecting bitsandbytes\r\n", " Downloading bitsandbytes-0.41.1-py3-none-any.whl (92.6 MB)\r\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m92.6/92.6 MB\u001b[0m \u001b[31m14.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\r\n", "\u001b[?25hInstalling collected packages: bitsandbytes\r\n", "Successfully installed bitsandbytes-0.41.1\r\n", "Collecting einops\r\n", " Downloading einops-0.7.0-py3-none-any.whl (44 kB)\r\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m44.6/44.6 kB\u001b[0m \u001b[31m2.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\r\n", "\u001b[?25hInstalling collected packages: einops\r\n", "Successfully installed einops-0.7.0\r\n", "Collecting peft\r\n", " Downloading peft-0.6.0-py3-none-any.whl 
(134 kB)\r\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m134.9/134.9 kB\u001b[0m \u001b[31m4.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\r\n", "\u001b[?25hRequirement already satisfied: numpy>=1.17 in /opt/conda/lib/python3.10/site-packages (from peft) (1.23.5)\r\n", "Requirement already satisfied: packaging>=20.0 in /opt/conda/lib/python3.10/site-packages (from peft) (21.3)\r\n", "Requirement already satisfied: psutil in /opt/conda/lib/python3.10/site-packages (from peft) (5.9.3)\r\n", "Requirement already satisfied: pyyaml in /opt/conda/lib/python3.10/site-packages (from peft) (6.0)\r\n", "Requirement already satisfied: torch>=1.13.0 in /opt/conda/lib/python3.10/site-packages (from peft) (2.0.0)\r\n", "Requirement already satisfied: transformers in /opt/conda/lib/python3.10/site-packages (from peft) (4.35.0)\r\n", "Requirement already satisfied: tqdm in /opt/conda/lib/python3.10/site-packages (from peft) (4.66.1)\r\n", "Requirement already satisfied: accelerate>=0.21.0 in /opt/conda/lib/python3.10/site-packages (from peft) (0.22.0)\r\n", "Requirement already satisfied: safetensors in /opt/conda/lib/python3.10/site-packages (from peft) (0.3.3)\r\n", "Requirement already satisfied: pyparsing!=3.0.5,>=2.0.2 in /opt/conda/lib/python3.10/site-packages (from packaging>=20.0->peft) (3.0.9)\r\n", "Requirement already satisfied: filelock in /opt/conda/lib/python3.10/site-packages (from torch>=1.13.0->peft) (3.12.2)\r\n", "Requirement already satisfied: typing-extensions in /opt/conda/lib/python3.10/site-packages (from torch>=1.13.0->peft) (4.6.3)\r\n", "Requirement already satisfied: sympy in /opt/conda/lib/python3.10/site-packages (from torch>=1.13.0->peft) (1.12)\r\n", "Requirement already satisfied: networkx in /opt/conda/lib/python3.10/site-packages (from torch>=1.13.0->peft) (3.1)\r\n", "Requirement already satisfied: jinja2 in /opt/conda/lib/python3.10/site-packages (from torch>=1.13.0->peft) (3.1.2)\r\n", "Requirement already satisfied: huggingface-hub<1.0,>=0.16.4 in /opt/conda/lib/python3.10/site-packages (from transformers->peft) (0.16.4)\r\n", "Requirement already satisfied: regex!=2019.12.17 in /opt/conda/lib/python3.10/site-packages (from transformers->peft) (2023.6.3)\r\n", "Requirement already satisfied: requests in /opt/conda/lib/python3.10/site-packages (from transformers->peft) (2.31.0)\r\n", "Requirement already satisfied: tokenizers<0.15,>=0.14 in /opt/conda/lib/python3.10/site-packages (from transformers->peft) (0.14.1)\r\n", "Requirement already satisfied: fsspec in /opt/conda/lib/python3.10/site-packages (from huggingface-hub<1.0,>=0.16.4->transformers->peft) (2023.9.0)\r\n", "Requirement already satisfied: MarkupSafe>=2.0 in /opt/conda/lib/python3.10/site-packages (from jinja2->torch>=1.13.0->peft) (2.1.3)\r\n", "Requirement already satisfied: charset-normalizer<4,>=2 in /opt/conda/lib/python3.10/site-packages (from requests->transformers->peft) (3.1.0)\r\n", "Requirement already satisfied: idna<4,>=2.5 in /opt/conda/lib/python3.10/site-packages (from requests->transformers->peft) (3.4)\r\n", "Requirement already satisfied: urllib3<3,>=1.21.1 in /opt/conda/lib/python3.10/site-packages (from requests->transformers->peft) (1.26.15)\r\n", "Requirement already satisfied: certifi>=2017.4.17 in /opt/conda/lib/python3.10/site-packages (from requests->transformers->peft) (2023.7.22)\r\n", "Requirement already satisfied: mpmath>=0.19 in /opt/conda/lib/python3.10/site-packages (from sympy->torch>=1.13.0->peft) (1.3.0)\r\n", "Installing 
collected packages: peft\r\n", "Successfully installed peft-0.6.0\r\n", "Collecting trl\r\n", " Downloading trl-0.7.2-py3-none-any.whl (124 kB)\r\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m124.0/124.0 kB\u001b[0m \u001b[31m3.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\r\n", "\u001b[?25hRequirement already satisfied: torch>=1.4.0 in /opt/conda/lib/python3.10/site-packages (from trl) (2.0.0)\r\n", "Requirement already satisfied: transformers>=4.18.0 in /opt/conda/lib/python3.10/site-packages (from trl) (4.35.0)\r\n", "Requirement already satisfied: numpy>=1.18.2 in /opt/conda/lib/python3.10/site-packages (from trl) (1.23.5)\r\n", "Requirement already satisfied: accelerate in /opt/conda/lib/python3.10/site-packages (from trl) (0.22.0)\r\n", "Requirement already satisfied: datasets in /opt/conda/lib/python3.10/site-packages (from trl) (2.1.0)\r\n", "Collecting tyro>=0.5.7 (from trl)\r\n", " Downloading tyro-0.5.12-py3-none-any.whl (99 kB)\r\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m99.9/99.9 kB\u001b[0m \u001b[31m9.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\r\n", "\u001b[?25hRequirement already satisfied: filelock in /opt/conda/lib/python3.10/site-packages (from torch>=1.4.0->trl) (3.12.2)\r\n", "Requirement already satisfied: typing-extensions in /opt/conda/lib/python3.10/site-packages (from torch>=1.4.0->trl) (4.6.3)\r\n", "Requirement already satisfied: sympy in /opt/conda/lib/python3.10/site-packages (from torch>=1.4.0->trl) (1.12)\r\n", "Requirement already satisfied: networkx in /opt/conda/lib/python3.10/site-packages (from torch>=1.4.0->trl) (3.1)\r\n", "Requirement already satisfied: jinja2 in /opt/conda/lib/python3.10/site-packages (from torch>=1.4.0->trl) (3.1.2)\r\n", "Requirement already satisfied: huggingface-hub<1.0,>=0.16.4 in /opt/conda/lib/python3.10/site-packages (from transformers>=4.18.0->trl) (0.16.4)\r\n", "Requirement already satisfied: packaging>=20.0 in /opt/conda/lib/python3.10/site-packages (from transformers>=4.18.0->trl) (21.3)\r\n", "Requirement already satisfied: pyyaml>=5.1 in /opt/conda/lib/python3.10/site-packages (from transformers>=4.18.0->trl) (6.0)\r\n", "Requirement already satisfied: regex!=2019.12.17 in /opt/conda/lib/python3.10/site-packages (from transformers>=4.18.0->trl) (2023.6.3)\r\n", "Requirement already satisfied: requests in /opt/conda/lib/python3.10/site-packages (from transformers>=4.18.0->trl) (2.31.0)\r\n", "Requirement already satisfied: tokenizers<0.15,>=0.14 in /opt/conda/lib/python3.10/site-packages (from transformers>=4.18.0->trl) (0.14.1)\r\n", "Requirement already satisfied: safetensors>=0.3.1 in /opt/conda/lib/python3.10/site-packages (from transformers>=4.18.0->trl) (0.3.3)\r\n", "Requirement already satisfied: tqdm>=4.27 in /opt/conda/lib/python3.10/site-packages (from transformers>=4.18.0->trl) (4.66.1)\r\n", "Requirement already satisfied: docstring-parser>=0.14.1 in /opt/conda/lib/python3.10/site-packages (from tyro>=0.5.7->trl) (0.15)\r\n", "Requirement already satisfied: rich>=11.1.0 in /opt/conda/lib/python3.10/site-packages (from tyro>=0.5.7->trl) (13.4.2)\r\n", "Collecting shtab>=1.5.6 (from tyro>=0.5.7->trl)\r\n", " Downloading shtab-1.6.4-py3-none-any.whl (13 kB)\r\n", "Requirement already satisfied: psutil in /opt/conda/lib/python3.10/site-packages (from accelerate->trl) (5.9.3)\r\n", "Requirement already satisfied: pyarrow>=5.0.0 in /opt/conda/lib/python3.10/site-packages (from datasets->trl) (11.0.0)\r\n", "Requirement already 
satisfied: dill in /opt/conda/lib/python3.10/site-packages (from datasets->trl) (0.3.7)\r\n", "Requirement already satisfied: pandas in /opt/conda/lib/python3.10/site-packages (from datasets->trl) (2.0.2)\r\n", "Requirement already satisfied: xxhash in /opt/conda/lib/python3.10/site-packages (from datasets->trl) (3.3.0)\r\n", "Requirement already satisfied: multiprocess in /opt/conda/lib/python3.10/site-packages (from datasets->trl) (0.70.15)\r\n", "Requirement already satisfied: fsspec[http]>=2021.05.0 in /opt/conda/lib/python3.10/site-packages (from datasets->trl) (2023.9.0)\r\n", "Requirement already satisfied: aiohttp in /opt/conda/lib/python3.10/site-packages (from datasets->trl) (3.8.4)\r\n", "Requirement already satisfied: responses<0.19 in /opt/conda/lib/python3.10/site-packages (from datasets->trl) (0.18.0)\r\n", "Requirement already satisfied: attrs>=17.3.0 in /opt/conda/lib/python3.10/site-packages (from aiohttp->datasets->trl) (23.1.0)\r\n", "Requirement already satisfied: charset-normalizer<4.0,>=2.0 in /opt/conda/lib/python3.10/site-packages (from aiohttp->datasets->trl) (3.1.0)\r\n", "Requirement already satisfied: multidict<7.0,>=4.5 in /opt/conda/lib/python3.10/site-packages (from aiohttp->datasets->trl) (6.0.4)\r\n", "Requirement already satisfied: async-timeout<5.0,>=4.0.0a3 in /opt/conda/lib/python3.10/site-packages (from aiohttp->datasets->trl) (4.0.2)\r\n", "Requirement already satisfied: yarl<2.0,>=1.0 in /opt/conda/lib/python3.10/site-packages (from aiohttp->datasets->trl) (1.9.2)\r\n", "Requirement already satisfied: frozenlist>=1.1.1 in /opt/conda/lib/python3.10/site-packages (from aiohttp->datasets->trl) (1.3.3)\r\n", "Requirement already satisfied: aiosignal>=1.1.2 in /opt/conda/lib/python3.10/site-packages (from aiohttp->datasets->trl) (1.3.1)\r\n", "Requirement already satisfied: pyparsing!=3.0.5,>=2.0.2 in /opt/conda/lib/python3.10/site-packages (from packaging>=20.0->transformers>=4.18.0->trl) (3.0.9)\r\n", "Requirement already satisfied: idna<4,>=2.5 in /opt/conda/lib/python3.10/site-packages (from requests->transformers>=4.18.0->trl) (3.4)\r\n", "Requirement already satisfied: urllib3<3,>=1.21.1 in /opt/conda/lib/python3.10/site-packages (from requests->transformers>=4.18.0->trl) (1.26.15)\r\n", "Requirement already satisfied: certifi>=2017.4.17 in /opt/conda/lib/python3.10/site-packages (from requests->transformers>=4.18.0->trl) (2023.7.22)\r\n", "Requirement already satisfied: markdown-it-py>=2.2.0 in /opt/conda/lib/python3.10/site-packages (from rich>=11.1.0->tyro>=0.5.7->trl) (2.2.0)\r\n", "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /opt/conda/lib/python3.10/site-packages (from rich>=11.1.0->tyro>=0.5.7->trl) (2.15.1)\r\n", "Requirement already satisfied: MarkupSafe>=2.0 in /opt/conda/lib/python3.10/site-packages (from jinja2->torch>=1.4.0->trl) (2.1.3)\r\n", "Requirement already satisfied: python-dateutil>=2.8.2 in /opt/conda/lib/python3.10/site-packages (from pandas->datasets->trl) (2.8.2)\r\n", "Requirement already satisfied: pytz>=2020.1 in /opt/conda/lib/python3.10/site-packages (from pandas->datasets->trl) (2023.3)\r\n", "Requirement already satisfied: tzdata>=2022.1 in /opt/conda/lib/python3.10/site-packages (from pandas->datasets->trl) (2023.3)\r\n", "Requirement already satisfied: mpmath>=0.19 in /opt/conda/lib/python3.10/site-packages (from sympy->torch>=1.4.0->trl) (1.3.0)\r\n", "Requirement already satisfied: mdurl~=0.1 in /opt/conda/lib/python3.10/site-packages (from markdown-it-py>=2.2.0->rich>=11.1.0->tyro>=0.5.7->trl) 
(0.1.0)\r\n", "Requirement already satisfied: six>=1.5 in /opt/conda/lib/python3.10/site-packages (from python-dateutil>=2.8.2->pandas->datasets->trl) (1.16.0)\r\n", "Installing collected packages: shtab, tyro, trl\r\n", "Successfully installed shtab-1.6.4 trl-0.7.2 tyro-0.5.12\r\n", "Found existing installation: datasets 2.1.0\r\n", "Uninstalling datasets-2.1.0:\r\n", " Successfully uninstalled datasets-2.1.0\r\n", "Collecting datasets==2.13.1\r\n", " Downloading datasets-2.13.1-py3-none-any.whl (486 kB)\r\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m486.2/486.2 kB\u001b[0m \u001b[31m8.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\r\n", "\u001b[?25hRequirement already satisfied: numpy>=1.17 in /opt/conda/lib/python3.10/site-packages (from datasets==2.13.1) (1.23.5)\r\n", "Requirement already satisfied: pyarrow>=8.0.0 in /opt/conda/lib/python3.10/site-packages (from datasets==2.13.1) (11.0.0)\r\n", "Collecting dill<0.3.7,>=0.3.0 (from datasets==2.13.1)\r\n", " Downloading dill-0.3.6-py3-none-any.whl (110 kB)\r\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m110.5/110.5 kB\u001b[0m \u001b[31m12.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\r\n", "\u001b[?25hRequirement already satisfied: pandas in /opt/conda/lib/python3.10/site-packages (from datasets==2.13.1) (2.0.2)\r\n", "Requirement already satisfied: requests>=2.19.0 in /opt/conda/lib/python3.10/site-packages (from datasets==2.13.1) (2.31.0)\r\n", "Requirement already satisfied: tqdm>=4.62.1 in /opt/conda/lib/python3.10/site-packages (from datasets==2.13.1) (4.66.1)\r\n", "Requirement already satisfied: xxhash in /opt/conda/lib/python3.10/site-packages (from datasets==2.13.1) (3.3.0)\r\n", "Requirement already satisfied: multiprocess in /opt/conda/lib/python3.10/site-packages (from datasets==2.13.1) (0.70.15)\r\n", "Requirement already satisfied: fsspec[http]>=2021.11.1 in /opt/conda/lib/python3.10/site-packages (from datasets==2.13.1) (2023.9.0)\r\n", "Requirement already satisfied: aiohttp in /opt/conda/lib/python3.10/site-packages (from datasets==2.13.1) (3.8.4)\r\n", "Requirement already satisfied: huggingface-hub<1.0.0,>=0.11.0 in /opt/conda/lib/python3.10/site-packages (from datasets==2.13.1) (0.16.4)\r\n", "Requirement already satisfied: packaging in /opt/conda/lib/python3.10/site-packages (from datasets==2.13.1) (21.3)\r\n", "Requirement already satisfied: pyyaml>=5.1 in /opt/conda/lib/python3.10/site-packages (from datasets==2.13.1) (6.0)\r\n", "Requirement already satisfied: attrs>=17.3.0 in /opt/conda/lib/python3.10/site-packages (from aiohttp->datasets==2.13.1) (23.1.0)\r\n", "Requirement already satisfied: charset-normalizer<4.0,>=2.0 in /opt/conda/lib/python3.10/site-packages (from aiohttp->datasets==2.13.1) (3.1.0)\r\n", "Requirement already satisfied: multidict<7.0,>=4.5 in /opt/conda/lib/python3.10/site-packages (from aiohttp->datasets==2.13.1) (6.0.4)\r\n", "Requirement already satisfied: async-timeout<5.0,>=4.0.0a3 in /opt/conda/lib/python3.10/site-packages (from aiohttp->datasets==2.13.1) (4.0.2)\r\n", "Requirement already satisfied: yarl<2.0,>=1.0 in /opt/conda/lib/python3.10/site-packages (from aiohttp->datasets==2.13.1) (1.9.2)\r\n", "Requirement already satisfied: frozenlist>=1.1.1 in /opt/conda/lib/python3.10/site-packages (from aiohttp->datasets==2.13.1) (1.3.3)\r\n", "Requirement already satisfied: aiosignal>=1.1.2 in /opt/conda/lib/python3.10/site-packages (from aiohttp->datasets==2.13.1) (1.3.1)\r\n", "Requirement already satisfied: 
filelock in /opt/conda/lib/python3.10/site-packages (from huggingface-hub<1.0.0,>=0.11.0->datasets==2.13.1) (3.12.2)\r\n", "Requirement already satisfied: typing-extensions>=3.7.4.3 in /opt/conda/lib/python3.10/site-packages (from huggingface-hub<1.0.0,>=0.11.0->datasets==2.13.1) (4.6.3)\r\n", "Requirement already satisfied: pyparsing!=3.0.5,>=2.0.2 in /opt/conda/lib/python3.10/site-packages (from packaging->datasets==2.13.1) (3.0.9)\r\n", "Requirement already satisfied: idna<4,>=2.5 in /opt/conda/lib/python3.10/site-packages (from requests>=2.19.0->datasets==2.13.1) (3.4)\r\n", "Requirement already satisfied: urllib3<3,>=1.21.1 in /opt/conda/lib/python3.10/site-packages (from requests>=2.19.0->datasets==2.13.1) (1.26.15)\r\n", "Requirement already satisfied: certifi>=2017.4.17 in /opt/conda/lib/python3.10/site-packages (from requests>=2.19.0->datasets==2.13.1) (2023.7.22)\r\n", "INFO: pip is looking at multiple versions of multiprocess to determine which version is compatible with other requirements. This could take a while.\r\n", "Collecting multiprocess (from datasets==2.13.1)\r\n", " Downloading multiprocess-0.70.14-py310-none-any.whl (134 kB)\r\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m134.3/134.3 kB\u001b[0m \u001b[31m14.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\r\n", "\u001b[?25hRequirement already satisfied: python-dateutil>=2.8.2 in /opt/conda/lib/python3.10/site-packages (from pandas->datasets==2.13.1) (2.8.2)\r\n", "Requirement already satisfied: pytz>=2020.1 in /opt/conda/lib/python3.10/site-packages (from pandas->datasets==2.13.1) (2023.3)\r\n", "Requirement already satisfied: tzdata>=2022.1 in /opt/conda/lib/python3.10/site-packages (from pandas->datasets==2.13.1) (2023.3)\r\n", "Requirement already satisfied: six>=1.5 in /opt/conda/lib/python3.10/site-packages (from python-dateutil>=2.8.2->pandas->datasets==2.13.1) (1.16.0)\r\n", "Installing collected packages: dill, multiprocess, datasets\r\n", " Attempting uninstall: dill\r\n", " Found existing installation: dill 0.3.7\r\n", " Uninstalling dill-0.3.7:\r\n", " Successfully uninstalled dill-0.3.7\r\n", " Attempting uninstall: multiprocess\r\n", " Found existing installation: multiprocess 0.70.15\r\n", " Uninstalling multiprocess-0.70.15:\r\n", " Successfully uninstalled multiprocess-0.70.15\r\n", "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\r\n", "apache-beam 2.46.0 requires dill<0.3.2,>=0.3.1.1, but you have dill 0.3.6 which is incompatible.\r\n", "apache-beam 2.46.0 requires pyarrow<10.0.0,>=3.0.0, but you have pyarrow 11.0.0 which is incompatible.\r\n", "pathos 0.3.1 requires dill>=0.3.7, but you have dill 0.3.6 which is incompatible.\r\n", "pathos 0.3.1 requires multiprocess>=0.70.15, but you have multiprocess 0.70.14 which is incompatible.\r\n", "pymc3 3.11.5 requires numpy<1.22.2,>=1.15.0, but you have numpy 1.23.5 which is incompatible.\r\n", "pymc3 3.11.5 requires scipy<1.8.0,>=1.7.3, but you have scipy 1.11.2 which is incompatible.\u001b[0m\u001b[31m\r\n", "\u001b[0mSuccessfully installed datasets-2.13.1 dill-0.3.6 multiprocess-0.70.14\r\n" ] }, { "data": { "text/plain": [ "'2.13.1'" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "! pip install bitsandbytes\n", "! pip install einops\n", "! pip install peft\n", "! 
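 pip install trl\n",
"# none of these are pre-installed here: bitsandbytes gives 4-bit quantization and the paged optimizer, peft gives LoRA, trl gives SFTTrainer, and einops is imported further down\n",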
"\n",
"# There is a bug with some datasets versions, so we install a more recent one than the pre-installed environment provides:\n",
"! pip uninstall datasets -y\n",
"! pip install datasets==2.13.1\n",
"\n",
"import datasets\n",
"datasets.__version__" ] },
{ "cell_type": "markdown", "id": "2940cba6", "metadata": { "papermill": { "duration": 0.015349, "end_time": "2023-11-05T14:37:50.760273", "exception": false, "start_time": "2023-11-05T14:37:50.744924", "status": "completed" }, "tags": [] }, "source": [ "## Importing the libraries" ] },
{ "cell_type": "code", "execution_count": 5, "id": "545be344", "metadata": { "execution": { "iopub.execute_input": "2023-11-05T14:37:50.792822Z", "iopub.status.busy": "2023-11-05T14:37:50.792325Z", "iopub.status.idle": "2023-11-05T14:38:00.834522Z", "shell.execute_reply": "2023-11-05T14:38:00.833660Z" }, "papermill": { "duration": 10.060969, "end_time": "2023-11-05T14:38:00.836968", "exception": false, "start_time": "2023-11-05T14:37:50.775999", "status": "completed" }, "tags": [] }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/opt/conda/lib/python3.10/site-packages/scipy/__init__.py:146: UserWarning: A NumPy version >=1.16.5 and <1.23.0 is required for this version of SciPy (detected version 1.23.5\n", " warnings.warn(f\"A NumPy version >={np_minversion} and <{np_maxversion}\"\n" ] } ], "source": [ "import einops\n", "import torch\n", "from datasets import load_dataset\n", "from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, TrainingArguments\n", "from peft import LoraConfig\n", "from trl import SFTTrainer" ] },
{ "cell_type": "markdown", "id": "9a558c2c", "metadata": { "papermill": { "duration": 0.014581, "end_time": "2023-11-05T14:38:00.866800", "exception": false, "start_time": "2023-11-05T14:38:00.852219", "status": "completed" }, "tags": [] }, "source": [ "## Downloading the dataset for fine-tuning" ] },
{ "cell_type": "code", "execution_count": 6, "id": "1257408b", "metadata": { "execution": { "iopub.execute_input": "2023-11-05T14:38:00.897674Z", "iopub.status.busy": "2023-11-05T14:38:00.897347Z", "iopub.status.idle": "2023-11-05T14:38:02.495699Z", "shell.execute_reply": "2023-11-05T14:38:02.494614Z" }, "id": "0X3kHnskSWU4", "outputId": "1c116f6e-546a-4f75-bc6f-2eec2a505858", "papermill": { "duration": 1.616143, "end_time": "2023-11-05T14:38:02.497747", "exception": false, "start_time": "2023-11-05T14:38:00.881604", "status": "completed" }, "tags": [] }, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "77c2ae5677444324b6cc4f93316a9392", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Downloading readme: 0%| | 0.00/1.03k [00:00, ?B/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "Downloading and preparing dataset csv/Laurent1--MedQuad-MedicalQnADataset_128tokens_max to /root/.cache/huggingface/datasets/Laurent1___csv/Laurent1--MedQuad-MedicalQnADataset_128tokens_max-2fc5fd9a3c22611f/0.0.0/eea64c71ca8b46dd3f537ed218fc9bf495d5707789152eb2764f5c78fa66d59d...\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "5ee6b0f0d14f48afa608d13c3f145f8e", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Downloading data files: 0%| | 0/1 [00:00, ?it/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "d5ee1e7c08374549938e377215643964", "version_major": 2,
"version_minor": 0 }, "text/plain": [ "Downloading data: 0%| | 0.00/7.31M [00:00, ?B/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "15bcb780d0d84277a2dd9939ca5da8f2", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Extracting data files: 0%| | 0/1 [00:00, ?it/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "779d16aa846d444c846e3a284f1fa851", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Generating train split: 0 examples [00:00, ? examples/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "Dataset csv downloaded and prepared to /root/.cache/huggingface/datasets/Laurent1___csv/Laurent1--MedQuad-MedicalQnADataset_128tokens_max-2fc5fd9a3c22611f/0.0.0/eea64c71ca8b46dd3f537ed218fc9bf495d5707789152eb2764f5c78fa66d59d. Subsequent calls will reuse this data.\n" ] }, { "data": { "text/plain": [ "Dataset({\n", " features: ['text'],\n", " num_rows: 5120\n", "})" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "\n", "dataset_name = \"Laurent1/MedQuad-MedicalQnADataset_128tokens_max\" \n", "# On fine tune les 5000 premieres questions sinon c'est un peu long...\n", "dataset = load_dataset(dataset_name, split='train[:5120]')\n", "dataset" ] }, { "cell_type": "markdown", "id": "ecb2b90e", "metadata": { "id": "rjOMoSbGSxx9", "papermill": { "duration": 0.015642, "end_time": "2023-11-05T14:38:02.529877", "exception": false, "start_time": "2023-11-05T14:38:02.514235", "status": "completed" }, "tags": [] }, "source": [ "## Téléchargement du model pre-entrainé et de son tokenizer" ] }, { "cell_type": "code", "execution_count": 7, "id": "4cfaaf79", "metadata": { "execution": { "iopub.execute_input": "2023-11-05T14:38:02.565154Z", "iopub.status.busy": "2023-11-05T14:38:02.564534Z", "iopub.status.idle": "2023-11-05T14:40:20.736660Z", "shell.execute_reply": "2023-11-05T14:40:20.735856Z" }, "id": "ZwXZbQ2dSwzI", "outputId": "a57e521a-a8a3-48e9-a478-63334083f94a", "papermill": { "duration": 138.192221, "end_time": "2023-11-05T14:40:20.738822", "exception": false, "start_time": "2023-11-05T14:38:02.546601", "status": "completed" }, "tags": [] }, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "c4c2f6816372462588b0e9193672a8a0", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Downloading (…)lve/main/config.json: 0%| | 0.00/571 [00:00, ?B/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "7117e6f375df4914b3fc50d2c6dbb836", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Downloading (…)model.bin.index.json: 0%| | 0.00/23.9k [00:00, ?B/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "1a37267929e448f6ac64d87784d92856", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Downloading shards: 0%| | 0/2 [00:00, ?it/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "227e39e14d444e8f8627ce2d7fa288f8", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Downloading (…)l-00001-of-00002.bin: 0%| | 0.00/9.94G [00:00, ?B/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { 
"model_id": "5e7d3e28759f4a9b9735433bf5b571f7", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Downloading (…)l-00002-of-00002.bin: 0%| | 0.00/5.06G [00:00, ?B/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "1ed57c903df4431c842f5db457942c50", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Loading checkpoint shards: 0%| | 0/2 [00:00, ?it/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "4f713cbf15ec45358818f491d2b16600", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Downloading (…)neration_config.json: 0%| | 0.00/116 [00:00, ?B/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "ad5807b551ec46509deb6b9aaeb0624b", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Downloading (…)okenizer_config.json: 0%| | 0.00/1.47k [00:00, ?B/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "a85ecb64a315436f91d21fd33a5e822b", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Downloading tokenizer.model: 0%| | 0.00/493k [00:00, ?B/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "bc23071c505d49969604cb3870c9664f", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Downloading (…)/main/tokenizer.json: 0%| | 0.00/1.80M [00:00, ?B/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "f23c16709f084247810ad9a8e8973e7b", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Downloading (…)cial_tokens_map.json: 0%| | 0.00/72.0 [00:00, ?B/s]" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# BitsAndBytes permet le fine tuning avec \"quantification\" pour réduire l'impact mémoire et les calculs\n", "bnb_config = BitsAndBytesConfig(\n", " load_in_4bit=True,\n", " bnb_4bit_quant_type=\"nf4\",\n", " bnb_4bit_compute_dtype=torch.float16,\n", ")\n", "\n", "model = AutoModelForCausalLM.from_pretrained(\n", " \"mistralai/Mistral-7B-Instruct-v0.1\",\n", " device_map=\"auto\",\n", " torch_dtype=torch.float16, #torch.bfloat16,\n", " trust_remote_code=True\n", " )\n", "\n", "tokenizer = AutoTokenizer.from_pretrained(\"mistralai/Mistral-7B-Instruct-v0.1\")\n", "tokenizer.pad_token = tokenizer.eos_token\n", "tokenizer.padding_side = \"right\" " ] }, { "cell_type": "code", "execution_count": 8, "id": "6674e6c8", "metadata": { "execution": { "iopub.execute_input": "2023-11-05T14:40:20.775027Z", "iopub.status.busy": "2023-11-05T14:40:20.774728Z", "iopub.status.idle": "2023-11-05T14:40:20.888681Z", "shell.execute_reply": "2023-11-05T14:40:20.887708Z" }, "papermill": { "duration": 0.135095, "end_time": "2023-11-05T14:40:20.890945", "exception": false, "start_time": "2023-11-05T14:40:20.755850", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "tokenizer = AutoTokenizer.from_pretrained(\"mistralai/Mistral-7B-Instruct-v0.1\")\n", "tokenizer.pad_token = tokenizer.eos_token\n", "tokenizer.padding_side = \"right\" " ] }, { "cell_type": "markdown", "id": "7c47b340", "metadata": { "id": "NuAx3zBeUL1q", "papermill": { "duration": 0.01656, "end_time": "2023-11-05T14:40:20.924463", "exception": false, "start_time": "2023-11-05T14:40:20.907903", "status": "completed" }, "tags": [] 
}, "source": [ "## Configuration du peft LoRa" ] }, { "cell_type": "code", "execution_count": 9, "id": "17410f8e", "metadata": { "execution": { "iopub.execute_input": "2023-11-05T14:40:20.959753Z", "iopub.status.busy": "2023-11-05T14:40:20.959437Z", "iopub.status.idle": "2023-11-05T14:40:20.964613Z", "shell.execute_reply": "2023-11-05T14:40:20.963830Z" }, "id": "dQdvjTYTT1vQ", "papermill": { "duration": 0.025208, "end_time": "2023-11-05T14:40:20.966437", "exception": false, "start_time": "2023-11-05T14:40:20.941229", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "lora_alpha = 16\n", "lora_dropout = 0.1\n", "lora_r = 32\n", "\n", "peft_config = LoraConfig(\n", " lora_alpha=lora_alpha,\n", " lora_dropout=lora_dropout,\n", " r=lora_r,\n", " bias=\"none\",\n", " task_type=\"CAUSAL_LM\",\n", " target_modules=[\n", " \"Wqkv\",\n", " \"out_proj\",\n", " \"up_proj\",\n", " \"down_proj\",\n", " ]\n", ")" ] }, { "cell_type": "markdown", "id": "6160fc4a", "metadata": { "id": "dzsYHLwIZoLm", "papermill": { "duration": 0.016841, "end_time": "2023-11-05T14:40:21.045365", "exception": false, "start_time": "2023-11-05T14:40:21.028524", "status": "completed" }, "tags": [] }, "source": [ "## Préparation de l'entraineur (Supervised Fine-tuning Trainer)" ] }, { "cell_type": "markdown", "id": "ae0c2932", "metadata": { "id": "aTBJVE4PaJwK", "papermill": { "duration": 0.017068, "end_time": "2023-11-05T14:40:21.079530", "exception": false, "start_time": "2023-11-05T14:40:21.062462", "status": "completed" }, "tags": [] }, "source": [ "Utilisation de [`SFTTrainer` de la librairie TRL](https://huggingface.co/docs/trl/main/en/sft_trainer) qui est un wrapper de Trainer facilite le fine tuning avec LoRa" ] }, { "cell_type": "code", "execution_count": 10, "id": "2a67666c", "metadata": { "execution": { "iopub.execute_input": "2023-11-05T14:40:21.115958Z", "iopub.status.busy": "2023-11-05T14:40:21.115607Z", "iopub.status.idle": "2023-11-05T14:40:21.124632Z", "shell.execute_reply": "2023-11-05T14:40:21.123856Z" }, "papermill": { "duration": 0.029394, "end_time": "2023-11-05T14:40:21.126543", "exception": false, "start_time": "2023-11-05T14:40:21.097149", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "output_dir = \"/kaggle/working/\"\n", "per_device_train_batch_size = 1\n", "gradient_accumulation_steps = 16\n", "optim = \"paged_adamw_32bit\"\n", "save_steps = 64\n", "logging_steps = 64\n", "learning_rate = 1e-4\n", "max_grad_norm = 0.3\n", "max_steps = 1600\n", "warmup_ratio = 0.03\n", "lr_scheduler_type = \"linear\"\n", "\n", "training_arguments = TrainingArguments(\n", " output_dir=output_dir,\n", " per_device_train_batch_size=per_device_train_batch_size,\n", " gradient_accumulation_steps=gradient_accumulation_steps,\n", " optim=optim,\n", " #save_steps=save_steps,\n", " logging_steps=logging_steps,\n", " \n", " #num_train_epochs=5,# *************\n", " #logging_strategy='epoch',# *************\n", " \n", " save_strategy= 'no', #''epoch',# *************\n", " \n", " learning_rate=learning_rate,\n", " fp16=True,\n", " max_grad_norm=max_grad_norm,\n", " max_steps=max_steps,\n", " warmup_ratio=warmup_ratio,\n", " group_by_length=True,\n", " lr_scheduler_type=lr_scheduler_type,\n", " report_to = 'none',\n", " save_total_limit = 1\n", ")" ] }, { "cell_type": "code", "execution_count": 11, "id": "4f0461bf", "metadata": { "execution": { "iopub.execute_input": "2023-11-05T14:40:21.162082Z", "iopub.status.busy": "2023-11-05T14:40:21.161812Z", "iopub.status.idle": 
"2023-11-05T14:40:22.706874Z", "shell.execute_reply": "2023-11-05T14:40:22.706055Z" }, "id": "TNeOBgZeTl2H", "outputId": "b99059a4-4b33-4ff5-97cb-70e5840578ce", "papermill": { "duration": 1.564685, "end_time": "2023-11-05T14:40:22.708852", "exception": false, "start_time": "2023-11-05T14:40:21.144167", "status": "completed" }, "tags": [] }, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "47655cef061b40e0b0306253904008e7", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Map: 0%| | 0/5120 [00:00, ? examples/s]" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "trainer = SFTTrainer(\n", " model=model,\n", " train_dataset=dataset,\n", " peft_config=peft_config,\n", " dataset_text_field=\"text\",\n", " max_seq_length= 512,\n", " tokenizer=tokenizer,\n", " args=training_arguments,\n", ")" ] }, { "cell_type": "markdown", "id": "8b2603be", "metadata": { "id": "1JApkSrCcL3O", "papermill": { "duration": 0.016992, "end_time": "2023-11-05T14:40:22.743691", "exception": false, "start_time": "2023-11-05T14:40:22.726699", "status": "completed" }, "tags": [] }, "source": [ "## Entrainement du model" ] }, { "cell_type": "code", "execution_count": 12, "id": "7d8d7c8c", "metadata": { "execution": { "iopub.execute_input": "2023-11-05T14:40:22.778892Z", "iopub.status.busy": "2023-11-05T14:40:22.778614Z", "iopub.status.idle": "2023-11-05T16:31:38.532123Z", "shell.execute_reply": "2023-11-05T16:31:38.531197Z" }, "id": "_kbS7nRxcMt7", "outputId": "4db61972-d1d9-4c43-d25a-a122da136bb7", "papermill": { "duration": 6675.773544, "end_time": "2023-11-05T16:31:38.534087", "exception": false, "start_time": "2023-11-05T14:40:22.760543", "status": "completed" }, "tags": [] }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "You're using a LlamaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.\n" ] }, { "data": { "text/html": [ "\n", "
"    <table border=\"1\" class=\"dataframe\">\n",
"  <thead>\n",
"    <tr style=\"text-align: left;\">\n",
"      <th>Step</th>\n",
"      <th>Training Loss</th>\n",
"    </tr>\n",
"  </thead>\n",
"  <tbody>\n",
"    <tr><td>64</td><td>1.548300</td></tr>\n",
"    <tr><td>128</td><td>0.926900</td></tr>\n",
"    <tr><td>192</td><td>0.878700</td></tr>\n",
"    <tr><td>256</td><td>0.872800</td></tr>\n",
"    <tr><td>320</td><td>0.824000</td></tr>\n",
"    <tr><td>384</td><td>0.729100</td></tr>\n",
"    <tr><td>448</td><td>0.719300</td></tr>\n",
"    <tr><td>512</td><td>0.686900</td></tr>\n",
"    <tr><td>576</td><td>0.708900</td></tr>\n",
"    <tr><td>640</td><td>0.691500</td></tr>\n",
"    <tr><td>704</td><td>0.571700</td></tr>\n",
"    <tr><td>768</td><td>0.546300</td></tr>\n",
"    <tr><td>832</td><td>0.530700</td></tr>\n",
"    <tr><td>896</td><td>0.544600</td></tr>\n",
"    <tr><td>960</td><td>0.550300</td></tr>\n",
"    <tr><td>1024</td><td>0.430000</td></tr>\n",
"    <tr><td>1088</td><td>0.406100</td></tr>\n",
"    <tr><td>1152</td><td>0.414800</td></tr>\n",
"    <tr><td>1216</td><td>0.427100</td></tr>\n",
"    <tr><td>1280</td><td>0.405000</td></tr>\n",
"    <tr><td>1344</td><td>0.329300</td></tr>\n",
"    <tr><td>1408</td><td>0.321700</td></tr>\n",
"    <tr><td>1472</td><td>0.321200</td></tr>\n",
"    <tr><td>1536</td><td>0.325500</td></tr>\n",
"    <tr><td>1600</td><td>0.314200</td></tr>\n",
"  </tbody>\n",
"</table><p>"
],
"text/plain": [
"