{ "cells": [ { "cell_type": "markdown", "metadata": { "id": "Q-bj6K7Qv4ft" }, "source": [ "# Fine-Tuning a Generative Pretrained Transformer (`GPT`)\n", "\n", "1. Install required libraries." ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "SBWCrz5GfBXo", "outputId": "38e762f5-80a7-4b21-a23a-a5710773e4d5" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.6/7.6 MB\u001b[0m \u001b[31m30.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m519.6/519.6 kB\u001b[0m \u001b[31m39.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m179.8/179.8 kB\u001b[0m \u001b[31m20.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m294.8/294.8 kB\u001b[0m \u001b[31m32.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.8/7.8 MB\u001b[0m \u001b[31m87.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.3/1.3 MB\u001b[0m \u001b[31m72.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m115.3/115.3 kB\u001b[0m \u001b[31m14.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m194.1/194.1 kB\u001b[0m \u001b[31m23.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m134.8/134.8 kB\u001b[0m \u001b[31m16.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[2K 
\u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m66.4/66.4 kB\u001b[0m \u001b[31m7.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m53.1/53.1 kB\u001b[0m \u001b[31m6.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hToken will not been saved to git credential helper. Pass `add_to_git_credential=True` if you want to set the git credential as well.\n", "Token is valid (permission: write).\n", "Your token has been saved to /root/.cache/huggingface/token\n", "Login successful\n" ] } ], "source": [ "# Use the %pip magic rather than !pip: %pip installs into the environment of the\n", "# running kernel, whereas !pip runs whichever pip is first on the shell PATH.\n", "# -q keeps the installer output quiet.\n", "%pip install -q transformers datasets codecarbon" ] }, { "cell_type": "markdown", "metadata": { "id": "y5XnfvSH7w4z" }, "source": [ "2. Load the data from the hub." ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 675, "referenced_widgets": [ "f2acaf555cf044a3ac809f1297479664", "dece28a735b44af0816e8701a3d8c49c", "60558dc25d7b41d3a71d07675774784c", "d8605fe6bae54f2bb8a26ceb94f59542", "bab581ac061a41d1b3f731143b4655e0", "13956b9cb4d14ebab2d864b1682c57de", "e82965028cdc405db5d55594719a2fd9", "949a193e78a149b787ae2a51142e4b39", "45ef37100f914376a61437fcb91d62a0", "2077f315ca6d424b9ff42f6170a360d4", "c2a7eb76e7b848aba3d07b5ccdcbe6cf", "fadfb8b34f954561ad7294c1992e44fd", "22f45fb9a91947f3a172992600e590fa", "e2e45deaae0d4152b70749be8f61d797", "738d4429d69c4424aaaa2ebad38a97e9", "ab15e5766e7a4f43ba193446b3a69fce", "680f20adb3f84a02bc4ee1eed779c096", "0c1be640620b4ea5a72bcf4350a2e4ab", "a8cc952f9cbd46f28fb77f8493820cfb", "086e25d9ba8241539c24fb4066d41758", "e4a2797046ff4017ad6a96651b38db86", "aa4646c7102f4fb8a91ce826c9485b91", "618285d07d3f45649bcd7e3fea88b0b5", "2cb6b7c947f947aeb8123a2b5c8f5cc9", "7fc342476b1042759fbc8ef72922d1ce", "7026a0879aa348a6b0d5391c7bfa0fa6", "d2ac4806eb1f4f13aedf130f17a702bc", "0821833b0a314e07a59553255c6c817c", "4f9d5acb1b0145af8c6c277096560bc2", 
"21539feabb9e49aa8b7b86d784320263", "b55dd06417574aa493b4bde9a1c164f9", "f726b7b12a304caf83eaa09792e4e95c", "a7515ebcf1b140a7883b5dcc0919109b", "991e694dc5a746658a4fab02f7827aab", "5a06d807a45c470d884965d15a8a8399", "60a3166201a2456fb6f65d9220c2b0f2", "a521856ee62c4d5080bb2c97c5aa6155", "b15ddfc0d6eb43b29c41322dd23a46e3", "928a230e18c34f92837c7bb181f25a98", "c0039d680f4e4016b39479d70c9235fd", "52dceb5d44ff4031b3f9ebe76dc28933", "96afc99efbac47e78cb49ce06a6dc17d", "a4b02ed3479745128ad8122f429b708f", "bc305df8ae344756be9da2feabddfc1c", "3713730207834b97a526de6a910c99b4", "612dace2381946b0ad954f47d0b89b16", "095e7e25521c4a8e8c65f388e6638c9b", "c5d1d57007d241329ba55e4afcf13e38", "ece1ce12ee464bcba3ab89682be88dfa", "1c8e2d47e651450abc1b879c2852f652", "25056c36047a44598cc3d450ead19f2d", "0223fbdab2fa4a52b4f4a9e7c7ffdc36", "8d3813a2828c418eb2751ee5890315a6", "b452315d4734449a99532165cbbdaf0a", "0e1d871476cd4c19a88d3f7445f82cf7", "3ced0188ffa44ae0a62c0c8dd170bad0", "c72a8c5e12b048bda0d62b215431e5f8", "a9ca5f15352944409c14dc7c25c8635e", "9d01a0ef650746e7998d8832622be6fd", "0f2107acb1c143b1b759e19528d40d1b", "ff2c72a487c04ac58ab83da8e3cb80fb", "b1f50fce910946fbb7cb77f3f5354375", "738edeb8a6f84dc9a505852783b75977", "8aa8f6aff55f41afb16cdde5174dcbd1", "e506c138fe0e4d779536676e3af1c61b", "f34ceeb905d74d10aa808abf84105528", "3765a4779fff40c2a9f9dc0c58432087", "8110b1681c644a9f80e071b2cc25182a", "19751b59436e4d36af4caef942804cfe", "32486cb1a4a04b879564cc45542c886f", "d9261289724e43ab8326494ab84e20d9", "72aa9dcc457043a193dc43db05321008", "e3ee78b3aca44d9f87e7057fd9252786", "7d1e4fce0f794d998ddc087ab971f7d9", "a6d7adf49b814b82854e375f187c5121", "c534d176d4e44a2383acf9741107b323", "aedc9368b2484f73aaac56e296cde7b6" ] }, "id": "7MbpXGu-v4f1", "outputId": "619ca839-f13c-4b9c-a6be-0f2dc384440a" }, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "f2acaf555cf044a3ac809f1297479664", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Downloading 
readme: 0%| | 0.00/2.81k [00:00, ?B/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "fadfb8b34f954561ad7294c1992e44fd", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Downloading data files: 0%| | 0/2 [00:00, ?it/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "618285d07d3f45649bcd7e3fea88b0b5", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Downloading data: 0%| | 0.00/26.7M [00:00, ?B/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "991e694dc5a746658a4fab02f7827aab", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Downloading data: 0%| | 0.00/29.5M [00:00, ?B/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "3713730207834b97a526de6a910c99b4", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Extracting data files: 0%| | 0/2 [00:00, ?it/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "3ced0188ffa44ae0a62c0c8dd170bad0", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Generating english split: 0%| | 0/41815 [00:00, ? examples/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "3765a4779fff40c2a9f9dc0c58432087", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Generating portuguese split: 0%| | 0/41815 [00:00, ? examples/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "\n", "
\n", " | prompt | \n", "completion | \n", "
---|---|---|
0 | \n", "Why can camels survive for long without water? | \n", "Camels can survive for long periods without wa... | \n", "
1 | \n", "Why mobile is bad for human | \n", "Mobile phones as technological devices have be... | \n", "
2 | \n", "What is a polygon? | \n", "A polygon is a two-dimensional geometric figur... | \n", "
3 | \n", "How do I start running? | \n", "Starting a running routine can be a great way ... | \n", "
4 | \n", "Which episodes of season four of Game of Thron... | \n", "Michelle MacLaren directed two episodes in Sea... | \n", "
... | \n", "... | \n", "... | \n", "
41810 | \n", "What can I call you by? | \n", "You can call me Aira. How may I assist you today? | \n", "
41811 | \n", "What's your identification name? | \n", "As an artificial intelligence, I do not have a... | \n", "
41812 | \n", "What are you called? | \n", "Hello! I am called Aira. How can I assist you ... | \n", "
41813 | \n", "How do you introduce yourself? | \n", "Hello! I am Aira, your helpful, respectful, an... | \n", "
41814 | \n", "What should I say when I address you? | \n", "When addressing me, you can simply call me Air... | \n", "
41815 rows × 2 columns
\n", "