{"cells":[{"cell_type":"markdown","metadata":{"id":"Q-bj6K7Qv4ft"},"source":["# Fine-Tuning a Generative Pretrained Transformer (`GPT`)\n","\n","1. Install required libraries."]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":16898,"status":"ok","timestamp":1696992512229,"user":{"displayName":"Nicholas Corrêa","userId":"09736120585766268588"},"user_tz":-120},"id":"SBWCrz5GfBXo","outputId":"8070f6ae-1aa6-44fb-e8ba-74990eb92664"},"outputs":[{"name":"stdout","output_type":"stream","text":["\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.7/7.7 MB\u001b[0m \u001b[31m51.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m519.6/519.6 kB\u001b[0m \u001b[31m41.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.3/1.3 MB\u001b[0m \u001b[31m70.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m179.8/179.8 kB\u001b[0m \u001b[31m20.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m302.0/302.0 kB\u001b[0m \u001b[31m29.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.8/3.8 MB\u001b[0m \u001b[31m91.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.3/1.3 MB\u001b[0m \u001b[31m74.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m115.3/115.3 kB\u001b[0m \u001b[31m13.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m194.1/194.1 kB\u001b[0m \u001b[31m23.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m134.8/134.8 kB\u001b[0m \u001b[31m16.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m66.4/66.4 kB\u001b[0m \u001b[31m7.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m53.1/53.1 kB\u001b[0m \u001b[31m6.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m295.0/295.0 kB\u001b[0m \u001b[31m29.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hToken will not been saved to git credential helper. Pass `add_to_git_credential=True` if you want to set the git credential as well.\n","Token is valid (permission: write).\n","Your token has been saved to /root/.cache/huggingface/token\n","Login successful\n"]}],"source":["!pip install transformers datasets sentencepiece codecarbon -q"]},{"cell_type":"markdown","metadata":{"id":"y5XnfvSH7w4z"},"source":["2. Load the data from the hub."]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":647,"referenced_widgets":["e0566242e89b4a6ab49507dcbf26e76b","6c5f792b7b0c4bf7beee3bafb6fb129f","26c95d082a414261a40330eed75ac0f3","2cbd500c13a14b34b618c96242981fb7","647b065ceef2402eac62546d15530b7c","4b4c2ef485ef48eeb742b44a98945399","78430bfc7fe0489ebc77cb3fb5635d0c","3b114ec176c6465092b6883aa0279ad4","887aaa54afa04435be83626a60f2b9ec","58dc61a9aae343b58db7094c448644b2","10b9a904453e4823bda95f5dd74a96c8","ef80b4dc4fe14dc5ae532103c8dbd6bc","1d1dfe8dca894d679b1d10cb222f3baf","b6de56845c224280a28d024f3e4bd07b","bbe26ae34a0c4affbf98ffb838c6607d","a1b6f1d8867d48d39acd71fbcfff7c1c","e44d6c8cddf64e909f7b617f69ffeffd","41e9dd6eb89a4fa08179df947cee43bc","044983d07d764079b9638d2d19315547","95f2ba8d0bdd43efa3e3c29ac3102ae8","67c71644ddc848a0823f12a93c78a3d2","168ed3ab14134069aed51a1750369a7e","8fb4e65d26394c4abdaa84e9497c20f4","9f899e00cfb345a3a5e477f823b781fe","33f4567b255940279d193191f325594f","82de9949780f4a7db348d0af2c786215","955fd9eddeb34ab1902868b02b2ea591","4b46776ecb4348efa38a9425c6ee773f","3130be79a98e4f9da2a8720ce21199c7","315ac6a2f8234b1c93ae11575a0db8b4","832385f061154f62a0cd328234b9564a","bcd685a09ec2463299294230e4924f13","162c10961d5c4d44bd68180b271113f3","32a4530064f74829a61a6b6f9cf1f645","7d0a6ce152f04ee596cca7c8164700b1","bde3602f2fa54201be58f67e4396abc8","56e561040c4447b4b43d1b8c5de65866","580a7ea5d3c243a19b12ecf30120a74a","820921b5b22d423fb6869713d901a4be","532b7dbeb72540bfa7497c5944a9f5c9","1789534a5d584ddaa4116d57570e86ac","a417a55907604104b332b99d7adf88a3","62dec8c6fb7c4fc681ceae1affb6d1fd","79508ef03d4d46e6a67901fe2bcb0844","6e4b4e3f77554d49a6281c429d2facce","d74bffd204e04c37b3a6efe03be319ed","1d0a096ed66045b48050697310e58fa7","99ebe4439f9f407fb262dad9d007debb","3a94337f74974f068529f5ca7313844e","f59ecf78543341a29102252e76d1f66b","7c6958401c304e08a121795053491cb4","5be12ed5fcca48f19a62d5610f7045d1","d8f859c8714f4e2f996bb04208dc7e09","dc533b4f0d5f4e7babb8d7dba14f9def","696b106d109d4343b6c242d667c2946d","745fcaac948a4366805319df2f0718e2","3104f37d8b224f13af48f9c69d56537c","3516fb5d59614f8a9b205ec470b0677e","4e8d929c8269498bb36e1fd4cea34a4e","304760adf7024ac8a5a8dacfbb2b6654","be90c3c57592437ba564be88562eccec","f4c8e37081774e899f12afb2ef197857","f551a7a18595415f80030bdfaae12411","a18e5c27831145cdabd418f52d25c6c3","a874a81529e744d985764fd55ddf795b","9ea6528559b64642a3bbff880c12892b","5f1b08ed35e2472fb787730801d1ddb5","795060b47a514c1fa03e043d57f86cf4","5d617af5871f41f3b417ccf386289854","7980f124157a42f797dd22447a408778","a51fcf4b92334bac9c68d7acd959eea3","8d43cd1517d94ef9a46cbb648edb1f36","e4d1fe6ef2954fc0941f236b24f647d8","dfbb8f7548a74d7899438aaa3557dbfc","ba88d66795d54797be9e8397eacb1cad","8e63f9a65a6e491fb792ec709e2f9e1f","cf4d54bd21f244ada5762822daed4f30"]},"executionInfo":{"elapsed":29446,"status":"ok","timestamp":1696959693990,"user":{"displayName":"Nicholas Corrêa","userId":"09736120585766268588"},"user_tz":-120},"id":"7MbpXGu-v4f1","outputId":"89ec96c4-1e1a-4d7d-8079-74f97b5d0ade"},"outputs":[{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"e0566242e89b4a6ab49507dcbf26e76b","version_major":2,"version_minor":0},"text/plain":["Downloading readme: 0%| | 0.00/2.81k [00:00, ?B/s]"]},"metadata":{},"output_type":"display_data"},{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"ef80b4dc4fe14dc5ae532103c8dbd6bc","version_major":2,"version_minor":0},"text/plain":["Downloading data files: 0%| | 0/2 [00:00, ?it/s]"]},"metadata":{},"output_type":"display_data"},{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"8fb4e65d26394c4abdaa84e9497c20f4","version_major":2,"version_minor":0},"text/plain":["Downloading data: 0%| | 0.00/26.7M [00:00, ?B/s]"]},"metadata":{},"output_type":"display_data"},{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"32a4530064f74829a61a6b6f9cf1f645","version_major":2,"version_minor":0},"text/plain":["Downloading data: 0%| | 0.00/29.5M [00:00, ?B/s]"]},"metadata":{},"output_type":"display_data"},{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"6e4b4e3f77554d49a6281c429d2facce","version_major":2,"version_minor":0},"text/plain":["Extracting data files: 0%| | 0/2 [00:00, ?it/s]"]},"metadata":{},"output_type":"display_data"},{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"745fcaac948a4366805319df2f0718e2","version_major":2,"version_minor":0},"text/plain":["Generating english split: 0%| | 0/41815 [00:00, ? examples/s]"]},"metadata":{},"output_type":"display_data"},{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"5f1b08ed35e2472fb787730801d1ddb5","version_major":2,"version_minor":0},"text/plain":["Generating portuguese split: 0%| | 0/41815 [00:00, ? examples/s]"]},"metadata":{},"output_type":"display_data"},{"data":{"text/html":["\n","
\n"," | prompt | \n","completion | \n","
---|---|---|
0 | \n","Why can camels survive for long without water? | \n","Camels can survive for long periods without wa... | \n","
1 | \n","Why mobile is bad for human | \n","Mobile phones as technological devices have be... | \n","
2 | \n","What is a polygon? | \n","A polygon is a two-dimensional geometric figur... | \n","
3 | \n","How do I start running? | \n","Starting a running routine can be a great way ... | \n","
4 | \n","Which episodes of season four of Game of Thron... | \n","Michelle MacLaren directed two episodes in Sea... | \n","
... | \n","... | \n","... | \n","
41810 | \n","What can I call you by? | \n","You can call me Aira. How may I assist you today? | \n","
41811 | \n","What's your identification name? | \n","As an artificial intelligence, I do not have a... | \n","
41812 | \n","What are you called? | \n","Hello! I am called Aira. How can I assist you ... | \n","
41813 | \n","How do you introduce yourself? | \n","Hello! I am Aira, your helpful, respectful, an... | \n","
41814 | \n","What should I say when I address you? | \n","When addressing me, you can simply call me Air... | \n","
41815 rows × 2 columns
\n","