{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "name": "finetuning.ipynb",
      "provenance": [],
      "collapsed_sections": []
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    },
    "accelerator": "GPU"
  },
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "duPhpC7UYvOb",
        "colab_type": "text"
      },
      "source": [
        "# **Setup**\n",
        "\n",
        "---\n",
        "\n"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "08KZUbQnhKwE",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "# Fetch the IndicBERT code and install its requirements into this kernel's environment.\n",
        "!git clone https://github.com/ai4bharat/indic-bert\n",
        "%cd indic-bert\n",
        "%pip install -r requirements.txt\n",
        "%cd ..\n",
        "# -p keeps this cell re-runnable when the directories already exist.\n",
        "!mkdir -p indic-glue outputs"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "u4TbQgpAYrSL",
        "colab_type": "text"
      },
      "source": [
        "# **Download Datasets**\n",
        "---\n"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "B8Te43TtV9OV",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "# Download and unpack inside indic-glue/, the directory passed as --iglue_dir when fine-tuning.\n",
        "%cd indic-glue\n",
        "# Download the dataset -- insert link obtained from https://indicnlp.ai4bharat.org/indic-glue/#downloads\n",
        "!wget https://storage.googleapis.com/ai4bharat-public-indic-nlp-corpora/evaluations/wiki-cloze.tar.gz\n",
        "!tar -xaf wiki-cloze.tar.gz\n",
        "%cd ..\n"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "cAs6r-QSUosR",
        "colab_type": "text"
      },
      "source": [
        "# **Fine-tune the Model**\n",
        "---\n",
        "\n"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "vUCIjREpQFhv",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "# Work from inside the clone so the repository's `fine_tune` package can be imported.\n",
        "%cd indic-bert\n",
        "\n",
        "import os\n",
        "\n",
        "from fine_tune.cli import main as finetune_main\n",
        "\n",
        "# Command-line style arguments handed to the fine-tuning entry point.\n",
        "# The relative paths are resolved from indic-bert/, hence the leading '../'.\n",
        "argvec = ['--lang', 'gu',\n",
        "          '--dataset', 'wiki-cloze', # use the right dataset key, check https://github.com/AI4Bharat/indic-bert/blob/master/fine_tune/cli.py#L10\n",
        "          '--model', 'ai4bharat/indic-bert',\n",
        "          '--iglue_dir', '../indic-glue',\n",
        "          '--output_dir', '../outputs',\n",
        "          '--max_seq_length', '128',\n",
        "          '--learning_rate', '2e-5',\n",
        "          '--num_train_epochs', '3',\n",
        "          '--train_batch_size', '32'\n",
        "]\n",
        "\n",
        "finetune_main(argvec)"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "zUh3Vw7SUwMW",
        "colab_type": "text"
      },
      "source": [
        "# **Check the Results**\n",
        "---"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "Ic0Qpfl-U0Xw",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "# NOTE: this path appears to mirror the --output_dir, --dataset, --lang and --model values used above;\n",
        "# adjust it if you changed any of them (and /content is the Colab working directory).\n",
        "!cat /content/outputs/wiki-cloze/gu-gu/model-ai4bharat-indic-bert/test_results.txt"
      ],
      "execution_count": null,
      "outputs": []
    }
  ]
}