{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "provenance": [],
      "machine_shape": "hm",
      "gpuType": "A100"
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    },
    "language_info": {
      "name": "python"
    },
    "accelerator": "GPU",
    "widgets": {
      "application/vnd.jupyter.widget-state+json": {
        "c06bbba05e10462d993f3e7e6f932cf1": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HBoxModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HBoxModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HBoxView",
            "box_style": "",
            "children": [
              "IPY_MODEL_6f734c35284341d891a44694ddc55b2c",
              "IPY_MODEL_193be53200ab436a967f1ea4807053e2",
              "IPY_MODEL_85a2415f14284237875b349b4c414e21"
            ],
            "layout": "IPY_MODEL_3ab931b2fcc0493ca71923ebc37127c7"
          }
        },
        "6f734c35284341d891a44694ddc55b2c": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_f49472d0536f4011be17902f9e827807",
            "placeholder": "​",
            "style": "IPY_MODEL_2f341cb76f254d0da913faec6a82f762",
            "value": "Fetching 3 files: 100%"
          }
        },
        "193be53200ab436a967f1ea4807053e2": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "FloatProgressModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "FloatProgressModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "ProgressView",
            "bar_style": "success",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_24ebf383723f4de494f9808b41222751",
            "max": 3,
            "min": 0,
            "orientation": "horizontal",
            "style": "IPY_MODEL_0c17aa90672046c9bd2f293b1a998b46",
            "value": 3
          }
        },
        "85a2415f14284237875b349b4c414e21": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_af89033247b34da2a5cded73b0beade2",
            "placeholder": "​",
            "style": "IPY_MODEL_0ea5f5e6ab26484ab22dcf5576f796d1",
            "value": " 3/3 [01:49&lt;00:00, 109.92s/it]"
          }
        },
        "3ab931b2fcc0493ca71923ebc37127c7": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "f49472d0536f4011be17902f9e827807": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "2f341cb76f254d0da913faec6a82f762": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "24ebf383723f4de494f9808b41222751": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "0c17aa90672046c9bd2f293b1a998b46": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "ProgressStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "ProgressStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "bar_color": null,
            "description_width": ""
          }
        },
        "af89033247b34da2a5cded73b0beade2": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "0ea5f5e6ab26484ab22dcf5576f796d1": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "946ce9afeddb4da5a36e81e5ada9d957": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HBoxModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HBoxModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HBoxView",
            "box_style": "",
            "children": [
              "IPY_MODEL_95782b84af1c4014ae04c9e6c9131cbe",
              "IPY_MODEL_2332101edec848219c3b0c6026c2a722",
              "IPY_MODEL_a2638b3f10a24de99bb940dcd150ab53"
            ],
            "layout": "IPY_MODEL_1c78955b41ba4845931a250f16b753b5"
          }
        },
        "95782b84af1c4014ae04c9e6c9131cbe": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_148a8c3e4fad4cefa16a478a9758fdc5",
            "placeholder": "​",
            "style": "IPY_MODEL_92ef027f2cc940b5b09521328de550b0",
            "value": "tokenizer.model.v3: 100%"
          }
        },
        "2332101edec848219c3b0c6026c2a722": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "FloatProgressModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "FloatProgressModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "ProgressView",
            "bar_style": "success",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_1a3d2764c7fc41dcb97489e84c28093e",
            "max": 587404,
            "min": 0,
            "orientation": "horizontal",
            "style": "IPY_MODEL_c7b6155f0f844c67b3a2b805570fd6f9",
            "value": 587404
          }
        },
        "a2638b3f10a24de99bb940dcd150ab53": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_1eec374aa3414838a9b41e5db1fefd50",
            "placeholder": "​",
            "style": "IPY_MODEL_6fc8cf7aa81c4043878ac854b289dbe3",
            "value": " 587k/587k [00:00&lt;00:00, 5.39MB/s]"
          }
        },
        "1c78955b41ba4845931a250f16b753b5": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "148a8c3e4fad4cefa16a478a9758fdc5": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "92ef027f2cc940b5b09521328de550b0": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "1a3d2764c7fc41dcb97489e84c28093e": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "c7b6155f0f844c67b3a2b805570fd6f9": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "ProgressStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "ProgressStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "bar_color": null,
            "description_width": ""
          }
        },
        "1eec374aa3414838a9b41e5db1fefd50": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "6fc8cf7aa81c4043878ac854b289dbe3": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "b1aab1a3b5914048962a6d7d63401425": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HBoxModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HBoxModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HBoxView",
            "box_style": "",
            "children": [
              "IPY_MODEL_10fe81122e6442f28608766d90749790",
              "IPY_MODEL_f7e72f0a87bc421b82a59ae9ad33a4cb",
              "IPY_MODEL_2188b8f9491b4d3e8861e40e7c4f6a46"
            ],
            "layout": "IPY_MODEL_b3bf1880a5844f8c89096ced830fc954"
          }
        },
        "10fe81122e6442f28608766d90749790": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_7e0495ffdeb74675847e5e4c2104cc34",
            "placeholder": "​",
            "style": "IPY_MODEL_3e0e828a21f24944b68a66eafb52f62b",
            "value": "params.json: 100%"
          }
        },
        "f7e72f0a87bc421b82a59ae9ad33a4cb": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "FloatProgressModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "FloatProgressModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "ProgressView",
            "bar_style": "success",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_ee07a8e2427c4fc9bd09b27ad11e968a",
            "max": 202,
            "min": 0,
            "orientation": "horizontal",
            "style": "IPY_MODEL_d35818e4f9454d26aa475826e08ea4f0",
            "value": 202
          }
        },
        "2188b8f9491b4d3e8861e40e7c4f6a46": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_e3f7c4fea8494af2a473cf61adccf270",
            "placeholder": "​",
            "style": "IPY_MODEL_7e18402efdb34d708b7917964ac791de",
            "value": " 202/202 [00:00&lt;00:00, 12.7kB/s]"
          }
        },
        "b3bf1880a5844f8c89096ced830fc954": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "7e0495ffdeb74675847e5e4c2104cc34": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "3e0e828a21f24944b68a66eafb52f62b": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "ee07a8e2427c4fc9bd09b27ad11e968a": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "d35818e4f9454d26aa475826e08ea4f0": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "ProgressStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "ProgressStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "bar_color": null,
            "description_width": ""
          }
        },
        "e3f7c4fea8494af2a473cf61adccf270": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "7e18402efdb34d708b7917964ac791de": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "1442445cdf89487784d4a39919fec6bf": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HBoxModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HBoxModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HBoxView",
            "box_style": "",
            "children": [
              "IPY_MODEL_2ff8ebe8d132411585a05b852362c406",
              "IPY_MODEL_ef719bb991714d91a365226c5a2ca9df",
              "IPY_MODEL_1727f9b019e9477282d010e96b7dd4c3"
            ],
            "layout": "IPY_MODEL_f82b841d7e5b45229119bd3195e5b12f"
          }
        },
        "2ff8ebe8d132411585a05b852362c406": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_feb470b16b4249daaa19c1344d036f0a",
            "placeholder": "​",
            "style": "IPY_MODEL_2cc30eef6d7b46d283fcfd0e7abca6ea",
            "value": "consolidated.safetensors: 100%"
          }
        },
        "ef719bb991714d91a365226c5a2ca9df": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "FloatProgressModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "FloatProgressModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "ProgressView",
            "bar_style": "success",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_83b554bec0fd40dd9bd9e4601f2f98a3",
            "max": 14496078512,
            "min": 0,
            "orientation": "horizontal",
            "style": "IPY_MODEL_1c19998de61c4e2dad6647fdc4ca4358",
            "value": 14496078512
          }
        },
        "1727f9b019e9477282d010e96b7dd4c3": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_25df0dd9481a4e0ba21d2f4f4ffdba2e",
            "placeholder": "​",
            "style": "IPY_MODEL_cbf620ae5196446c84528feaed64ae6a",
            "value": " 14.5G/14.5G [01:49&lt;00:00, 87.0MB/s]"
          }
        },
        "f82b841d7e5b45229119bd3195e5b12f": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "feb470b16b4249daaa19c1344d036f0a": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "2cc30eef6d7b46d283fcfd0e7abca6ea": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "83b554bec0fd40dd9bd9e4601f2f98a3": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "1c19998de61c4e2dad6647fdc4ca4358": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "ProgressStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "ProgressStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "bar_color": null,
            "description_width": ""
          }
        },
        "25df0dd9481a4e0ba21d2f4f4ffdba2e": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "cbf620ae5196446c84528feaed64ae6a": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        }
      }
    }
  },
  "cells": [
    {
      "cell_type": "markdown",
      "source": [
        "# Getting starting fine-tuning Mistral 7B\n",
        "\n",
        "This notebook shows you a simple example of how to LoRA finetune Mistral 7B. You can can run this notebook in Google Colab with Pro + account with A100 and 40GB RAM.\n",
        "\n",
        "<a target=\"_blank\" href=\"https://colab.research.google.com/github/mistralai/mistral-finetune/blob/main/tutorials/mistral_finetune_7b.ipynb\">\n",
        "  <img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/>\n",
        "</a>\n",
        "\n",
        "\n",
        "Check out `mistral-finetune` Github repo to learn more: https://github.com/mistralai/mistral-finetune/"
      ],
      "metadata": {
        "id": "RyuOCYM92LJb"
      }
    },
    {
      "cell_type": "markdown",
      "source": [
        "## Installation\n",
        "\n",
        "Clone the `mistral-finetune` repo:\n"
      ],
      "metadata": {
        "id": "yxr8mv-17GfB"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "%cd /content/\n",
        "!git clone https://github.com/mistralai/mistral-finetune.git"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "TIj3IlIeVDIb",
        "outputId": "6ffd6946-26a2-4e3d-e6db-3336cc2c7444"
      },
      "execution_count": 3,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "/content\n",
            "Cloning into 'mistral-finetune'...\n",
            "remote: Enumerating objects: 62, done.\u001b[K\n",
            "remote: Counting objects: 100% (62/62), done.\u001b[K\n",
            "remote: Compressing objects: 100% (55/55), done.\u001b[K\n",
            "remote: Total 62 (delta 6), reused 59 (delta 4), pack-reused 0\u001b[K\n",
            "Receiving objects: 100% (62/62), 90.16 KiB | 3.00 MiB/s, done.\n",
            "Resolving deltas: 100% (6/6), done.\n"
          ]
        }
      ]
    },
    {
      "cell_type": "markdown",
      "source": [
        "Install all required dependencies:"
      ],
      "metadata": {
        "id": "mQPd_pGT7WiY"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "!pip install -r /content/mistral-finetune/requirements.txt"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "KuTOGipl7BS7",
        "outputId": "0d332b99-54b1-431b-eb41-4b929087040c"
      },
      "execution_count": 4,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Requirement already satisfied: fire in /usr/local/lib/python3.10/dist-packages (from -r /content/mistral-finetune/requirements.txt (line 1)) (0.6.0)\n",
            "Requirement already satisfied: simple-parsing in /usr/local/lib/python3.10/dist-packages (from -r /content/mistral-finetune/requirements.txt (line 2)) (0.1.5)\n",
            "Requirement already satisfied: pyyaml in /usr/local/lib/python3.10/dist-packages (from -r /content/mistral-finetune/requirements.txt (line 3)) (6.0.1)\n",
            "Requirement already satisfied: mistral-common>=1.1.0 in /usr/local/lib/python3.10/dist-packages (from -r /content/mistral-finetune/requirements.txt (line 4)) (1.1.0)\n",
            "Requirement already satisfied: safetensors in /usr/local/lib/python3.10/dist-packages (from -r /content/mistral-finetune/requirements.txt (line 5)) (0.4.3)\n",
            "Requirement already satisfied: tensorboard in /usr/local/lib/python3.10/dist-packages (from -r /content/mistral-finetune/requirements.txt (line 6)) (2.15.2)\n",
            "Requirement already satisfied: tqdm in /usr/local/lib/python3.10/dist-packages (from -r /content/mistral-finetune/requirements.txt (line 7)) (4.66.4)\n",
            "Requirement already satisfied: torch==2.2 in /usr/local/lib/python3.10/dist-packages (from -r /content/mistral-finetune/requirements.txt (line 9)) (2.2.0)\n",
            "Requirement already satisfied: triton==2.2 in /usr/local/lib/python3.10/dist-packages (from -r /content/mistral-finetune/requirements.txt (line 10)) (2.2.0)\n",
            "Requirement already satisfied: xformers==0.0.24 in /usr/local/lib/python3.10/dist-packages (from -r /content/mistral-finetune/requirements.txt (line 11)) (0.0.24)\n",
            "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from torch==2.2->-r /content/mistral-finetune/requirements.txt (line 9)) (3.14.0)\n",
            "Requirement already satisfied: typing-extensions>=4.8.0 in /usr/local/lib/python3.10/dist-packages (from torch==2.2->-r /content/mistral-finetune/requirements.txt (line 9)) (4.11.0)\n",
            "Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from torch==2.2->-r /content/mistral-finetune/requirements.txt (line 9)) (1.12)\n",
            "Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch==2.2->-r /content/mistral-finetune/requirements.txt (line 9)) (3.3)\n",
            "Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch==2.2->-r /content/mistral-finetune/requirements.txt (line 9)) (3.1.4)\n",
            "Requirement already satisfied: fsspec in /usr/local/lib/python3.10/dist-packages (from torch==2.2->-r /content/mistral-finetune/requirements.txt (line 9)) (2023.6.0)\n",
            "Requirement already satisfied: nvidia-cuda-nvrtc-cu12==12.1.105 in /usr/local/lib/python3.10/dist-packages (from torch==2.2->-r /content/mistral-finetune/requirements.txt (line 9)) (12.1.105)\n",
            "Requirement already satisfied: nvidia-cuda-runtime-cu12==12.1.105 in /usr/local/lib/python3.10/dist-packages (from torch==2.2->-r /content/mistral-finetune/requirements.txt (line 9)) (12.1.105)\n",
            "Requirement already satisfied: nvidia-cuda-cupti-cu12==12.1.105 in /usr/local/lib/python3.10/dist-packages (from torch==2.2->-r /content/mistral-finetune/requirements.txt (line 9)) (12.1.105)\n",
            "Requirement already satisfied: nvidia-cudnn-cu12==8.9.2.26 in /usr/local/lib/python3.10/dist-packages (from torch==2.2->-r /content/mistral-finetune/requirements.txt (line 9)) (8.9.2.26)\n",
            "Requirement already satisfied: nvidia-cublas-cu12==12.1.3.1 in /usr/local/lib/python3.10/dist-packages (from torch==2.2->-r /content/mistral-finetune/requirements.txt (line 9)) (12.1.3.1)\n",
            "Requirement already satisfied: nvidia-cufft-cu12==11.0.2.54 in /usr/local/lib/python3.10/dist-packages (from torch==2.2->-r /content/mistral-finetune/requirements.txt (line 9)) (11.0.2.54)\n",
            "Requirement already satisfied: nvidia-curand-cu12==10.3.2.106 in /usr/local/lib/python3.10/dist-packages (from torch==2.2->-r /content/mistral-finetune/requirements.txt (line 9)) (10.3.2.106)\n",
            "Requirement already satisfied: nvidia-cusolver-cu12==11.4.5.107 in /usr/local/lib/python3.10/dist-packages (from torch==2.2->-r /content/mistral-finetune/requirements.txt (line 9)) (11.4.5.107)\n",
            "Requirement already satisfied: nvidia-cusparse-cu12==12.1.0.106 in /usr/local/lib/python3.10/dist-packages (from torch==2.2->-r /content/mistral-finetune/requirements.txt (line 9)) (12.1.0.106)\n",
            "Requirement already satisfied: nvidia-nccl-cu12==2.19.3 in /usr/local/lib/python3.10/dist-packages (from torch==2.2->-r /content/mistral-finetune/requirements.txt (line 9)) (2.19.3)\n",
            "Requirement already satisfied: nvidia-nvtx-cu12==12.1.105 in /usr/local/lib/python3.10/dist-packages (from torch==2.2->-r /content/mistral-finetune/requirements.txt (line 9)) (12.1.105)\n",
            "Requirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (from xformers==0.0.24->-r /content/mistral-finetune/requirements.txt (line 11)) (1.25.2)\n",
            "Requirement already satisfied: nvidia-nvjitlink-cu12 in /usr/local/lib/python3.10/dist-packages (from nvidia-cusolver-cu12==11.4.5.107->torch==2.2->-r /content/mistral-finetune/requirements.txt (line 9)) (12.5.40)\n",
            "Requirement already satisfied: six in /usr/local/lib/python3.10/dist-packages (from fire->-r /content/mistral-finetune/requirements.txt (line 1)) (1.16.0)\n",
            "Requirement already satisfied: termcolor in /usr/local/lib/python3.10/dist-packages (from fire->-r /content/mistral-finetune/requirements.txt (line 1)) (2.4.0)\n",
            "Requirement already satisfied: docstring-parser~=0.15 in /usr/local/lib/python3.10/dist-packages (from simple-parsing->-r /content/mistral-finetune/requirements.txt (line 2)) (0.16)\n",
            "Requirement already satisfied: jsonschema==4.21.1 in /usr/local/lib/python3.10/dist-packages (from mistral-common>=1.1.0->-r /content/mistral-finetune/requirements.txt (line 4)) (4.21.1)\n",
            "Requirement already satisfied: pydantic==2.6.1 in /usr/local/lib/python3.10/dist-packages (from mistral-common>=1.1.0->-r /content/mistral-finetune/requirements.txt (line 4)) (2.6.1)\n",
            "Requirement already satisfied: sentencepiece==0.1.99 in /usr/local/lib/python3.10/dist-packages (from mistral-common>=1.1.0->-r /content/mistral-finetune/requirements.txt (line 4)) (0.1.99)\n",
            "Requirement already satisfied: attrs>=22.2.0 in /usr/local/lib/python3.10/dist-packages (from jsonschema==4.21.1->mistral-common>=1.1.0->-r /content/mistral-finetune/requirements.txt (line 4)) (23.2.0)\n",
            "Requirement already satisfied: jsonschema-specifications>=2023.03.6 in /usr/local/lib/python3.10/dist-packages (from jsonschema==4.21.1->mistral-common>=1.1.0->-r /content/mistral-finetune/requirements.txt (line 4)) (2023.12.1)\n",
            "Requirement already satisfied: referencing>=0.28.4 in /usr/local/lib/python3.10/dist-packages (from jsonschema==4.21.1->mistral-common>=1.1.0->-r /content/mistral-finetune/requirements.txt (line 4)) (0.35.1)\n",
            "Requirement already satisfied: rpds-py>=0.7.1 in /usr/local/lib/python3.10/dist-packages (from jsonschema==4.21.1->mistral-common>=1.1.0->-r /content/mistral-finetune/requirements.txt (line 4)) (0.18.1)\n",
            "Requirement already satisfied: annotated-types>=0.4.0 in /usr/local/lib/python3.10/dist-packages (from pydantic==2.6.1->mistral-common>=1.1.0->-r /content/mistral-finetune/requirements.txt (line 4)) (0.7.0)\n",
            "Requirement already satisfied: pydantic-core==2.16.2 in /usr/local/lib/python3.10/dist-packages (from pydantic==2.6.1->mistral-common>=1.1.0->-r /content/mistral-finetune/requirements.txt (line 4)) (2.16.2)\n",
            "Requirement already satisfied: absl-py>=0.4 in /usr/local/lib/python3.10/dist-packages (from tensorboard->-r /content/mistral-finetune/requirements.txt (line 6)) (1.4.0)\n",
            "Requirement already satisfied: grpcio>=1.48.2 in /usr/local/lib/python3.10/dist-packages (from tensorboard->-r /content/mistral-finetune/requirements.txt (line 6)) (1.64.0)\n",
            "Requirement already satisfied: google-auth<3,>=1.6.3 in /usr/local/lib/python3.10/dist-packages (from tensorboard->-r /content/mistral-finetune/requirements.txt (line 6)) (2.27.0)\n",
            "Requirement already satisfied: google-auth-oauthlib<2,>=0.5 in /usr/local/lib/python3.10/dist-packages (from tensorboard->-r /content/mistral-finetune/requirements.txt (line 6)) (1.2.0)\n",
            "Requirement already satisfied: markdown>=2.6.8 in /usr/local/lib/python3.10/dist-packages (from tensorboard->-r /content/mistral-finetune/requirements.txt (line 6)) (3.6)\n",
            "Requirement already satisfied: protobuf!=4.24.0,>=3.19.6 in /usr/local/lib/python3.10/dist-packages (from tensorboard->-r /content/mistral-finetune/requirements.txt (line 6)) (3.20.3)\n",
            "Requirement already satisfied: requests<3,>=2.21.0 in /usr/local/lib/python3.10/dist-packages (from tensorboard->-r /content/mistral-finetune/requirements.txt (line 6)) (2.31.0)\n",
            "Requirement already satisfied: setuptools>=41.0.0 in /usr/local/lib/python3.10/dist-packages (from tensorboard->-r /content/mistral-finetune/requirements.txt (line 6)) (67.7.2)\n",
            "Requirement already satisfied: tensorboard-data-server<0.8.0,>=0.7.0 in /usr/local/lib/python3.10/dist-packages (from tensorboard->-r /content/mistral-finetune/requirements.txt (line 6)) (0.7.2)\n",
            "Requirement already satisfied: werkzeug>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from tensorboard->-r /content/mistral-finetune/requirements.txt (line 6)) (3.0.3)\n",
            "Requirement already satisfied: cachetools<6.0,>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from google-auth<3,>=1.6.3->tensorboard->-r /content/mistral-finetune/requirements.txt (line 6)) (5.3.3)\n",
            "Requirement already satisfied: pyasn1-modules>=0.2.1 in /usr/local/lib/python3.10/dist-packages (from google-auth<3,>=1.6.3->tensorboard->-r /content/mistral-finetune/requirements.txt (line 6)) (0.4.0)\n",
            "Requirement already satisfied: rsa<5,>=3.1.4 in /usr/local/lib/python3.10/dist-packages (from google-auth<3,>=1.6.3->tensorboard->-r /content/mistral-finetune/requirements.txt (line 6)) (4.9)\n",
            "Requirement already satisfied: requests-oauthlib>=0.7.0 in /usr/local/lib/python3.10/dist-packages (from google-auth-oauthlib<2,>=0.5->tensorboard->-r /content/mistral-finetune/requirements.txt (line 6)) (1.3.1)\n",
            "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2.21.0->tensorboard->-r /content/mistral-finetune/requirements.txt (line 6)) (3.3.2)\n",
            "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2.21.0->tensorboard->-r /content/mistral-finetune/requirements.txt (line 6)) (3.7)\n",
            "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2.21.0->tensorboard->-r /content/mistral-finetune/requirements.txt (line 6)) (2.0.7)\n",
            "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2.21.0->tensorboard->-r /content/mistral-finetune/requirements.txt (line 6)) (2024.2.2)\n",
            "Requirement already satisfied: MarkupSafe>=2.1.1 in /usr/local/lib/python3.10/dist-packages (from werkzeug>=1.0.1->tensorboard->-r /content/mistral-finetune/requirements.txt (line 6)) (2.1.5)\n",
            "Requirement already satisfied: mpmath>=0.19 in /usr/local/lib/python3.10/dist-packages (from sympy->torch==2.2->-r /content/mistral-finetune/requirements.txt (line 9)) (1.3.0)\n",
            "Requirement already satisfied: pyasn1<0.7.0,>=0.4.6 in /usr/local/lib/python3.10/dist-packages (from pyasn1-modules>=0.2.1->google-auth<3,>=1.6.3->tensorboard->-r /content/mistral-finetune/requirements.txt (line 6)) (0.6.0)\n",
            "Requirement already satisfied: oauthlib>=3.0.0 in /usr/local/lib/python3.10/dist-packages (from requests-oauthlib>=0.7.0->google-auth-oauthlib<2,>=0.5->tensorboard->-r /content/mistral-finetune/requirements.txt (line 6)) (3.2.2)\n"
          ]
        }
      ]
    },
    {
      "cell_type": "markdown",
      "source": [
        "## Model download"
      ],
      "metadata": {
        "id": "LgdIAi257jLo"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "!wget https://models.mistralcdn.com/mistral-7b-v0-3/mistral-7B-v0.3.tar"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "cdl_R5baUyha",
        "outputId": "8ddcc9d2-5088-47a8-b5f7-d73c89063246"
      },
      "execution_count": 5,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "--2024-05-24 18:50:25--  https://models.mistralcdn.com/mistral-7b-v0-3/mistral-7B-v0.3.tar\n",
            "Resolving models.mistralcdn.com (models.mistralcdn.com)... 104.26.6.117, 104.26.7.117, 172.67.70.68, ...\n",
            "Connecting to models.mistralcdn.com (models.mistralcdn.com)|104.26.6.117|:443... connected.\n",
            "HTTP request sent, awaiting response... 200 OK\n",
            "Length: 14496675840 (14G) [application/x-tar]\n",
            "Saving to: ‘mistral-7B-v0.3.tar’\n",
            "\n",
            "mistral-7B-v0.3.tar 100%[===================>]  13.50G  40.5MB/s    in 6m 3s   \n",
            "\n",
            "2024-05-24 18:56:29 (38.1 MB/s) - ‘mistral-7B-v0.3.tar’ saved [14496675840/14496675840]\n",
            "\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "!DIR=/content/mistral_models && mkdir -p $DIR && tar -xf mistral-7B-v0.3.tar -C $DIR"
      ],
      "metadata": {
        "id": "IgJWR-fReilz"
      },
      "execution_count": 7,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# Alternatively, you can download the model from Hugging Face\n",
        "\n",
        "# !pip install huggingface_hub\n",
        "# from huggingface_hub import snapshot_download\n",
        "# from pathlib import Path\n",
        "\n",
        "# mistral_models_path = Path.home().joinpath('mistral_models', '7B-v0.3')\n",
        "# mistral_models_path.mkdir(parents=True, exist_ok=True)\n",
        "\n",
        "# snapshot_download(repo_id=\"mistralai/Mistral-7B-v0.3\", allow_patterns=[\"params.json\", \"consolidated.safetensors\", \"tokenizer.model.v3\"], local_dir=mistral_models_path)\n",
        "\n",
        "#! cp -r /root/mistral_models/7B-v0.3 /content/mistral_models\n",
        "#! rm -r /root/mistral_models/7B-v0.3"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 163,
          "referenced_widgets": [
            "c06bbba05e10462d993f3e7e6f932cf1",
            "6f734c35284341d891a44694ddc55b2c",
            "193be53200ab436a967f1ea4807053e2",
            "85a2415f14284237875b349b4c414e21",
            "3ab931b2fcc0493ca71923ebc37127c7",
            "f49472d0536f4011be17902f9e827807",
            "2f341cb76f254d0da913faec6a82f762",
            "24ebf383723f4de494f9808b41222751",
            "0c17aa90672046c9bd2f293b1a998b46",
            "af89033247b34da2a5cded73b0beade2",
            "0ea5f5e6ab26484ab22dcf5576f796d1",
            "946ce9afeddb4da5a36e81e5ada9d957",
            "95782b84af1c4014ae04c9e6c9131cbe",
            "2332101edec848219c3b0c6026c2a722",
            "a2638b3f10a24de99bb940dcd150ab53",
            "1c78955b41ba4845931a250f16b753b5",
            "148a8c3e4fad4cefa16a478a9758fdc5",
            "92ef027f2cc940b5b09521328de550b0",
            "1a3d2764c7fc41dcb97489e84c28093e",
            "c7b6155f0f844c67b3a2b805570fd6f9",
            "1eec374aa3414838a9b41e5db1fefd50",
            "6fc8cf7aa81c4043878ac854b289dbe3",
            "b1aab1a3b5914048962a6d7d63401425",
            "10fe81122e6442f28608766d90749790",
            "f7e72f0a87bc421b82a59ae9ad33a4cb",
            "2188b8f9491b4d3e8861e40e7c4f6a46",
            "b3bf1880a5844f8c89096ced830fc954",
            "7e0495ffdeb74675847e5e4c2104cc34",
            "3e0e828a21f24944b68a66eafb52f62b",
            "ee07a8e2427c4fc9bd09b27ad11e968a",
            "d35818e4f9454d26aa475826e08ea4f0",
            "e3f7c4fea8494af2a473cf61adccf270",
            "7e18402efdb34d708b7917964ac791de",
            "1442445cdf89487784d4a39919fec6bf",
            "2ff8ebe8d132411585a05b852362c406",
            "ef719bb991714d91a365226c5a2ca9df",
            "1727f9b019e9477282d010e96b7dd4c3",
            "f82b841d7e5b45229119bd3195e5b12f",
            "feb470b16b4249daaa19c1344d036f0a",
            "2cc30eef6d7b46d283fcfd0e7abca6ea",
            "83b554bec0fd40dd9bd9e4601f2f98a3",
            "1c19998de61c4e2dad6647fdc4ca4358",
            "25df0dd9481a4e0ba21d2f4f4ffdba2e",
            "cbf620ae5196446c84528feaed64ae6a"
          ]
        },
        "id": "qgjAADBFHB0S",
        "outputId": "6dd98910-36fd-4dc1-c5b8-77bb4c104e05"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "display_data",
          "data": {
            "text/plain": [
              "Fetching 3 files:   0%|          | 0/3 [00:00<?, ?it/s]"
            ],
            "application/vnd.jupyter.widget-view+json": {
              "version_major": 2,
              "version_minor": 0,
              "model_id": "c06bbba05e10462d993f3e7e6f932cf1"
            }
          },
          "metadata": {}
        },
        {
          "output_type": "display_data",
          "data": {
            "text/plain": [
              "tokenizer.model.v3:   0%|          | 0.00/587k [00:00<?, ?B/s]"
            ],
            "application/vnd.jupyter.widget-view+json": {
              "version_major": 2,
              "version_minor": 0,
              "model_id": "946ce9afeddb4da5a36e81e5ada9d957"
            }
          },
          "metadata": {}
        },
        {
          "output_type": "display_data",
          "data": {
            "text/plain": [
              "params.json:   0%|          | 0.00/202 [00:00<?, ?B/s]"
            ],
            "application/vnd.jupyter.widget-view+json": {
              "version_major": 2,
              "version_minor": 0,
              "model_id": "b1aab1a3b5914048962a6d7d63401425"
            }
          },
          "metadata": {}
        },
        {
          "output_type": "display_data",
          "data": {
            "text/plain": [
              "consolidated.safetensors:   0%|          | 0.00/14.5G [00:00<?, ?B/s]"
            ],
            "application/vnd.jupyter.widget-view+json": {
              "version_major": 2,
              "version_minor": 0,
              "model_id": "1442445cdf89487784d4a39919fec6bf"
            }
          },
          "metadata": {}
        },
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "'/root/mistral_models/7B-v0.3'"
            ],
            "application/vnd.google.colaboratory.intrinsic+json": {
              "type": "string"
            }
          },
          "metadata": {},
          "execution_count": 7
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "!ls /content/mistral_models"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "3PxYGmcy4gu0",
        "outputId": "71912866-1a50-4407-ac34-42e23927afd9"
      },
      "execution_count": 8,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "consolidated.safetensors  params.json  tokenizer.model.v3\n"
          ]
        }
      ]
    },
    {
      "cell_type": "markdown",
      "source": [
        "## Prepare dataset\n",
        "\n",
        "To ensure effective training, mistral-finetune has strict requirements for how the training data has to be formatted. Check out the required data formatting [here](https://github.com/mistralai/mistral-finetune/tree/main?tab=readme-ov-file#prepare-dataset).\n",
        "\n",
        "In this example, let’s use the ultrachat_200k dataset. We load a chunk of the data into Pandas Dataframes, split the data into training and validation, and save the data into the required `jsonl` format for fine-tuning."
      ],
      "metadata": {
        "id": "ams-19wF8zgY"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "%cd /content/"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "T33N2SwCIhEl",
        "outputId": "08482351-d462-436f-81d2-9b9489b78265"
      },
      "execution_count": 9,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "/content\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "# make a new directory called data\n",
        "!mkdir -p data"
      ],
      "metadata": {
        "id": "i7bmgXvG1vUq"
      },
      "execution_count": 10,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# navigate to this data directory\n",
        "%cd /content/data"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "Br2czKwwFLE8",
        "outputId": "0dd1a5fd-1285-48b7-8961-a640eec81d7d"
      },
      "execution_count": 11,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "/content/data\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "# read data into a pandas dataframe\n",
        "import pandas as pd\n",
        "\n",
        "df = pd.read_parquet('https://huggingface.co/datasets/HuggingFaceH4/ultrachat_200k/resolve/main/data/test_gen-00000-of-00001-3d4cd8309148a71f.parquet')"
      ],
      "metadata": {
        "id": "RVF8VqU110sB"
      },
      "execution_count": 12,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# split data into training and evaluation\n",
        "df_train=df.sample(frac=0.95,random_state=200)\n",
        "df_eval=df.drop(df_train.index)"
      ],
      "metadata": {
        "id": "Qog1ZEUn12KQ"
      },
      "execution_count": 13,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# save data into .jsonl files\n",
        "df_train.to_json(\"ultrachat_chunk_train.jsonl\", orient=\"records\", lines=True)\n",
        "df_eval.to_json(\"ultrachat_chunk_eval.jsonl\", orient=\"records\", lines=True)"
      ],
      "metadata": {
        "id": "I4Yb3NJp13sG"
      },
      "execution_count": 14,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "!ls /content/data"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "Rc9q_g7EFQLf",
        "outputId": "a4d6e26a-557d-46ac-dca4-7739dddc5513"
      },
      "execution_count": 15,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "ultrachat_chunk_eval.jsonl  ultrachat_chunk_train.jsonl\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "# navigate to the mistral-finetune directory\n",
        "%cd /content/mistral-finetune/"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "YIK0VFXHIn8r",
        "outputId": "ac4d292a-6870-440a-c8fe-3a5c9e5b8d2b"
      },
      "execution_count": 16,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "/content/mistral-finetune\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "# some of the training data doesn't have the right format,\n",
        "# so we need to reformat the data into the correct format and skip the cases that doesn't have the right format:\n",
        "\n",
        "!python -m utils.reformat_data /content/data/ultrachat_chunk_train.jsonl"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "vLHNxpN4GS3i",
        "outputId": "8a4a0b11-238c-4bc7-beed-f7bb28b05536"
      },
      "execution_count": 17,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Skip 3674th sample\n",
            "Skip 9176th sample\n",
            "Skip 10559th sample\n",
            "Skip 13293th sample\n",
            "Skip 13973th sample\n",
            "Skip 15219th sample\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "# eval data looks all good\n",
        "!python -m utils.reformat_data /content/data/ultrachat_chunk_eval.jsonl"
      ],
      "metadata": {
        "id": "RscZFo7tGvzS"
      },
      "execution_count": 18,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# Now you can verify your training yaml to make sure the data is correctly formatted and to get an estimate of your training time.\n",
        "\n",
        "!python -m utils.validate_data --train_yaml example/7B.yaml\n"
      ],
      "metadata": {
        "id": "fqhyigF8XVUE",
        "outputId": "e2159431-ce27-482e-aa75-a7723f1cdb8b",
        "colab": {
          "base_uri": "https://localhost:8080/"
        }
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "0it [00:00, ?it/s]Validating /content/data/ultrachat_chunk_train.jsonl ...\n",
            "\n",
            "  0% 0/26883 [00:00<?, ?it/s]\u001b[A\n",
            "  0% 26/26883 [00:00<01:43, 259.48it/s]\u001b[A\n",
            "  0% 55/26883 [00:00<01:38, 273.45it/s]\u001b[A\n",
            "  0% 89/26883 [00:00<01:28, 301.11it/s]\u001b[A\n",
            "  0% 126/26883 [00:00<01:21, 327.48it/s]\u001b[A\n",
            "  1% 161/26883 [00:00<01:20, 332.40it/s]\u001b[A\n",
            "  1% 195/26883 [00:00<01:25, 313.60it/s]\u001b[A\n",
            "  1% 227/26883 [00:00<01:24, 315.12it/s]\u001b[A\n",
            "  1% 259/26883 [00:00<01:24, 315.73it/s]\u001b[A\n",
            "  1% 298/26883 [00:00<01:19, 336.05it/s]\u001b[A\n",
            "  1% 335/26883 [00:01<01:16, 345.01it/s]\u001b[A\n",
            "  1% 370/26883 [00:01<01:17, 342.86it/s]\u001b[A\n",
            "  2% 405/26883 [00:01<01:17, 342.26it/s]\u001b[A\n",
            "  2% 442/26883 [00:01<01:16, 347.88it/s]\u001b[A\n",
            "  2% 477/26883 [00:01<01:17, 340.39it/s]\u001b[A\n",
            "  2% 514/26883 [00:01<01:16, 346.71it/s]\u001b[A\n",
            "  2% 549/26883 [00:01<01:20, 328.39it/s]\u001b[A\n",
            "  2% 583/26883 [00:01<01:21, 324.57it/s]\u001b[A\n",
            "  2% 623/26883 [00:01<01:15, 345.82it/s]\u001b[A\n",
            "  2% 662/26883 [00:01<01:13, 355.67it/s]\u001b[A\n",
            "  3% 698/26883 [00:02<01:18, 332.96it/s]\u001b[A\n",
            "  3% 732/26883 [00:02<01:21, 322.81it/s]\u001b[A\n",
            "  3% 765/26883 [00:02<01:21, 322.14it/s]\u001b[A\n",
            "  3% 798/26883 [00:02<01:20, 322.26it/s]\u001b[A\n",
            "  3% 831/26883 [00:02<01:22, 317.47it/s]\u001b[A\n",
            "  3% 863/26883 [00:02<01:23, 311.38it/s]\u001b[A\n",
            "  3% 901/26883 [00:02<01:19, 326.51it/s]\u001b[A\n",
            "  3% 939/26883 [00:02<01:16, 340.57it/s]\u001b[A\n",
            "  4% 974/26883 [00:02<01:18, 332.07it/s]\u001b[A\n",
            "  4% 1011/26883 [00:03<01:15, 341.27it/s]\u001b[A\n",
            "  4% 1046/26883 [00:03<01:19, 323.37it/s]\u001b[A\n",
            "  4% 1079/26883 [00:03<01:22, 314.07it/s]\u001b[A\n",
            "  4% 1113/26883 [00:03<01:21, 317.45it/s]\u001b[A\n",
            "  4% 1146/26883 [00:03<01:20, 319.84it/s]\u001b[A\n",
            "  4% 1179/26883 [00:03<01:20, 318.31it/s]\u001b[A\n",
            "  5% 1211/26883 [00:03<01:23, 308.61it/s]\u001b[A\n",
            "  5% 1242/26883 [00:03<01:24, 303.88it/s]\u001b[A\n",
            "  5% 1273/26883 [00:03<01:25, 300.82it/s]\u001b[A\n",
            "  5% 1305/26883 [00:04<01:24, 304.23it/s]\u001b[A\n",
            "  5% 1336/26883 [00:04<01:23, 305.70it/s]\u001b[A\n",
            "  5% 1369/26883 [00:04<01:22, 308.63it/s]\u001b[A\n",
            "  5% 1403/26883 [00:04<01:21, 312.28it/s]\u001b[A\n",
            "  5% 1435/26883 [00:04<01:25, 298.54it/s]\u001b[A\n",
            "  5% 1467/26883 [00:04<01:23, 303.69it/s]\u001b[A\n",
            "  6% 1499/26883 [00:04<01:22, 306.54it/s]\u001b[A\n",
            "  6% 1530/26883 [00:04<01:23, 302.71it/s]\u001b[A\n",
            "  6% 1562/26883 [00:04<01:22, 307.13it/s]\u001b[A\n",
            "  6% 1604/26883 [00:04<01:15, 334.68it/s]\u001b[A\n",
            "  6% 1638/26883 [00:05<01:18, 322.57it/s]\u001b[A\n",
            "  6% 1672/26883 [00:05<01:17, 325.46it/s]\u001b[A\n",
            "  6% 1706/26883 [00:05<01:17, 326.42it/s]\u001b[A\n",
            "  6% 1739/26883 [00:05<01:17, 322.57it/s]\u001b[A\n",
            "  7% 1772/26883 [00:05<01:18, 321.25it/s]\u001b[A\n",
            "  7% 1805/26883 [00:05<01:17, 321.92it/s]\u001b[A\n",
            "  7% 1843/26883 [00:05<01:14, 337.33it/s]\u001b[A\n",
            "  7% 1877/26883 [00:05<01:14, 333.95it/s]\u001b[A\n",
            "  7% 1911/26883 [00:05<01:15, 330.66it/s]\u001b[A\n",
            "  7% 1949/26883 [00:06<01:12, 344.61it/s]\u001b[A\n",
            "  7% 1984/26883 [00:06<01:15, 329.22it/s]\u001b[A\n",
            "  8% 2018/26883 [00:06<01:15, 329.25it/s]\u001b[A\n",
            "  8% 2053/26883 [00:06<01:14, 334.02it/s]\u001b[A\n",
            "  8% 2090/26883 [00:06<01:12, 342.50it/s]\u001b[A\n",
            "  8% 2125/26883 [00:06<01:17, 320.99it/s]\u001b[A\n",
            "  8% 2164/26883 [00:06<01:13, 337.79it/s]\u001b[A\n",
            "  8% 2199/26883 [00:06<01:16, 320.60it/s]\u001b[A\n",
            "  8% 2236/26883 [00:06<01:14, 331.44it/s]\u001b[A\n",
            "  8% 2273/26883 [00:06<01:12, 338.70it/s]\u001b[A\n",
            "  9% 2308/26883 [00:07<01:14, 331.49it/s]\u001b[A\n",
            "  9% 2342/26883 [00:07<01:15, 323.57it/s]\u001b[A\n",
            "  9% 2375/26883 [00:07<01:16, 320.43it/s]\u001b[A\n",
            "  9% 2408/26883 [00:07<01:19, 309.65it/s]\u001b[A\n",
            "  9% 2440/26883 [00:07<01:22, 296.93it/s]\u001b[A\n",
            "  9% 2470/26883 [00:07<01:25, 286.52it/s]\u001b[A\n",
            "  9% 2501/26883 [00:07<01:23, 292.09it/s]\u001b[A\n",
            "  9% 2535/26883 [00:07<01:19, 304.97it/s]\u001b[A\n",
            " 10% 2567/26883 [00:07<01:18, 308.97it/s]\u001b[A\n",
            " 10% 2602/26883 [00:08<01:16, 319.39it/s]\u001b[A\n",
            " 10% 2635/26883 [00:08<01:18, 310.33it/s]\u001b[A\n",
            " 10% 2667/26883 [00:08<01:17, 310.52it/s]\u001b[A\n",
            " 10% 2708/26883 [00:08<01:11, 335.87it/s]\u001b[A\n",
            " 10% 2745/26883 [00:08<01:10, 340.16it/s]\u001b[A\n",
            " 10% 2780/26883 [00:08<01:14, 321.57it/s]\u001b[A\n",
            " 10% 2814/26883 [00:08<01:13, 325.64it/s]\u001b[A\n",
            " 11% 2847/26883 [00:08<01:15, 320.22it/s]\u001b[A\n",
            " 11% 2881/26883 [00:08<01:14, 322.02it/s]\u001b[A\n",
            " 11% 2914/26883 [00:09<01:14, 322.51it/s]\u001b[A\n",
            " 11% 2952/26883 [00:09<01:10, 337.90it/s]\u001b[A\n",
            " 11% 2986/26883 [00:09<01:11, 334.30it/s]\u001b[A\n",
            " 11% 3023/26883 [00:09<01:09, 344.04it/s]\u001b[A\n",
            " 11% 3058/26883 [00:09<01:11, 332.07it/s]\u001b[A\n",
            " 12% 3092/26883 [00:09<01:12, 329.33it/s]\u001b[A\n",
            " 12% 3126/26883 [00:09<01:14, 320.50it/s]\u001b[A\n",
            " 12% 3159/26883 [00:09<01:15, 316.03it/s]\u001b[A\n",
            " 12% 3192/26883 [00:09<01:14, 317.70it/s]\u001b[A\n",
            " 12% 3226/26883 [00:09<01:13, 323.70it/s]\u001b[A\n",
            " 12% 3259/26883 [00:10<01:13, 321.21it/s]\u001b[A\n",
            " 12% 3295/26883 [00:10<01:11, 331.36it/s]\u001b[A\n",
            " 12% 3331/26883 [00:10<01:09, 337.57it/s]\u001b[A\n",
            " 13% 3365/26883 [00:10<01:12, 324.47it/s]\u001b[A\n",
            " 13% 3398/26883 [00:10<01:13, 317.93it/s]\u001b[A\n",
            " 13% 3431/26883 [00:10<01:14, 315.09it/s]\u001b[A\n",
            " 13% 3464/26883 [00:10<01:13, 317.81it/s]\u001b[A\n",
            " 13% 3498/26883 [00:10<01:13, 318.85it/s]\u001b[A\n",
            " 13% 3532/26883 [00:10<01:12, 323.79it/s]\u001b[A\n",
            " 13% 3567/26883 [00:11<01:10, 330.31it/s]\u001b[A\n",
            " 13% 3601/26883 [00:11<01:12, 323.21it/s]\u001b[A\n",
            " 14% 3634/26883 [00:11<01:13, 316.89it/s]\u001b[A\n",
            " 14% 3669/26883 [00:11<01:11, 325.47it/s]\u001b[A\n",
            " 14% 3702/26883 [00:11<01:16, 303.00it/s]\u001b[A\n",
            " 14% 3734/26883 [00:11<01:15, 307.01it/s]\u001b[A\n",
            " 14% 3765/26883 [00:11<01:16, 301.42it/s]\u001b[A\n",
            " 14% 3796/26883 [00:11<01:16, 300.43it/s]\u001b[A\n",
            " 14% 3832/26883 [00:11<01:13, 315.38it/s]\u001b[A\n",
            " 14% 3864/26883 [00:12<01:13, 311.60it/s]\u001b[A\n",
            " 14% 3898/26883 [00:12<01:11, 319.56it/s]\u001b[A\n",
            " 15% 3931/26883 [00:12<01:15, 305.31it/s]\u001b[A\n",
            " 15% 3964/26883 [00:12<01:14, 307.61it/s]\u001b[A\n",
            " 15% 3996/26883 [00:12<01:14, 306.32it/s]\u001b[A\n",
            " 15% 4031/26883 [00:12<01:12, 315.21it/s]\u001b[A\n",
            " 15% 4073/26883 [00:12<01:06, 341.23it/s]\u001b[A\n",
            " 15% 4108/26883 [00:12<01:08, 334.70it/s]\u001b[A\n",
            " 15% 4142/26883 [00:12<01:07, 334.88it/s]\u001b[A\n",
            " 16% 4176/26883 [00:12<01:07, 336.04it/s]\u001b[A\n",
            " 16% 4210/26883 [00:13<01:07, 333.58it/s]\u001b[A\n",
            " 16% 4244/26883 [00:13<01:09, 326.07it/s]\u001b[A\n",
            " 16% 4282/26883 [00:13<01:06, 340.48it/s]\u001b[A\n",
            " 16% 4321/26883 [00:13<01:03, 353.97it/s]\u001b[A\n",
            " 16% 4357/26883 [00:13<01:06, 337.27it/s]\u001b[A\n",
            " 16% 4398/26883 [00:13<01:03, 353.38it/s]\u001b[A\n",
            " 16% 4434/26883 [00:13<01:06, 335.61it/s]\u001b[A\n",
            " 17% 4468/26883 [00:13<01:09, 321.06it/s]\u001b[A\n",
            " 17% 4501/26883 [00:13<01:14, 299.11it/s]\u001b[A\n",
            " 17% 4534/26883 [00:14<01:13, 304.38it/s]\u001b[A\n",
            " 17% 4565/26883 [00:14<01:14, 300.28it/s]\u001b[A\n",
            " 17% 4596/26883 [00:14<01:13, 301.34it/s]\u001b[A\n",
            " 17% 4632/26883 [00:14<01:10, 315.77it/s]\u001b[A\n",
            " 17% 4669/26883 [00:14<01:07, 330.44it/s]\u001b[A\n",
            " 17% 4703/26883 [00:14<01:07, 327.26it/s]\u001b[A\n",
            " 18% 4736/26883 [00:14<01:10, 315.88it/s]\u001b[A\n",
            " 18% 4768/26883 [00:14<01:12, 303.10it/s]\u001b[A\n",
            " 18% 4803/26883 [00:14<01:10, 314.43it/s]\u001b[A\n",
            " 18% 4835/26883 [00:15<01:10, 312.97it/s]\u001b[A\n",
            " 18% 4867/26883 [00:15<01:10, 310.50it/s]\u001b[A\n",
            " 18% 4899/26883 [00:15<01:12, 305.31it/s]\u001b[A\n",
            " 18% 4935/26883 [00:15<01:09, 317.82it/s]\u001b[A\n",
            " 19% 4976/26883 [00:15<01:04, 342.15it/s]\u001b[A\n",
            " 19% 5011/26883 [00:15<01:06, 327.41it/s]\u001b[A\n",
            " 19% 5045/26883 [00:15<01:06, 330.32it/s]\u001b[A\n",
            " 19% 5081/26883 [00:15<01:04, 336.37it/s]\u001b[A\n",
            " 19% 5115/26883 [00:15<01:06, 325.00it/s]\u001b[A\n",
            " 19% 5148/26883 [00:15<01:08, 316.16it/s]\u001b[A\n",
            " 19% 5180/26883 [00:16<01:08, 315.15it/s]\u001b[A\n",
            " 19% 5212/26883 [00:16<01:09, 311.37it/s]\u001b[A\n",
            " 20% 5248/26883 [00:16<01:06, 323.69it/s]\u001b[A\n",
            " 20% 5286/26883 [00:16<01:03, 339.08it/s]\u001b[A\n",
            " 20% 5321/26883 [00:16<01:03, 339.58it/s]\u001b[A\n",
            " 20% 5356/26883 [00:16<01:05, 330.35it/s]\u001b[A\n",
            " 20% 5390/26883 [00:16<01:06, 322.99it/s]\u001b[A\n",
            " 20% 5423/26883 [00:16<01:06, 323.51it/s]\u001b[A\n",
            " 20% 5456/26883 [00:16<01:08, 312.06it/s]\u001b[A\n",
            " 20% 5488/26883 [00:17<01:10, 301.38it/s]\u001b[A\n",
            " 21% 5521/26883 [00:17<01:09, 308.66it/s]\u001b[A\n",
            " 21% 5557/26883 [00:17<01:06, 322.34it/s]\u001b[A\n",
            " 21% 5594/26883 [00:17<01:03, 334.05it/s]\u001b[A\n",
            " 21% 5628/26883 [00:17<01:05, 325.70it/s]\u001b[A\n",
            " 21% 5661/26883 [00:17<01:05, 325.01it/s]\u001b[A\n",
            " 21% 5694/26883 [00:17<01:07, 311.95it/s]\u001b[A\n",
            " 21% 5726/26883 [00:17<01:08, 309.97it/s]\u001b[A\n",
            " 21% 5764/26883 [00:17<01:04, 328.15it/s]\u001b[A\n",
            " 22% 5797/26883 [00:18<01:06, 316.94it/s]\u001b[A\n",
            " 22% 5832/26883 [00:18<01:04, 324.73it/s]\u001b[A\n",
            " 22% 5872/26883 [00:18<01:01, 344.43it/s]\u001b[A\n",
            " 22% 5907/26883 [00:18<01:01, 343.68it/s]\u001b[A\n",
            " 22% 5942/26883 [00:18<01:02, 334.66it/s]\u001b[A\n",
            " 22% 5976/26883 [00:18<01:05, 317.74it/s]\u001b[A\n",
            " 22% 6010/26883 [00:18<01:04, 323.80it/s]\u001b[A\n",
            " 22% 6043/26883 [00:18<01:07, 308.01it/s]\u001b[A\n",
            " 23% 6082/26883 [00:18<01:03, 329.60it/s]\u001b[A\n",
            " 23% 6116/26883 [00:18<01:05, 315.89it/s]\u001b[A\n",
            " 23% 6151/26883 [00:19<01:03, 324.55it/s]\u001b[A\n",
            " 23% 6184/26883 [00:19<01:04, 321.70it/s]\u001b[A\n",
            " 23% 6217/26883 [00:19<01:05, 316.90it/s]\u001b[A\n",
            " 23% 6252/26883 [00:19<01:03, 323.66it/s]\u001b[A\n",
            " 23% 6287/26883 [00:19<01:02, 330.27it/s]\u001b[A\n",
            " 24% 6321/26883 [00:19<01:02, 331.09it/s]\u001b[A\n",
            " 24% 6355/26883 [00:19<01:02, 328.63it/s]\u001b[A\n",
            " 24% 6388/26883 [00:19<01:02, 326.79it/s]\u001b[A\n",
            " 24% 6422/26883 [00:19<01:02, 327.85it/s]\u001b[A\n",
            " 24% 6455/26883 [00:20<01:03, 323.26it/s]\u001b[A\n",
            " 24% 6488/26883 [00:20<01:07, 304.24it/s]\u001b[A\n",
            " 24% 6521/26883 [00:20<01:05, 309.51it/s]\u001b[A\n",
            " 24% 6553/26883 [00:20<01:05, 308.42it/s]\u001b[A\n",
            " 25% 6587/26883 [00:20<01:04, 315.72it/s]\u001b[A\n",
            " 25% 6619/26883 [00:20<01:07, 299.16it/s]\u001b[A\n",
            " 25% 6652/26883 [00:20<01:05, 307.48it/s]\u001b[A\n",
            " 25% 6685/26883 [00:20<01:04, 311.50it/s]\u001b[A\n",
            " 25% 6717/26883 [00:20<01:06, 304.57it/s]\u001b[A\n",
            " 25% 6748/26883 [00:20<01:07, 298.22it/s]\u001b[A\n",
            " 25% 6779/26883 [00:21<01:07, 297.52it/s]\u001b[A\n",
            " 25% 6809/26883 [00:21<01:08, 291.53it/s]\u001b[A\n",
            " 25% 6845/26883 [00:21<01:04, 310.19it/s]\u001b[A\n",
            " 26% 6883/26883 [00:21<01:00, 329.46it/s]\u001b[A\n",
            " 26% 6917/26883 [00:21<01:01, 325.58it/s]\u001b[A\n",
            " 26% 6952/26883 [00:21<01:00, 329.47it/s]\u001b[A\n",
            " 26% 6989/26883 [00:21<00:58, 340.01it/s]\u001b[A\n",
            " 26% 7024/26883 [00:21<01:03, 313.38it/s]\u001b[A\n",
            " 26% 7056/26883 [00:21<01:03, 313.91it/s]\u001b[A\n",
            " 26% 7092/26883 [00:22<01:00, 325.04it/s]\u001b[A\n",
            " 27% 7125/26883 [00:22<01:03, 312.55it/s]\u001b[A\n",
            " 27% 7158/26883 [00:22<01:02, 317.35it/s]\u001b[A\n",
            " 27% 7193/26883 [00:22<01:00, 324.99it/s]\u001b[A\n",
            " 27% 7226/26883 [00:22<01:01, 318.46it/s]\u001b[A\n",
            " 27% 7258/26883 [00:22<01:02, 313.22it/s]\u001b[A\n",
            " 27% 7295/26883 [00:22<00:59, 329.11it/s]\u001b[A\n",
            " 27% 7329/26883 [00:22<00:59, 326.00it/s]\u001b[A\n",
            " 27% 7362/26883 [00:22<01:01, 319.96it/s]\u001b[A\n",
            " 28% 7395/26883 [00:23<01:02, 310.49it/s]\u001b[A\n",
            " 28% 7428/26883 [00:23<01:01, 315.95it/s]\u001b[A\n",
            " 28% 7462/26883 [00:23<01:00, 322.61it/s]\u001b[A\n",
            " 28% 7495/26883 [00:23<01:01, 317.39it/s]\u001b[A\n",
            " 28% 7528/26883 [00:23<01:00, 319.02it/s]\u001b[A\n",
            " 28% 7560/26883 [00:23<01:02, 309.52it/s]\u001b[A\n",
            " 28% 7597/26883 [00:23<00:59, 324.11it/s]\u001b[A\n",
            " 28% 7633/26883 [00:23<00:58, 331.50it/s]\u001b[A\n",
            " 29% 7667/26883 [00:23<00:57, 331.86it/s]\u001b[A\n",
            " 29% 7706/26883 [00:23<00:55, 347.24it/s]\u001b[A\n",
            " 29% 7744/26883 [00:24<00:53, 355.50it/s]\u001b[A\n",
            " 29% 7780/26883 [00:24<00:56, 339.69it/s]\u001b[A\n",
            " 29% 7815/26883 [00:24<00:58, 328.49it/s]\u001b[A\n",
            " 29% 7849/26883 [00:24<00:58, 325.07it/s]\u001b[A\n",
            " 29% 7884/26883 [00:24<00:57, 331.34it/s]\u001b[A\n",
            " 29% 7921/26883 [00:24<00:55, 342.38it/s]\u001b[A\n",
            " 30% 7956/26883 [00:24<00:57, 329.29it/s]\u001b[A\n",
            " 30% 7990/26883 [00:24<00:57, 329.35it/s]\u001b[A\n",
            " 30% 8024/26883 [00:24<00:58, 323.68it/s]\u001b[A\n",
            " 30% 8060/26883 [00:25<00:56, 331.85it/s]\u001b[A\n",
            " 30% 8094/26883 [00:25<00:57, 326.25it/s]\u001b[A\n",
            " 30% 8128/26883 [00:25<00:56, 329.43it/s]\u001b[A\n",
            " 30% 8162/26883 [00:25<00:58, 320.76it/s]\u001b[A\n",
            " 30% 8197/26883 [00:25<00:57, 327.77it/s]\u001b[A\n",
            " 31% 8230/26883 [00:25<00:57, 323.53it/s]\u001b[A\n",
            " 31% 8265/26883 [00:25<00:56, 329.40it/s]\u001b[A\n",
            " 31% 8299/26883 [00:25<00:56, 331.57it/s]\u001b[A\n",
            " 31% 8333/26883 [00:25<00:59, 314.06it/s]\u001b[A\n",
            " 31% 8365/26883 [00:25<01:01, 302.21it/s]\u001b[A\n",
            " 31% 8400/26883 [00:26<00:58, 313.70it/s]\u001b[A\n",
            " 31% 8439/26883 [00:26<00:55, 334.94it/s]\u001b[A\n",
            " 32% 8473/26883 [00:26<00:56, 324.10it/s]\u001b[A\n",
            " 32% 8509/26883 [00:26<00:55, 333.76it/s]\u001b[A\n",
            " 32% 8543/26883 [00:26<00:57, 318.33it/s]\u001b[A\n",
            " 32% 8578/26883 [00:26<00:56, 325.60it/s]\u001b[A\n",
            " 32% 8613/26883 [00:26<00:54, 332.23it/s]\u001b[A\n",
            " 32% 8647/26883 [00:26<00:54, 332.70it/s]\u001b[A\n",
            " 32% 8681/26883 [00:26<00:57, 317.87it/s]\u001b[A\n",
            " 32% 8720/26883 [00:27<00:54, 335.86it/s]\u001b[A\n",
            " 33% 8754/26883 [00:27<00:57, 316.21it/s]\u001b[A\n",
            " 33% 8787/26883 [00:27<00:57, 315.55it/s]\u001b[A\n",
            " 33% 8821/26883 [00:27<00:56, 320.86it/s]\u001b[A\n",
            " 33% 8854/26883 [00:27<00:58, 307.50it/s]\u001b[A\n",
            " 33% 8893/26883 [00:27<00:54, 329.38it/s]\u001b[A\n",
            " 33% 8927/26883 [00:27<00:56, 318.28it/s]\u001b[A\n",
            " 33% 8960/26883 [00:27<00:56, 315.88it/s]\u001b[A\n",
            " 33% 8995/26883 [00:27<00:55, 324.09it/s]\u001b[A\n",
            " 34% 9034/26883 [00:28<00:52, 337.78it/s]\u001b[A\n",
            " 34% 9068/26883 [00:28<00:53, 331.85it/s]\u001b[A\n",
            " 34% 9102/26883 [00:28<00:54, 328.47it/s]\u001b[A\n",
            " 34% 9144/26883 [00:28<00:50, 353.95it/s]\u001b[A\n",
            " 34% 9180/26883 [00:28<00:50, 349.66it/s]\u001b[A\n",
            " 34% 9216/26883 [00:28<00:54, 326.86it/s]\u001b[A\n",
            " 34% 9251/26883 [00:28<00:52, 332.87it/s]\u001b[A\n",
            " 35% 9285/26883 [00:28<00:58, 303.33it/s]\u001b[A\n",
            " 35% 9316/26883 [00:28<00:58, 300.15it/s]\u001b[A\n",
            " 35% 9347/26883 [00:29<00:58, 298.17it/s]\u001b[A\n",
            " 35% 9379/26883 [00:29<00:57, 303.69it/s]\u001b[A\n",
            " 35% 9410/26883 [00:29<00:57, 304.09it/s]\u001b[A\n",
            " 35% 9444/26883 [00:29<00:55, 313.66it/s]\u001b[A\n",
            " 35% 9476/26883 [00:29<00:55, 313.24it/s]\u001b[A\n",
            " 35% 9511/26883 [00:29<00:54, 320.79it/s]\u001b[A\n",
            " 36% 9552/26883 [00:29<00:50, 344.99it/s]\u001b[A\n",
            " 36% 9587/26883 [00:29<00:50, 343.96it/s]\u001b[A\n",
            " 36% 9622/26883 [00:29<00:50, 343.39it/s]\u001b[A\n",
            " 36% 9657/26883 [00:29<00:51, 333.19it/s]\u001b[A\n",
            " 36% 9691/26883 [00:30<00:51, 334.95it/s]\u001b[A\n",
            " 36% 9725/26883 [00:30<00:51, 330.25it/s]\u001b[A\n",
            " 36% 9759/26883 [00:30<00:54, 314.57it/s]\u001b[A\n",
            " 36% 9791/26883 [00:30<00:54, 311.99it/s]\u001b[A\n",
            " 37% 9823/26883 [00:30<00:54, 313.09it/s]\u001b[A\n",
            " 37% 9858/26883 [00:30<00:52, 323.46it/s]\u001b[A\n",
            " 37% 9891/26883 [00:30<00:53, 316.75it/s]\u001b[A\n",
            " 37% 9927/26883 [00:30<00:52, 325.87it/s]\u001b[A\n",
            " 37% 9965/26883 [00:30<00:50, 338.00it/s]\u001b[A\n",
            " 37% 10000/26883 [00:31<00:49, 339.22it/s]\u001b[A\n",
            " 37% 10036/26883 [00:31<00:49, 342.84it/s]\u001b[A\n",
            " 37% 10071/26883 [00:31<00:51, 329.01it/s]\u001b[A\n",
            " 38% 10108/26883 [00:31<00:49, 335.54it/s]\u001b[A\n",
            " 38% 10142/26883 [00:31<00:49, 336.03it/s]\u001b[A\n",
            " 38% 10179/26883 [00:31<00:48, 342.78it/s]\u001b[A\n",
            " 38% 10214/26883 [00:31<00:48, 344.74it/s]\u001b[A\n",
            " 38% 10249/26883 [00:31<00:50, 332.13it/s]\u001b[A\n",
            " 38% 10283/26883 [00:31<00:51, 324.73it/s]\u001b[A\n",
            " 38% 10317/26883 [00:31<00:50, 327.38it/s]\u001b[A\n",
            " 39% 10350/26883 [00:32<00:51, 323.86it/s]\u001b[A\n",
            " 39% 10387/26883 [00:32<00:48, 336.72it/s]\u001b[A\n",
            " 39% 10421/26883 [00:32<00:49, 333.93it/s]\u001b[A\n",
            " 39% 10455/26883 [00:32<00:49, 333.39it/s]\u001b[A\n",
            " 39% 10494/26883 [00:32<00:46, 349.75it/s]\u001b[A\n",
            " 39% 10535/26883 [00:32<00:44, 365.54it/s]\u001b[A\n",
            " 39% 10572/26883 [00:32<00:49, 330.96it/s]\u001b[A\n",
            " 39% 10606/26883 [00:32<00:49, 327.88it/s]\u001b[A\n",
            " 40% 10640/26883 [00:32<00:49, 328.60it/s]\u001b[A\n",
            " 40% 10674/26883 [00:33<00:50, 320.75it/s]\u001b[A\n",
            " 40% 10707/26883 [00:33<00:51, 314.26it/s]\u001b[A\n",
            " 40% 10739/26883 [00:33<00:52, 304.61it/s]\u001b[A\n",
            " 40% 10774/26883 [00:33<00:50, 316.86it/s]\u001b[A\n",
            " 40% 10806/26883 [00:33<00:51, 311.64it/s]\u001b[A\n",
            " 40% 10840/26883 [00:33<00:50, 315.45it/s]\u001b[A\n",
            " 40% 10873/26883 [00:33<00:50, 316.88it/s]\u001b[A\n",
            " 41% 10908/26883 [00:33<00:49, 323.33it/s]\u001b[A\n",
            " 41% 10944/26883 [00:33<00:47, 332.63it/s]\u001b[A\n",
            " 41% 10979/26883 [00:33<00:47, 335.27it/s]\u001b[A\n",
            " 41% 11016/26883 [00:34<00:46, 340.66it/s]\u001b[A\n",
            " 41% 11052/26883 [00:34<00:46, 342.60it/s]\u001b[A\n",
            " 41% 11089/26883 [00:34<00:45, 347.38it/s]\u001b[A\n",
            " 41% 11124/26883 [00:34<00:46, 338.79it/s]\u001b[A\n",
            " 42% 11158/26883 [00:34<00:47, 331.51it/s]\u001b[A\n",
            " 42% 11193/26883 [00:34<00:46, 336.81it/s]\u001b[A\n",
            " 42% 11227/26883 [00:34<00:46, 334.23it/s]\u001b[A\n",
            " 42% 11263/26883 [00:34<00:46, 335.33it/s]\u001b[A\n",
            " 42% 11297/26883 [00:34<00:47, 325.74it/s]\u001b[A\n",
            " 42% 11330/26883 [00:35<00:48, 320.89it/s]\u001b[A\n",
            " 42% 11363/26883 [00:35<00:48, 321.33it/s]\u001b[A\n",
            " 42% 11400/26883 [00:35<00:46, 333.61it/s]\u001b[A\n",
            " 43% 11434/26883 [00:35<00:47, 322.11it/s]\u001b[A\n",
            " 43% 11467/26883 [00:35<00:50, 307.87it/s]\u001b[A\n",
            " 43% 11500/26883 [00:35<00:49, 312.54it/s]\u001b[A\n",
            " 43% 11532/26883 [00:35<00:49, 312.66it/s]\u001b[A\n",
            " 43% 11564/26883 [00:35<00:50, 300.46it/s]\u001b[A\n",
            " 43% 11597/26883 [00:35<00:49, 307.62it/s]\u001b[A\n",
            " 43% 11630/26883 [00:35<00:48, 312.48it/s]\u001b[A\n",
            " 43% 11662/26883 [00:36<00:48, 311.70it/s]\u001b[A\n",
            " 43% 11694/26883 [00:36<00:50, 302.33it/s]\u001b[A\n",
            " 44% 11728/26883 [00:36<00:49, 307.35it/s]\u001b[A\n",
            " 44% 11759/26883 [00:36<00:50, 301.68it/s]\u001b[A\n",
            " 44% 11796/26883 [00:36<00:46, 321.17it/s]\u001b[A\n",
            " 44% 11829/26883 [00:36<00:47, 316.63it/s]\u001b[A\n",
            " 44% 11861/26883 [00:36<00:48, 312.58it/s]\u001b[A\n",
            " 44% 11895/26883 [00:36<00:46, 319.63it/s]\u001b[A\n",
            " 44% 11928/26883 [00:36<00:47, 315.44it/s]\u001b[A\n",
            " 45% 11963/26883 [00:37<00:45, 325.14it/s]\u001b[A\n",
            " 45% 12000/26883 [00:37<00:44, 336.07it/s]\u001b[A\n",
            " 45% 12034/26883 [00:37<00:44, 335.37it/s]\u001b[A\n",
            " 45% 12068/26883 [00:37<00:44, 332.19it/s]\u001b[A\n",
            " 45% 12105/26883 [00:37<00:43, 336.26it/s]\u001b[A\n",
            " 45% 12139/26883 [00:37<00:44, 330.99it/s]\u001b[A\n",
            " 45% 12176/26883 [00:37<00:43, 339.83it/s]\u001b[A\n",
            " 45% 12211/26883 [00:37<00:43, 340.53it/s]\u001b[A\n",
            " 46% 12248/26883 [00:37<00:42, 346.49it/s]\u001b[A\n",
            " 46% 12283/26883 [00:37<00:42, 345.43it/s]\u001b[A\n",
            " 46% 12318/26883 [00:38<00:45, 323.09it/s]\u001b[A\n",
            " 46% 12353/26883 [00:38<00:44, 330.01it/s]\u001b[A\n",
            " 46% 12390/26883 [00:38<00:42, 339.20it/s]\u001b[A\n",
            " 46% 12426/26883 [00:38<00:42, 343.70it/s]\u001b[A\n",
            " 46% 12461/26883 [00:38<00:43, 328.13it/s]\u001b[A\n",
            " 47% 12501/26883 [00:38<00:41, 346.21it/s]\u001b[A\n",
            " 47% 12536/26883 [00:38<00:43, 329.22it/s]\u001b[A\n",
            " 47% 12570/26883 [00:38<00:43, 330.47it/s]\u001b[A\n",
            " 47% 12604/26883 [00:38<00:44, 324.42it/s]\u001b[A\n",
            " 47% 12637/26883 [00:39<00:46, 304.36it/s]\u001b[A\n",
            " 47% 12668/26883 [00:39<00:47, 300.59it/s]\u001b[A\n",
            " 47% 12703/26883 [00:39<00:45, 313.86it/s]\u001b[A\n",
            " 47% 12735/26883 [00:39<00:46, 304.68it/s]\u001b[A\n",
            " 47% 12766/26883 [00:39<00:46, 302.40it/s]\u001b[A\n",
            " 48% 12801/26883 [00:39<00:44, 315.38it/s]\u001b[A\n",
            " 48% 12836/26883 [00:39<00:43, 324.08it/s]\u001b[A\n",
            " 48% 12869/26883 [00:39<00:44, 316.91it/s]\u001b[A\n",
            " 48% 12902/26883 [00:39<00:43, 317.78it/s]\u001b[A\n",
            " 48% 12934/26883 [00:40<00:43, 318.38it/s]\u001b[A\n",
            " 48% 12968/26883 [00:40<00:43, 323.57it/s]\u001b[A\n",
            " 48% 13001/26883 [00:40<00:42, 323.72it/s]\u001b[A\n",
            " 48% 13034/26883 [00:40<00:43, 319.22it/s]\u001b[A\n",
            " 49% 13068/26883 [00:40<00:42, 322.58it/s]\u001b[A\n",
            " 49% 13104/26883 [00:40<00:41, 333.33it/s]\u001b[A\n",
            " 49% 13138/26883 [00:40<00:42, 324.57it/s]\u001b[A\n",
            " 49% 13171/26883 [00:40<00:42, 319.10it/s]\u001b[A\n",
            " 49% 13206/26883 [00:40<00:41, 327.93it/s]\u001b[A\n",
            " 49% 13243/26883 [00:40<00:40, 337.39it/s]\u001b[A\n",
            " 49% 13277/26883 [00:41<00:42, 323.66it/s]\u001b[A\n",
            " 50% 13310/26883 [00:41<00:42, 319.77it/s]\u001b[A\n",
            " 50% 13345/26883 [00:41<00:41, 326.87it/s]\u001b[A\n",
            " 50% 13378/26883 [00:41<00:41, 325.45it/s]\u001b[A\n",
            " 50% 13411/26883 [00:41<00:41, 323.91it/s]\u001b[A\n",
            " 50% 13444/26883 [00:41<00:42, 312.96it/s]\u001b[A\n",
            " 50% 13476/26883 [00:41<00:42, 312.68it/s]\u001b[A\n",
            " 50% 13510/26883 [00:41<00:42, 317.67it/s]\u001b[A\n",
            " 50% 13547/26883 [00:41<00:40, 331.59it/s]\u001b[A\n",
            " 51% 13583/26883 [00:42<00:39, 338.88it/s]\u001b[A\n",
            " 51% 13617/26883 [00:42<00:39, 332.43it/s]\u001b[A\n",
            " 51% 13655/26883 [00:42<00:38, 341.70it/s]\u001b[A\n",
            " 51% 13690/26883 [00:42<00:41, 321.45it/s]\u001b[A\n",
            " 51% 13724/26883 [00:42<00:40, 325.35it/s]\u001b[A\n",
            " 51% 13759/26883 [00:42<00:40, 327.60it/s]\u001b[A\n",
            " 51% 13792/26883 [00:42<00:40, 320.10it/s]\u001b[A\n",
            " 51% 13825/26883 [00:42<00:40, 319.17it/s]\u001b[A\n",
            " 52% 13860/26883 [00:42<00:39, 326.53it/s]\u001b[A\n",
            " 52% 13899/26883 [00:42<00:37, 343.99it/s]\u001b[A\n",
            " 52% 13934/26883 [00:43<00:38, 340.50it/s]\u001b[A\n",
            " 52% 13969/26883 [00:43<00:40, 322.59it/s]\u001b[A\n",
            " 52% 14004/26883 [00:43<00:39, 328.75it/s]\u001b[A\n",
            " 52% 14038/26883 [00:43<00:39, 329.30it/s]\u001b[A\n",
            " 52% 14072/26883 [00:43<00:39, 323.82it/s]\u001b[A\n",
            " 52% 14109/26883 [00:43<00:38, 334.96it/s]\u001b[A\n",
            " 53% 14143/26883 [00:43<00:38, 331.65it/s]\u001b[A\n",
            " 53% 14177/26883 [00:43<00:38, 332.13it/s]\u001b[A\n",
            " 53% 14211/26883 [00:43<00:38, 332.93it/s]\u001b[A\n",
            " 53% 14246/26883 [00:44<00:37, 337.65it/s]\u001b[A\n",
            " 53% 14280/26883 [00:44<00:37, 333.26it/s]\u001b[A\n",
            " 53% 14315/26883 [00:44<00:37, 338.00it/s]\u001b[A\n",
            " 53% 14351/26883 [00:44<00:36, 341.95it/s]\u001b[A\n",
            " 54% 14386/26883 [00:44<00:39, 317.34it/s]\u001b[A\n",
            " 54% 14421/26883 [00:44<00:38, 324.77it/s]\u001b[A\n",
            " 54% 14454/26883 [00:44<00:38, 323.64it/s]\u001b[A\n",
            " 54% 14487/26883 [00:44<00:38, 323.00it/s]\u001b[A\n",
            " 54% 14520/26883 [00:44<00:39, 313.53it/s]\u001b[A\n",
            " 54% 14557/26883 [00:44<00:37, 325.18it/s]\u001b[A\n",
            " 54% 14590/26883 [00:45<00:37, 324.28it/s]\u001b[A\n",
            " 54% 14625/26883 [00:45<00:37, 330.10it/s]\u001b[A\n",
            " 55% 14660/26883 [00:45<00:36, 335.39it/s]\u001b[A\n",
            " 55% 14694/26883 [00:45<00:37, 324.57it/s]\u001b[A\n",
            " 55% 14729/26883 [00:45<00:36, 330.90it/s]\u001b[A\n",
            " 55% 14765/26883 [00:45<00:35, 337.48it/s]\u001b[A\n",
            " 55% 14799/26883 [00:45<00:36, 331.82it/s]\u001b[A\n",
            " 55% 14837/26883 [00:45<00:34, 345.78it/s]\u001b[A\n",
            " 55% 14872/26883 [00:45<00:35, 337.07it/s]\u001b[A\n",
            " 55% 14906/26883 [00:46<00:36, 332.25it/s]\u001b[A\n",
            " 56% 14940/26883 [00:46<00:38, 310.27it/s]\u001b[A\n",
            " 56% 14972/26883 [00:46<00:38, 309.50it/s]\u001b[A\n",
            " 56% 15005/26883 [00:46<00:37, 314.05it/s]\u001b[A\n",
            " 56% 15040/26883 [00:46<00:36, 321.33it/s]\u001b[A\n",
            " 56% 15073/26883 [00:46<00:38, 310.10it/s]\u001b[A\n",
            " 56% 15110/26883 [00:46<00:36, 324.04it/s]\u001b[A\n",
            " 56% 15143/26883 [00:46<00:36, 324.08it/s]\u001b[A\n",
            " 56% 15176/26883 [00:46<00:36, 318.05it/s]\u001b[A\n",
            " 57% 15211/26883 [00:46<00:35, 326.87it/s]\u001b[A\n",
            " 57% 15244/26883 [00:47<00:35, 325.47it/s]\u001b[A\n",
            " 57% 15277/26883 [00:47<00:36, 316.12it/s]\u001b[A\n",
            " 57% 15312/26883 [00:47<00:35, 323.09it/s]\u001b[A\n",
            " 57% 15346/26883 [00:47<00:35, 326.37it/s]\u001b[A\n",
            " 57% 15381/26883 [00:47<00:34, 332.69it/s]\u001b[A\n",
            " 57% 15415/26883 [00:47<00:34, 329.80it/s]\u001b[A\n",
            " 57% 15449/26883 [00:47<00:34, 329.61it/s]\u001b[A\n",
            " 58% 15484/26883 [00:47<00:34, 334.79it/s]\u001b[A\n",
            " 58% 15518/26883 [00:47<00:34, 325.27it/s]\u001b[A\n",
            " 58% 15551/26883 [00:48<00:35, 319.22it/s]\u001b[A\n",
            " 58% 15587/26883 [00:48<00:34, 328.79it/s]\u001b[A\n",
            " 58% 15620/26883 [00:48<00:34, 322.63it/s]\u001b[A\n",
            " 58% 15653/26883 [00:48<00:34, 320.89it/s]\u001b[A\n",
            " 58% 15687/26883 [00:48<00:34, 326.09it/s]\u001b[A\n",
            " 58% 15720/26883 [00:48<00:35, 315.80it/s]\u001b[A\n",
            " 59% 15753/26883 [00:48<00:35, 317.12it/s]\u001b[A\n",
            " 59% 15785/26883 [00:48<00:36, 300.20it/s]\u001b[A\n",
            " 59% 15820/26883 [00:48<00:35, 313.14it/s]\u001b[A\n",
            " 59% 15853/26883 [00:48<00:34, 316.45it/s]\u001b[A\n",
            " 59% 15886/26883 [00:49<00:34, 320.23it/s]\u001b[A\n",
            " 59% 15919/26883 [00:49<00:34, 321.16it/s]\u001b[A\n",
            " 59% 15952/26883 [00:49<00:34, 316.06it/s]\u001b[A\n",
            " 59% 15986/26883 [00:49<00:34, 320.10it/s]\u001b[A\n",
            " 60% 16022/26883 [00:49<00:32, 330.86it/s]\u001b[A\n",
            " 60% 16056/26883 [00:49<00:33, 322.20it/s]\u001b[A\n",
            " 60% 16093/26883 [00:49<00:32, 331.77it/s]\u001b[A\n",
            " 60% 16127/26883 [00:49<00:33, 324.86it/s]\u001b[A\n",
            " 60% 16160/26883 [00:49<00:33, 324.85it/s]\u001b[A\n",
            " 60% 16193/26883 [00:50<00:32, 324.04it/s]\u001b[A\n",
            " 60% 16228/26883 [00:50<00:32, 329.75it/s]\u001b[A\n",
            " 60% 16262/26883 [00:50<00:31, 332.30it/s]\u001b[A\n",
            " 61% 16300/26883 [00:50<00:30, 345.80it/s]\u001b[A\n",
            " 61% 16335/26883 [00:50<00:31, 332.28it/s]\u001b[A\n",
            " 61% 16369/26883 [00:50<00:32, 324.25it/s]\u001b[A\n",
            " 61% 16402/26883 [00:50<00:32, 324.44it/s]\u001b[A\n",
            " 61% 16435/26883 [00:50<00:32, 322.27it/s]\u001b[A\n",
            " 61% 16470/26883 [00:50<00:31, 327.07it/s]\u001b[A\n",
            " 61% 16503/26883 [00:50<00:32, 319.24it/s]\u001b[A\n",
            " 62% 16538/26883 [00:51<00:31, 327.77it/s]\u001b[A\n",
            " 62% 16578/26883 [00:51<00:29, 344.06it/s]\u001b[A\n",
            " 62% 16613/26883 [00:51<00:30, 340.61it/s]\u001b[A\n",
            " 62% 16648/26883 [00:51<00:30, 335.18it/s]\u001b[A\n",
            " 62% 16687/26883 [00:51<00:29, 347.81it/s]\u001b[A\n",
            " 62% 16724/26883 [00:51<00:28, 352.99it/s]\u001b[A\n",
            " 62% 16760/26883 [00:51<00:30, 328.41it/s]\u001b[A\n",
            " 62% 16797/26883 [00:51<00:29, 336.67it/s]\u001b[A\n",
            " 63% 16831/26883 [00:51<00:30, 327.09it/s]\u001b[A\n",
            " 63% 16864/26883 [00:52<00:31, 320.78it/s]\u001b[A\n",
            " 63% 16900/26883 [00:52<00:30, 331.24it/s]\u001b[A\n",
            " 63% 16939/26883 [00:52<00:28, 345.30it/s]\u001b[A\n",
            " 63% 16974/26883 [00:52<00:29, 333.61it/s]\u001b[A\n",
            " 63% 17008/26883 [00:52<00:30, 319.64it/s]\u001b[A\n",
            " 63% 17041/26883 [00:52<00:31, 313.80it/s]\u001b[A\n",
            " 64% 17076/26883 [00:52<00:30, 319.42it/s]\u001b[A\n",
            " 64% 17109/26883 [00:52<00:30, 316.28it/s]\u001b[A\n",
            " 64% 17144/26883 [00:52<00:29, 325.43it/s]\u001b[A\n",
            " 64% 17181/26883 [00:53<00:28, 336.88it/s]\u001b[A\n",
            " 64% 17215/26883 [00:53<00:28, 335.58it/s]\u001b[A\n",
            " 64% 17249/26883 [00:53<00:28, 333.72it/s]\u001b[A\n",
            " 64% 17283/26883 [00:53<00:29, 328.16it/s]\u001b[A\n",
            " 64% 17316/26883 [00:53<00:31, 308.41it/s]\u001b[A\n",
            " 65% 17349/26883 [00:53<00:30, 313.27it/s]\u001b[A\n",
            " 65% 17381/26883 [00:53<00:30, 312.33it/s]\u001b[A\n",
            " 65% 17414/26883 [00:53<00:30, 314.62it/s]\u001b[A\n",
            " 65% 17446/26883 [00:53<00:30, 314.12it/s]\u001b[A\n",
            " 65% 17482/26883 [00:53<00:28, 326.95it/s]\u001b[A\n",
            " 65% 17515/26883 [00:54<00:29, 320.30it/s]\u001b[A\n",
            " 65% 17550/26883 [00:54<00:28, 328.30it/s]\u001b[A\n",
            " 65% 17590/26883 [00:54<00:26, 347.65it/s]\u001b[A\n",
            " 66% 17625/26883 [00:54<00:27, 337.59it/s]\u001b[A\n",
            " 66% 17659/26883 [00:54<00:28, 321.81it/s]\u001b[A\n",
            " 66% 17696/26883 [00:54<00:27, 331.96it/s]\u001b[A\n",
            " 66% 17734/26883 [00:54<00:26, 342.11it/s]\u001b[A\n",
            " 66% 17769/26883 [00:54<00:26, 342.61it/s]\u001b[A\n",
            " 66% 17804/26883 [00:54<00:26, 338.57it/s]\u001b[A\n",
            " 66% 17839/26883 [00:55<00:26, 341.00it/s]\u001b[A\n",
            " 66% 17874/26883 [00:55<00:26, 338.83it/s]\u001b[A\n",
            " 67% 17908/26883 [00:55<00:26, 336.05it/s]\u001b[A\n",
            " 67% 17942/26883 [00:55<00:27, 330.88it/s]\u001b[A\n",
            " 67% 17976/26883 [00:55<00:26, 330.59it/s]\u001b[A\n",
            " 67% 18010/26883 [00:55<00:26, 330.89it/s]\u001b[A\n",
            " 67% 18047/26883 [00:55<00:25, 341.53it/s]\u001b[A\n",
            " 67% 18082/26883 [00:55<00:26, 335.14it/s]\u001b[A\n",
            " 67% 18118/26883 [00:55<00:25, 340.24it/s]\u001b[A\n",
            " 68% 18156/26883 [00:55<00:24, 351.10it/s]\u001b[A\n",
            " 68% 18192/26883 [00:56<00:24, 349.71it/s]\u001b[A\n",
            " 68% 18228/26883 [00:56<00:25, 340.17it/s]\u001b[A\n",
            " 68% 18263/26883 [00:56<00:26, 326.38it/s]\u001b[A\n",
            " 68% 18296/26883 [00:56<00:26, 319.79it/s]\u001b[A\n",
            " 68% 18329/26883 [00:56<00:27, 313.99it/s]\u001b[A\n",
            " 68% 18361/26883 [00:56<00:27, 308.21it/s]\u001b[A\n",
            " 68% 18392/26883 [00:56<00:28, 298.69it/s]\u001b[A\n",
            " 69% 18425/26883 [00:56<00:27, 307.18it/s]\u001b[A\n",
            " 69% 18456/26883 [00:56<00:27, 304.09it/s]\u001b[A\n",
            " 69% 18488/26883 [00:57<00:27, 306.64it/s]\u001b[A\n",
            " 69% 18523/26883 [00:57<00:26, 316.33it/s]\u001b[A\n",
            " 69% 18562/26883 [00:57<00:24, 337.51it/s]\u001b[A\n",
            " 69% 18598/26883 [00:57<00:24, 341.90it/s]\u001b[A\n",
            " 69% 18633/26883 [00:57<00:24, 331.86it/s]\u001b[A\n",
            " 69% 18670/26883 [00:57<00:23, 342.49it/s]\u001b[A\n",
            " 70% 18705/26883 [00:57<00:24, 327.20it/s]\u001b[A\n",
            " 70% 18745/26883 [00:57<00:23, 344.95it/s]\u001b[A\n",
            " 70% 18780/26883 [00:57<00:24, 335.93it/s]\u001b[A\n",
            " 70% 18815/26883 [00:57<00:23, 339.39it/s]\u001b[A\n",
            " 70% 18853/26883 [00:58<00:22, 349.88it/s]\u001b[A\n",
            " 70% 18889/26883 [00:58<00:22, 349.04it/s]\u001b[A\n",
            " 70% 18925/26883 [00:58<00:22, 347.17it/s]\u001b[A\n",
            " 71% 18963/26883 [00:58<00:22, 356.65it/s]\u001b[A\n",
            " 71% 18999/26883 [00:58<00:23, 330.86it/s]\u001b[A\n",
            " 71% 19037/26883 [00:58<00:22, 344.31it/s]\u001b[A\n",
            " 71% 19072/26883 [00:58<00:22, 341.07it/s]\u001b[A\n",
            " 71% 19107/26883 [00:58<00:24, 318.75it/s]\u001b[A\n",
            " 71% 19140/26883 [00:58<00:24, 310.42it/s]\u001b[A\n",
            " 71% 19180/26883 [00:59<00:23, 334.30it/s]\u001b[A\n",
            " 71% 19215/26883 [00:59<00:22, 337.44it/s]\u001b[A\n",
            " 72% 19250/26883 [00:59<00:24, 315.08it/s]\u001b[A\n",
            " 72% 19288/26883 [00:59<00:22, 330.94it/s]\u001b[A\n",
            " 72% 19322/26883 [00:59<00:22, 329.45it/s]\u001b[A\n",
            " 72% 19356/26883 [00:59<00:23, 317.12it/s]\u001b[A\n",
            " 72% 19390/26883 [00:59<00:23, 322.47it/s]\u001b[A\n",
            " 72% 19424/26883 [00:59<00:22, 326.88it/s]\u001b[A\n",
            " 72% 19457/26883 [00:59<00:23, 322.12it/s]\u001b[A\n",
            " 73% 19491/26883 [01:00<00:22, 325.27it/s]\u001b[A\n",
            " 73% 19530/26883 [01:00<00:21, 341.56it/s]\u001b[A\n",
            " 73% 19565/26883 [01:00<00:21, 333.61it/s]\u001b[A\n",
            " 73% 19599/26883 [01:00<00:22, 318.88it/s]\u001b[A\n",
            " 73% 19636/26883 [01:00<00:21, 331.37it/s]\u001b[A\n",
            " 73% 19670/26883 [01:00<00:21, 330.82it/s]\u001b[A\n",
            " 73% 19708/26883 [01:00<00:20, 342.70it/s]\u001b[A\n",
            " 73% 19743/26883 [01:00<00:22, 321.17it/s]\u001b[A\n",
            " 74% 19777/26883 [01:00<00:21, 324.01it/s]\u001b[A\n",
            " 74% 19812/26883 [01:01<00:21, 326.89it/s]\u001b[A\n",
            " 74% 19845/26883 [01:01<00:22, 311.40it/s]\u001b[A\n",
            " 74% 19880/26883 [01:01<00:21, 321.07it/s]\u001b[A\n",
            " 74% 19913/26883 [01:01<00:21, 319.02it/s]\u001b[A\n",
            " 74% 19946/26883 [01:01<00:21, 318.35it/s]\u001b[A\n",
            " 74% 19981/26883 [01:01<00:21, 327.14it/s]\u001b[A\n",
            " 74% 20020/26883 [01:01<00:20, 342.76it/s]\u001b[A\n",
            " 75% 20055/26883 [01:01<00:20, 331.96it/s]\u001b[A\n",
            " 75% 20089/26883 [01:01<00:21, 319.18it/s]\u001b[A\n",
            " 75% 20123/26883 [01:01<00:21, 321.42it/s]\u001b[A\n",
            " 75% 20156/26883 [01:02<00:20, 322.78it/s]\u001b[A\n",
            " 75% 20189/26883 [01:02<00:20, 320.85it/s]\u001b[A\n",
            " 75% 20222/26883 [01:02<00:21, 312.90it/s]\u001b[A\n",
            " 75% 20260/26883 [01:02<00:20, 326.83it/s]\u001b[A\n",
            " 75% 20293/26883 [01:02<00:20, 322.08it/s]\u001b[A\n",
            " 76% 20326/26883 [01:02<00:20, 322.35it/s]\u001b[A\n",
            " 76% 20359/26883 [01:02<00:20, 313.02it/s]\u001b[A\n",
            " 76% 20394/26883 [01:02<00:20, 321.91it/s]\u001b[A\n",
            " 76% 20427/26883 [01:02<00:20, 317.58it/s]\u001b[A\n",
            " 76% 20465/26883 [01:03<00:19, 335.09it/s]\u001b[A\n",
            " 76% 20499/26883 [01:03<00:20, 310.66it/s]\u001b[A\n",
            " 76% 20538/26883 [01:03<00:19, 331.98it/s]\u001b[A\n",
            " 77% 20572/26883 [01:03<00:19, 328.98it/s]\u001b[A\n",
            " 77% 20606/26883 [01:03<00:19, 323.12it/s]\u001b[A\n",
            " 77% 20639/26883 [01:03<00:19, 323.84it/s]\u001b[A\n",
            " 77% 20674/26883 [01:03<00:18, 328.61it/s]\u001b[A\n",
            " 77% 20707/26883 [01:03<00:19, 314.23it/s]\u001b[A\n",
            " 77% 20739/26883 [01:03<00:19, 314.10it/s]\u001b[A\n",
            " 77% 20774/26883 [01:03<00:18, 321.75it/s]\u001b[A\n",
            " 77% 20807/26883 [01:04<00:19, 312.19it/s]\u001b[A\n",
            " 78% 20841/26883 [01:04<00:19, 313.85it/s]\u001b[A\n",
            " 78% 20875/26883 [01:04<00:18, 319.04it/s]\u001b[A\n",
            " 78% 20907/26883 [01:04<00:18, 316.18it/s]\u001b[A\n",
            " 78% 20944/26883 [01:04<00:18, 324.38it/s]\u001b[A\n",
            " 78% 20977/26883 [01:04<00:19, 301.20it/s]\u001b[A\n",
            " 78% 21014/26883 [01:04<00:18, 319.95it/s]\u001b[A\n",
            " 78% 21047/26883 [01:04<00:19, 304.71it/s]\u001b[A\n",
            " 78% 21079/26883 [01:04<00:18, 308.32it/s]\u001b[A\n",
            " 79% 21111/26883 [01:05<00:19, 302.16it/s]\u001b[A\n",
            " 79% 21142/26883 [01:05<00:19, 298.42it/s]\u001b[A\n",
            " 79% 21177/26883 [01:05<00:18, 311.25it/s]\u001b[A\n",
            " 79% 21209/26883 [01:05<00:18, 309.47it/s]\u001b[A\n",
            " 79% 21247/26883 [01:05<00:17, 329.35it/s]\u001b[A\n",
            " 79% 21281/26883 [01:05<00:17, 327.82it/s]\u001b[A\n",
            " 79% 21316/26883 [01:05<00:16, 330.38it/s]\u001b[A\n",
            " 79% 21351/26883 [01:05<00:16, 334.79it/s]\u001b[A\n",
            " 80% 21385/26883 [01:05<00:16, 331.94it/s]\u001b[A\n",
            " 80% 21419/26883 [01:06<00:17, 320.65it/s]\u001b[A\n",
            " 80% 21452/26883 [01:06<00:16, 319.67it/s]\u001b[A\n",
            " 80% 21490/26883 [01:06<00:16, 334.06it/s]\u001b[A\n",
            " 80% 21524/26883 [01:06<00:16, 323.10it/s]\u001b[A\n",
            " 80% 21558/26883 [01:06<00:16, 325.82it/s]\u001b[A\n",
            " 80% 21591/26883 [01:06<00:17, 303.82it/s]\u001b[A\n",
            " 80% 21622/26883 [01:06<00:17, 303.27it/s]\u001b[A\n",
            " 81% 21653/26883 [01:06<00:17, 301.29it/s]\u001b[A\n",
            " 81% 21686/26883 [01:06<00:16, 307.75it/s]\u001b[A\n",
            " 81% 21718/26883 [01:06<00:16, 308.95it/s]\u001b[A\n",
            " 81% 21751/26883 [01:07<00:16, 312.75it/s]\u001b[A\n",
            " 81% 21783/26883 [01:07<00:16, 300.15it/s]\u001b[A\n",
            " 81% 21819/26883 [01:07<00:16, 315.43it/s]\u001b[A\n",
            " 81% 21855/26883 [01:07<00:15, 326.23it/s]\u001b[A\n",
            " 81% 21888/26883 [01:07<00:15, 327.02it/s]\u001b[A\n",
            " 82% 21921/26883 [01:07<00:16, 307.74it/s]\u001b[A\n",
            " 82% 21954/26883 [01:07<00:15, 313.23it/s]\u001b[A\n",
            " 82% 21986/26883 [01:07<00:15, 309.81it/s]\u001b[A\n",
            " 82% 22018/26883 [01:07<00:15, 308.23it/s]\u001b[A\n",
            " 82% 22052/26883 [01:08<00:15, 317.35it/s]\u001b[A\n",
            " 82% 22084/26883 [01:08<00:15, 315.61it/s]\u001b[A\n",
            " 82% 22116/26883 [01:08<00:15, 311.40it/s]\u001b[A\n",
            " 82% 22148/26883 [01:08<00:15, 310.91it/s]\u001b[A\n",
            " 83% 22185/26883 [01:08<00:14, 327.91it/s]\u001b[A\n",
            " 83% 22219/26883 [01:08<00:14, 330.81it/s]\u001b[A\n",
            " 83% 22253/26883 [01:08<00:14, 319.88it/s]\u001b[A\n",
            " 83% 22286/26883 [01:08<00:14, 311.33it/s]\u001b[A\n",
            " 83% 22322/26883 [01:08<00:14, 323.28it/s]\u001b[A\n",
            " 83% 22355/26883 [01:09<00:14, 315.86it/s]\u001b[A\n",
            " 83% 22387/26883 [01:09<00:14, 307.68it/s]\u001b[A\n",
            " 83% 22418/26883 [01:09<00:14, 301.54it/s]\u001b[A\n",
            " 84% 22456/26883 [01:09<00:13, 323.22it/s]\u001b[A\n",
            " 84% 22491/26883 [01:09<00:13, 330.32it/s]\u001b[A\n",
            " 84% 22526/26883 [01:09<00:13, 329.99it/s]\u001b[A\n",
            " 84% 22560/26883 [01:09<00:13, 323.21it/s]\u001b[A\n",
            " 84% 22596/26883 [01:09<00:12, 332.78it/s]\u001b[A\n",
            " 84% 22630/26883 [01:09<00:13, 308.53it/s]\u001b[A\n",
            " 84% 22662/26883 [01:09<00:13, 309.71it/s]\u001b[A\n",
            " 84% 22696/26883 [01:10<00:13, 316.38it/s]\u001b[A\n",
            " 85% 22730/26883 [01:10<00:12, 321.82it/s]\u001b[A\n",
            " 85% 22763/26883 [01:10<00:12, 322.17it/s]\u001b[A\n",
            " 85% 22800/26883 [01:10<00:12, 334.42it/s]\u001b[A\n",
            " 85% 22835/26883 [01:10<00:11, 338.66it/s]\u001b[A\n",
            " 85% 22869/26883 [01:10<00:12, 330.83it/s]\u001b[A\n",
            " 85% 22903/26883 [01:10<00:12, 319.26it/s]\u001b[A\n",
            " 85% 22936/26883 [01:10<00:12, 317.48it/s]\u001b[A\n",
            " 85% 22968/26883 [01:10<00:12, 309.95it/s]\u001b[A\n",
            " 86% 23000/26883 [01:11<00:12, 312.65it/s]\u001b[A\n",
            " 86% 23032/26883 [01:11<00:12, 314.74it/s]\u001b[A\n",
            " 86% 23069/26883 [01:11<00:11, 330.64it/s]\u001b[A\n",
            " 86% 23104/26883 [01:11<00:11, 336.21it/s]\u001b[A\n",
            " 86% 23138/26883 [01:11<00:11, 320.64it/s]\u001b[A\n",
            " 86% 23176/26883 [01:11<00:11, 335.88it/s]\u001b[A\n",
            " 86% 23210/26883 [01:11<00:10, 336.17it/s]\u001b[A\n",
            " 86% 23247/26883 [01:11<00:10, 344.98it/s]\u001b[A\n",
            " 87% 23282/26883 [01:11<00:10, 339.21it/s]\u001b[A\n",
            " 87% 23318/26883 [01:11<00:10, 344.93it/s]\u001b[A\n",
            " 87% 23353/26883 [01:12<00:10, 334.36it/s]\u001b[A\n",
            " 87% 23390/26883 [01:12<00:10, 343.96it/s]\u001b[A\n",
            " 87% 23425/26883 [01:12<00:10, 326.97it/s]\u001b[A\n",
            " 87% 23461/26883 [01:12<00:10, 335.73it/s]\u001b[A\n",
            " 87% 23495/26883 [01:12<00:10, 327.39it/s]\u001b[A\n",
            " 88% 23531/26883 [01:12<00:10, 332.29it/s]\u001b[A\n",
            " 88% 23565/26883 [01:12<00:10, 305.86it/s]\u001b[A\n",
            " 88% 23597/26883 [01:12<00:11, 288.67it/s]\u001b[A\n",
            " 88% 23633/26883 [01:12<00:10, 307.03it/s]\u001b[A\n",
            " 88% 23670/26883 [01:13<00:10, 319.88it/s]\u001b[A\n",
            " 88% 23703/26883 [01:13<00:10, 313.19it/s]\u001b[A\n",
            " 88% 23739/26883 [01:13<00:09, 325.15it/s]\u001b[A\n",
            " 88% 23776/26883 [01:13<00:09, 336.14it/s]\u001b[A\n",
            " 89% 23810/26883 [01:13<00:09, 334.57it/s]\u001b[A\n",
            " 89% 23844/26883 [01:13<00:09, 310.01it/s]\u001b[A\n",
            " 89% 23876/26883 [01:13<00:09, 310.42it/s]\u001b[A\n",
            " 89% 23912/26883 [01:13<00:09, 323.07it/s]\u001b[A\n",
            " 89% 23945/26883 [01:13<00:09, 309.74it/s]\u001b[A\n",
            " 89% 23979/26883 [01:14<00:09, 318.14it/s]\u001b[A\n",
            " 89% 24012/26883 [01:14<00:08, 321.30it/s]\u001b[A\n",
            " 89% 24046/26883 [01:14<00:08, 325.15it/s]\u001b[A\n",
            " 90% 24079/26883 [01:14<00:08, 312.71it/s]\u001b[A\n",
            " 90% 24112/26883 [01:14<00:08, 317.03it/s]\u001b[A\n",
            " 90% 24144/26883 [01:14<00:08, 314.73it/s]\u001b[A\n",
            " 90% 24181/26883 [01:14<00:08, 325.59it/s]\u001b[A\n",
            " 90% 24217/26883 [01:14<00:07, 335.13it/s]\u001b[A\n",
            " 90% 24251/26883 [01:14<00:07, 333.21it/s]\u001b[A\n",
            " 90% 24286/26883 [01:14<00:07, 337.34it/s]\u001b[A\n",
            " 90% 24324/26883 [01:15<00:07, 347.59it/s]\u001b[A\n",
            " 91% 24359/26883 [01:15<00:07, 345.26it/s]\u001b[A\n",
            " 91% 24396/26883 [01:15<00:07, 349.99it/s]\u001b[A\n",
            " 91% 24432/26883 [01:15<00:06, 351.09it/s]\u001b[A\n",
            " 91% 24468/26883 [01:15<00:07, 338.46it/s]\u001b[A\n",
            " 91% 24502/26883 [01:15<00:07, 328.44it/s]\u001b[A\n",
            " 91% 24535/26883 [01:15<00:07, 325.48it/s]\u001b[A\n",
            " 91% 24568/26883 [01:15<00:07, 323.02it/s]\u001b[A\n",
            " 92% 24602/26883 [01:15<00:06, 327.61it/s]\u001b[A\n",
            " 92% 24635/26883 [01:16<00:07, 317.29it/s]\u001b[A\n",
            " 92% 24667/26883 [01:16<00:07, 315.54it/s]\u001b[A\n",
            " 92% 24699/26883 [01:16<00:07, 311.18it/s]\u001b[A\n",
            " 92% 24733/26883 [01:16<00:06, 317.34it/s]\u001b[A\n",
            " 92% 24770/26883 [01:16<00:06, 331.41it/s]\u001b[A\n",
            " 92% 24804/26883 [01:16<00:06, 318.41it/s]\u001b[A\n",
            " 92% 24836/26883 [01:16<00:06, 309.14it/s]\u001b[A\n",
            " 93% 24868/26883 [01:16<00:06, 303.33it/s]\u001b[A\n",
            " 93% 24899/26883 [01:16<00:06, 301.51it/s]\u001b[A\n",
            " 93% 24932/26883 [01:16<00:06, 307.42it/s]\u001b[A\n",
            " 93% 24963/26883 [01:17<00:06, 303.66it/s]\u001b[A\n",
            " 93% 25003/26883 [01:17<00:05, 328.41it/s]\u001b[A\n",
            " 93% 25036/26883 [01:17<00:05, 324.58it/s]\u001b[A\n",
            " 93% 25072/26883 [01:17<00:05, 333.74it/s]\u001b[A\n",
            " 93% 25106/26883 [01:17<00:05, 322.86it/s]\u001b[A\n",
            " 94% 25140/26883 [01:17<00:05, 327.27it/s]\u001b[A\n",
            " 94% 25175/26883 [01:17<00:05, 332.64it/s]\u001b[A\n",
            " 94% 25209/26883 [01:17<00:05, 324.03it/s]\u001b[A\n",
            " 94% 25242/26883 [01:17<00:05, 304.32it/s]\u001b[A\n",
            " 94% 25275/26883 [01:18<00:05, 310.43it/s]\u001b[A\n",
            " 94% 25307/26883 [01:18<00:05, 300.53it/s]\u001b[A\n",
            " 94% 25345/26883 [01:18<00:04, 322.31it/s]\u001b[A\n",
            " 94% 25382/26883 [01:18<00:04, 333.91it/s]\u001b[A\n",
            " 95% 25416/26883 [01:18<00:04, 331.24it/s]\u001b[A\n",
            " 95% 25450/26883 [01:18<00:04, 316.02it/s]\u001b[A\n",
            " 95% 25486/26883 [01:18<00:04, 323.61it/s]\u001b[A\n",
            " 95% 25519/26883 [01:18<00:04, 290.40it/s]\u001b[A\n",
            " 95% 25552/26883 [01:18<00:04, 297.85it/s]\u001b[A\n",
            " 95% 25590/26883 [01:19<00:04, 318.89it/s]\u001b[A\n",
            " 95% 25624/26883 [01:19<00:03, 324.08it/s]\u001b[A\n",
            " 95% 25657/26883 [01:19<00:03, 311.65it/s]\u001b[A\n",
            " 96% 25689/26883 [01:19<00:03, 312.20it/s]\u001b[A\n",
            " 96% 25726/26883 [01:19<00:03, 326.48it/s]\u001b[A\n",
            " 96% 25759/26883 [01:19<00:03, 314.10it/s]\u001b[A\n",
            " 96% 25794/26883 [01:19<00:03, 319.21it/s]\u001b[A\n",
            " 96% 25827/26883 [01:19<00:03, 314.77it/s]\u001b[A\n",
            " 96% 25862/26883 [01:19<00:03, 321.40it/s]\u001b[A\n",
            " 96% 25895/26883 [01:19<00:03, 316.94it/s]\u001b[A\n",
            " 96% 25927/26883 [01:20<00:03, 313.17it/s]\u001b[A\n",
            " 97% 25964/26883 [01:20<00:02, 328.98it/s]\u001b[A\n",
            " 97% 25999/26883 [01:20<00:02, 332.48it/s]\u001b[A\n",
            " 97% 26033/26883 [01:20<00:02, 328.45it/s]\u001b[A\n",
            " 97% 26066/26883 [01:20<00:02, 321.72it/s]\u001b[A\n",
            " 97% 26100/26883 [01:20<00:02, 323.68it/s]\u001b[A\n",
            " 97% 26133/26883 [01:20<00:02, 317.42it/s]\u001b[A\n",
            " 97% 26166/26883 [01:20<00:02, 320.38it/s]\u001b[A\n",
            " 97% 26199/26883 [01:20<00:02, 319.54it/s]\u001b[A\n",
            " 98% 26233/26883 [01:21<00:02, 324.53it/s]\u001b[A\n",
            " 98% 26267/26883 [01:21<00:01, 329.04it/s]\u001b[A\n",
            " 98% 26303/26883 [01:21<00:01, 336.78it/s]\u001b[A\n",
            " 98% 26337/26883 [01:21<00:01, 336.38it/s]\u001b[A\n",
            " 98% 26371/26883 [01:21<00:01, 322.45it/s]\u001b[A\n",
            " 98% 26404/26883 [01:21<00:01, 323.45it/s]\u001b[A\n",
            " 98% 26437/26883 [01:21<00:01, 322.91it/s]\u001b[A\n",
            " 98% 26472/26883 [01:21<00:01, 328.87it/s]\u001b[A\n",
            " 99% 26505/26883 [01:21<00:01, 329.09it/s]\u001b[A\n",
            " 99% 26542/26883 [01:21<00:01, 336.57it/s]\u001b[A\n",
            " 99% 26576/26883 [01:22<00:00, 336.10it/s]\u001b[A\n",
            " 99% 26610/26883 [01:22<00:00, 325.72it/s]\u001b[A\n",
            " 99% 26643/26883 [01:22<00:00, 325.85it/s]\u001b[A\n",
            " 99% 26676/26883 [01:22<00:00, 324.82it/s]\u001b[A\n",
            " 99% 26709/26883 [01:22<00:00, 322.02it/s]\u001b[A\n",
            " 99% 26742/26883 [01:22<00:00, 303.89it/s]\u001b[A\n",
            "100% 26775/26883 [01:22<00:00, 308.81it/s]\u001b[A\n",
            "100% 26812/26883 [01:22<00:00, 322.50it/s]\u001b[A\n",
            "100% 26849/26883 [01:22<00:00, 335.89it/s]\u001b[A\n",
            "100% 26883/26883 [01:23<00:00, 323.78it/s]\n",
            "1it [01:23, 83.20s/it]\n",
            "No errors! Data is correctly formated!\n",
            "Stats for /content/data/ultrachat_chunk_train.jsonl \n",
            " -------------------- \n",
            " {\n",
            "    \"expected\": {\n",
            "        \"eta\": \"00:05:57\",\n",
            "        \"data_tokens\": 25169147,\n",
            "        \"train_tokens\": 2621440,\n",
            "        \"epochs\": \"0.10\",\n",
            "        \"max_steps\": 10,\n",
            "        \"data_tokens_per_dataset\": {\n",
            "            \"/content/data/ultrachat_chunk_train.jsonl\": \"25169147.0\"\n",
            "        },\n",
            "        \"train_tokens_per_dataset\": {\n",
            "            \"/content/data/ultrachat_chunk_train.jsonl\": \"2621440.0\"\n",
            "        },\n",
            "        \"epochs_per_dataset\": {\n",
            "            \"/content/data/ultrachat_chunk_train.jsonl\": \"0.1\"\n",
            "        }\n",
            "    }\n",
            "}\n",
            "0it [00:00, ?it/s]Validating /content/data/ultrachat_chunk_eval.jsonl ...\n",
            "\n",
            "  0% 0/1415 [00:00<?, ?it/s]\u001b[A\n",
            "  2% 32/1415 [00:00<00:04, 309.61it/s]\u001b[A\n",
            "  5% 65/1415 [00:00<00:04, 320.17it/s]\u001b[A\n",
            "  8% 110/1415 [00:00<00:03, 375.85it/s]\u001b[A\n",
            " 10% 148/1415 [00:00<00:03, 360.64it/s]\u001b[A\n",
            " 13% 185/1415 [00:00<00:03, 345.18it/s]\u001b[A\n",
            " 16% 221/1415 [00:00<00:03, 349.78it/s]\u001b[A\n",
            " 19% 263/1415 [00:00<00:03, 369.34it/s]\u001b[A\n",
            " 21% 301/1415 [00:00<00:03, 362.98it/s]\u001b[A\n",
            " 24% 340/1415 [00:00<00:02, 370.27it/s]\u001b[A\n",
            " 27% 378/1415 [00:01<00:02, 353.03it/s]\u001b[A\n",
            " 29% 417/1415 [00:01<00:02, 361.98it/s]\u001b[A\n",
            " 32% 454/1415 [00:01<00:02, 353.81it/s]\u001b[A\n",
            " 35% 490/1415 [00:01<00:02, 355.04it/s]\u001b[A\n",
            " 37% 529/1415 [00:01<00:02, 360.85it/s]\u001b[A\n",
            " 40% 566/1415 [00:01<00:02, 356.66it/s]\u001b[A\n",
            " 43% 602/1415 [00:01<00:02, 350.36it/s]\u001b[A\n",
            " 45% 639/1415 [00:01<00:02, 355.64it/s]\u001b[A\n",
            " 48% 675/1415 [00:01<00:02, 340.10it/s]\u001b[A\n",
            " 50% 712/1415 [00:02<00:02, 346.88it/s]\u001b[A\n",
            " 53% 751/1415 [00:02<00:01, 355.95it/s]\u001b[A\n",
            " 56% 787/1415 [00:02<00:01, 351.18it/s]\u001b[A\n",
            " 58% 823/1415 [00:02<00:01, 348.88it/s]\u001b[A\n",
            " 61% 863/1415 [00:02<00:01, 358.90it/s]\u001b[A\n",
            " 64% 899/1415 [00:02<00:01, 345.68it/s]\u001b[A\n",
            " 66% 938/1415 [00:02<00:01, 355.23it/s]\u001b[A\n",
            " 69% 978/1415 [00:02<00:01, 366.70it/s]\u001b[A\n",
            " 72% 1015/1415 [00:02<00:01, 366.14it/s]\u001b[A\n",
            " 74% 1052/1415 [00:02<00:01, 362.09it/s]\u001b[A\n",
            " 77% 1089/1415 [00:03<00:00, 363.22it/s]\u001b[A\n",
            " 80% 1126/1415 [00:03<00:00, 350.60it/s]\u001b[A\n",
            " 82% 1163/1415 [00:03<00:00, 352.81it/s]\u001b[A\n",
            " 85% 1205/1415 [00:03<00:00, 371.19it/s]\u001b[A\n",
            " 88% 1243/1415 [00:03<00:00, 364.90it/s]\u001b[A\n",
            " 90% 1280/1415 [00:03<00:00, 365.75it/s]\u001b[A\n",
            " 93% 1318/1415 [00:03<00:00, 368.56it/s]\u001b[A\n",
            " 96% 1355/1415 [00:03<00:00, 368.03it/s]\u001b[A\n",
            "100% 1415/1415 [00:03<00:00, 356.35it/s]\n",
            "1it [00:03,  3.98s/it]\n",
            "No errors! Data is correctly formated!\n"
          ]
        }
      ]
    },
    {
      "cell_type": "markdown",
      "source": [
        "## Start training"
      ],
      "metadata": {
        "id": "Hia7n0T1_mHZ"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "# these info is needed for training\n",
        "import os\n",
        "os.environ[\"CUDA_DEVICE_ORDER\"]=\"PCI_BUS_ID\"\n",
        "os.environ[\"CUDA_VISIBLE_DEVICES\"]=\"0\""
      ],
      "metadata": {
        "id": "ZtcLerooWFeB"
      },
      "execution_count": 19,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# define training configuration\n",
        "# for your own use cases, you might want to change the data paths, model path, run_dir, and other hyperparameters\n",
        "\n",
        "config = \"\"\"\n",
        "# data\n",
        "data:\n",
        "  instruct_data: \"/content/data/ultrachat_chunk_train.jsonl\"  # Fill\n",
        "  data: \"\"  # Optionally fill with pretraining data\n",
        "  eval_instruct_data: \"/content/data/ultrachat_chunk_eval.jsonl\"  # Optionally fill\n",
        "\n",
        "# model\n",
        "model_id_or_path: \"/content/mistral_models\"  # Change to downloaded path\n",
        "lora:\n",
        "  rank: 64\n",
        "\n",
        "# optim\n",
        "# tokens per training steps = batch_size x num_GPUs x seq_len\n",
        "# we recommend sequence lentgh of 32768\n",
        "# If you run into memory error, you can try reduce the sequence length\n",
        "seq_len: 8192\n",
        "batch_size: 1\n",
        "num_microbatches: 8\n",
        "max_steps: 100\n",
        "optim:\n",
        "  lr: 1.e-4\n",
        "  weight_decay: 0.1\n",
        "  pct_start: 0.05\n",
        "\n",
        "# other\n",
        "seed: 0\n",
        "log_freq: 1\n",
        "eval_freq: 100\n",
        "no_eval: False\n",
        "ckpt_freq: 100\n",
        "\n",
        "ckpt_only_lora: True  # save only trained LoRA adapters. Set to `False` to merge LoRA adapter into the base model and save full fine-tuned model\n",
        "\n",
        "run_dir: \"/content/test_ultra\"  # Fill\n",
        "\"\"\"\n",
        "\n",
        "# save the same file locally into the example.yaml file\n",
        "import yaml\n",
        "with open('example.yaml', 'w') as file:\n",
        "    yaml.dump(yaml.safe_load(config), file)\n"
      ],
      "metadata": {
        "id": "5dxTlIQMaJGv"
      },
      "execution_count": 20,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# make sure the run_dir has not been created before\n",
        "# only run this when you ran torchrun previously and created the /content/test_ultra file\n",
        "# ! rm -r /content/test_ultra"
      ],
      "metadata": {
        "id": "ErD1ktQUMyPZ"
      },
      "execution_count": 21,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# start training\n",
        "\n",
        "!torchrun --nproc-per-node 1 -m train example.yaml"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "e4wFgmwIUTtg",
        "outputId": "8fe22185-6e12-4987-c4f6-3768952cec7c"
      },
      "execution_count": 22,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "2024-05-24 18:58:16.690967: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.\n",
            "2024-05-24 18:58:17.292359: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n",
            "2024-05-24 18:58:17.292438: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n",
            "2024-05-24 18:58:17.418671: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n",
            "2024-05-24 18:58:17.481373: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n",
            "To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n",
            "2024-05-24 18:58:18.646197: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n",
            "args: TrainArgs(data=DataArgs(data='', shuffle=False, instruct_data='/content/data/ultrachat_chunk_train.jsonl', eval_instruct_data='/content/data/ultrachat_chunk_eval.jsonl', instruct=InstructArgs(shuffle=True, dynamic_chunk_fn_call=True)), model_id_or_path='/content/mistral_models', run_dir='/content/test_ultra', optim=OptimArgs(lr=0.0001, weight_decay=0.1, pct_start=0.05), seed=0, num_microbatches=8, seq_len=8192, batch_size=1, max_norm=1.0, max_steps=100, log_freq=1, ckpt_freq=100, ckpt_only_lora=True, no_ckpt=False, num_ckpt_keep=3, eval_freq=100, no_eval=False, checkpoint=True, world_size=1, wandb=WandbArgs(project=None, offline=False, key=None, run_name=None), mlflow=MLFlowArgs(tracking_uri=None, experiment_name=None), lora=LoraArgs(enable=True, rank=64, dropout=0.0, scaling=2.0))\n",
            "2024-05-24 18:58:19 (UTC) - 0:00:08 - distributed - INFO - torch.cuda.device_count: 1\n",
            "2024-05-24 18:58:19 (UTC) - 0:00:08 - distributed - INFO - CUDA_VISIBLE_DEVICES: 0\n",
            "2024-05-24 18:58:19 (UTC) - 0:00:08 - distributed - INFO - local rank: 0\n",
            "2024-05-24 18:58:19 (UTC) - 0:00:08 - train - INFO - Going to init comms...\n",
            "2024-05-24 18:58:19 (UTC) - 0:00:08 - train - INFO - Run dir: /content/test_ultra\n",
            "2024-05-24 18:58:20 (UTC) - 0:00:09 - train - INFO - TrainArgs: {'batch_size': 1,\n",
            " 'checkpoint': True,\n",
            " 'ckpt_freq': 100,\n",
            " 'ckpt_only_lora': True,\n",
            " 'data': {'data': '',\n",
            "          'eval_instruct_data': '/content/data/ultrachat_chunk_eval.jsonl',\n",
            "          'instruct': {'dynamic_chunk_fn_call': True, 'shuffle': True},\n",
            "          'instruct_data': '/content/data/ultrachat_chunk_train.jsonl',\n",
            "          'shuffle': False},\n",
            " 'eval_freq': 100,\n",
            " 'log_freq': 1,\n",
            " 'lora': {'dropout': 0.0, 'enable': True, 'rank': 64, 'scaling': 2.0},\n",
            " 'max_norm': 1.0,\n",
            " 'max_steps': 100,\n",
            " 'mlflow': {'experiment_name': None, 'tracking_uri': None},\n",
            " 'model_id_or_path': '/content/mistral_models',\n",
            " 'no_ckpt': False,\n",
            " 'no_eval': False,\n",
            " 'num_ckpt_keep': 3,\n",
            " 'num_microbatches': 8,\n",
            " 'optim': {'lr': 0.0001, 'pct_start': 0.05, 'weight_decay': 0.1},\n",
            " 'run_dir': '/content/test_ultra',\n",
            " 'seed': 0,\n",
            " 'seq_len': 8192,\n",
            " 'wandb': {'key': None, 'offline': False, 'project': None, 'run_name': None},\n",
            " 'world_size': 1}\n",
            "2024-05-24 18:58:25 (UTC) - 0:00:13 - finetune.wrapped_model - INFO - Reloading model from /content/mistral_models/consolidated.safetensors ...\n",
            "2024-05-24 18:58:25 (UTC) - 0:00:13 - finetune.wrapped_model - INFO - Converting model to dtype torch.bfloat16 ...\n",
            "2024-05-24 18:58:25 (UTC) - 0:00:13 - finetune.wrapped_model - INFO - Loaded model on cpu!\n",
            "2024-05-24 18:58:25 (UTC) - 0:00:13 - finetune.wrapped_model - INFO - Initializing lora layers ...\n",
            "2024-05-24 18:58:26 (UTC) - 0:00:14 - finetune.wrapped_model - INFO - Finished initialization!\n",
            "2024-05-24 18:58:26 (UTC) - 0:00:14 - finetune.wrapped_model - INFO - Sharding model over 1 GPUs ...\n",
            "2024-05-24 18:58:30 (UTC) - 0:00:19 - finetune.wrapped_model - INFO - Model sharded!\n",
            "2024-05-24 18:58:30 (UTC) - 0:00:19 - finetune.wrapped_model - INFO - 167,772,160 out of 7,415,795,712 parameter are finetuned (2.26%).\n",
            "2024-05-24 18:58:31 (UTC) - 0:00:20 - dataset - INFO - Loading /content/data/ultrachat_chunk_train.jsonl ...\n",
            "2024-05-24 19:00:03 (UTC) - 0:01:51 - dataset - INFO - /content/data/ultrachat_chunk_train.jsonl loaded and tokenized.\n",
            "2024-05-24 19:00:03 (UTC) - 0:01:51 - dataset - INFO - Shuffling /content/data/ultrachat_chunk_train.jsonl ...\n",
            "2024-05-24 19:00:23 (UTC) - 0:02:11 - train - INFO - step: 000001 - done (%): 1.0 - loss: 0.874 - lr: 4.0e-06 - peak_alloc_mem (GB): 21.0 - alloc_mem (GB): 17.1 - words_per_second: 585.7 - avg_words_per_second: 585.7 - ETA: >2024-05-24 22:05:01\n",
            "2024-05-24 19:00:41 (UTC) - 0:02:30 - train - INFO - step: 000002 - done (%): 2.0 - loss: 0.905 - lr: 1.8e-05 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3624.4 - avg_words_per_second: 1008.4 - ETA: >2024-05-24 20:46:50\n",
            "2024-05-24 19:00:59 (UTC) - 0:02:47 - train - INFO - step: 000003 - done (%): 3.0 - loss: 0.912 - lr: 5.2e-05 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3706.1 - avg_words_per_second: 1331.5 - ETA: >2024-05-24 20:20:33\n",
            "2024-05-24 19:01:17 (UTC) - 0:03:05 - train - INFO - step: 000004 - done (%): 4.0 - loss: 0.884 - lr: 8.6e-05 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3591.8 - avg_words_per_second: 1580.0 - ETA: >2024-05-24 20:07:39\n",
            "2024-05-24 19:01:35 (UTC) - 0:03:24 - train - INFO - step: 000005 - done (%): 5.0 - loss: 0.835 - lr: 1.0e-04 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3567.3 - avg_words_per_second: 1778.2 - ETA: >2024-05-24 19:59:57\n",
            "2024-05-24 19:01:53 (UTC) - 0:03:42 - train - INFO - step: 000006 - done (%): 6.0 - loss: 0.858 - lr: 1.0e-04 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3710.8 - avg_words_per_second: 1947.2 - ETA: >2024-05-24 19:54:37\n",
            "2024-05-24 19:02:11 (UTC) - 0:04:00 - train - INFO - step: 000007 - done (%): 7.0 - loss: 0.868 - lr: 1.0e-04 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3606.2 - avg_words_per_second: 2084.1 - ETA: >2024-05-24 19:50:55\n",
            "2024-05-24 19:02:29 (UTC) - 0:04:18 - train - INFO - step: 000008 - done (%): 8.0 - loss: 0.868 - lr: 1.0e-04 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3622.3 - avg_words_per_second: 2201.0 - ETA: >2024-05-24 19:48:09\n",
            "2024-05-24 19:02:47 (UTC) - 0:04:36 - train - INFO - step: 000009 - done (%): 9.0 - loss: 0.804 - lr: 1.0e-04 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3690.9 - avg_words_per_second: 2304.3 - ETA: >2024-05-24 19:45:55\n",
            "2024-05-24 19:03:05 (UTC) - 0:04:54 - train - INFO - step: 000010 - done (%): 10.0 - loss: 0.898 - lr: 9.9e-05 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3584.2 - avg_words_per_second: 2389.7 - ETA: >2024-05-24 19:44:13\n",
            "2024-05-24 19:03:23 (UTC) - 0:05:12 - train - INFO - step: 000011 - done (%): 11.0 - loss: 0.744 - lr: 9.9e-05 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3695.7 - avg_words_per_second: 2469.0 - ETA: >2024-05-24 19:42:45\n",
            "2024-05-24 19:03:41 (UTC) - 0:05:30 - train - INFO - step: 000012 - done (%): 12.0 - loss: 0.837 - lr: 9.9e-05 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3575.5 - avg_words_per_second: 2534.3 - ETA: >2024-05-24 19:41:37\n",
            "2024-05-24 19:04:00 (UTC) - 0:05:48 - train - INFO - step: 000013 - done (%): 13.0 - loss: 0.846 - lr: 9.8e-05 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3561.0 - avg_words_per_second: 2591.8 - ETA: >2024-05-24 19:40:40\n",
            "2024-05-24 19:04:17 (UTC) - 0:06:06 - train - INFO - step: 000014 - done (%): 14.0 - loss: 0.854 - lr: 9.8e-05 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3694.8 - avg_words_per_second: 2648.3 - ETA: >2024-05-24 19:39:46\n",
            "2024-05-24 19:04:36 (UTC) - 0:06:24 - train - INFO - step: 000015 - done (%): 15.0 - loss: 0.911 - lr: 9.7e-05 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3579.8 - avg_words_per_second: 2695.0 - ETA: >2024-05-24 19:39:03\n",
            "2024-05-24 19:04:54 (UTC) - 0:06:42 - train - INFO - step: 000016 - done (%): 16.0 - loss: 0.815 - lr: 9.7e-05 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3686.7 - avg_words_per_second: 2741.1 - ETA: >2024-05-24 19:38:22\n",
            "2024-05-24 19:05:12 (UTC) - 0:07:00 - train - INFO - step: 000017 - done (%): 17.0 - loss: 0.821 - lr: 9.6e-05 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3605.0 - avg_words_per_second: 2780.3 - ETA: >2024-05-24 19:37:48\n",
            "2024-05-24 19:05:30 (UTC) - 0:07:19 - train - INFO - step: 000018 - done (%): 18.0 - loss: 0.842 - lr: 9.5e-05 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3601.2 - avg_words_per_second: 2816.0 - ETA: >2024-05-24 19:37:18\n",
            "2024-05-24 19:05:48 (UTC) - 0:07:36 - train - INFO - step: 000019 - done (%): 19.0 - loss: 0.802 - lr: 9.5e-05 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3700.0 - avg_words_per_second: 2851.8 - ETA: >2024-05-24 19:36:49\n",
            "2024-05-24 19:06:06 (UTC) - 0:07:54 - train - INFO - step: 000020 - done (%): 20.0 - loss: 0.867 - lr: 9.4e-05 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3599.4 - avg_words_per_second: 2881.8 - ETA: >2024-05-24 19:36:25\n",
            "2024-05-24 19:06:24 (UTC) - 0:08:13 - train - INFO - step: 000021 - done (%): 21.0 - loss: 0.826 - lr: 9.3e-05 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3602.7 - avg_words_per_second: 2909.5 - ETA: >2024-05-24 19:36:03\n",
            "2024-05-24 19:06:42 (UTC) - 0:08:30 - train - INFO - step: 000022 - done (%): 22.0 - loss: 0.809 - lr: 9.2e-05 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3696.9 - avg_words_per_second: 2937.9 - ETA: >2024-05-24 19:35:42\n",
            "2024-05-24 19:07:00 (UTC) - 0:08:49 - train - INFO - step: 000023 - done (%): 23.0 - loss: 0.837 - lr: 9.1e-05 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3608.7 - avg_words_per_second: 2961.9 - ETA: >2024-05-24 19:35:24\n",
            "2024-05-24 19:07:18 (UTC) - 0:09:06 - train - INFO - step: 000024 - done (%): 24.0 - loss: 0.851 - lr: 9.0e-05 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3690.6 - avg_words_per_second: 2986.4 - ETA: >2024-05-24 19:35:05\n",
            "2024-05-24 19:07:36 (UTC) - 0:09:24 - train - INFO - step: 000025 - done (%): 25.0 - loss: 0.856 - lr: 8.9e-05 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3596.4 - avg_words_per_second: 3006.8 - ETA: >2024-05-24 19:34:51\n",
            "2024-05-24 19:07:54 (UTC) - 0:09:43 - train - INFO - step: 000026 - done (%): 26.0 - loss: 0.781 - lr: 8.8e-05 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3576.7 - avg_words_per_second: 3025.4 - ETA: >2024-05-24 19:34:37\n",
            "2024-05-24 19:08:12 (UTC) - 0:10:01 - train - INFO - step: 000027 - done (%): 27.0 - loss: 0.845 - lr: 8.7e-05 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3695.0 - avg_words_per_second: 3045.8 - ETA: >2024-05-24 19:34:23\n",
            "2024-05-24 19:08:30 (UTC) - 0:10:19 - train - INFO - step: 000028 - done (%): 28.0 - loss: 0.831 - lr: 8.6e-05 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3603.0 - avg_words_per_second: 3062.7 - ETA: >2024-05-24 19:34:11\n",
            "2024-05-24 19:08:48 (UTC) - 0:10:37 - train - INFO - step: 000029 - done (%): 29.0 - loss: 0.806 - lr: 8.5e-05 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3612.2 - avg_words_per_second: 3078.9 - ETA: >2024-05-24 19:34:00\n",
            "2024-05-24 19:09:06 (UTC) - 0:10:55 - train - INFO - step: 000030 - done (%): 30.0 - loss: 0.898 - lr: 8.4e-05 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3697.6 - avg_words_per_second: 3096.2 - ETA: >2024-05-24 19:33:48\n",
            "2024-05-24 19:09:24 (UTC) - 0:11:13 - train - INFO - step: 000031 - done (%): 31.0 - loss: 0.817 - lr: 8.3e-05 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3588.0 - avg_words_per_second: 3109.9 - ETA: >2024-05-24 19:33:38\n",
            "2024-05-24 19:09:42 (UTC) - 0:11:31 - train - INFO - step: 000032 - done (%): 32.0 - loss: 0.825 - lr: 8.1e-05 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3687.5 - avg_words_per_second: 3125.2 - ETA: >2024-05-24 19:33:28\n",
            "2024-05-24 19:10:00 (UTC) - 0:11:49 - train - INFO - step: 000033 - done (%): 33.0 - loss: 0.845 - lr: 8.0e-05 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3593.7 - avg_words_per_second: 3137.6 - ETA: >2024-05-24 19:33:20\n",
            "2024-05-24 19:10:19 (UTC) - 0:12:07 - train - INFO - step: 000034 - done (%): 34.0 - loss: 0.808 - lr: 7.9e-05 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3558.8 - avg_words_per_second: 3148.6 - ETA: >2024-05-24 19:33:12\n",
            "2024-05-24 19:10:36 (UTC) - 0:12:25 - train - INFO - step: 000035 - done (%): 35.0 - loss: 0.853 - lr: 7.7e-05 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3694.1 - avg_words_per_second: 3161.9 - ETA: >2024-05-24 19:33:04\n",
            "2024-05-24 19:10:55 (UTC) - 0:12:43 - train - INFO - step: 000036 - done (%): 36.0 - loss: 0.813 - lr: 7.6e-05 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3585.6 - avg_words_per_second: 3172.3 - ETA: >2024-05-24 19:32:57\n",
            "2024-05-24 19:11:12 (UTC) - 0:13:01 - train - INFO - step: 000037 - done (%): 37.0 - loss: 0.801 - lr: 7.5e-05 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3694.8 - avg_words_per_second: 3184.5 - ETA: >2024-05-24 19:32:49\n",
            "2024-05-24 19:11:31 (UTC) - 0:13:19 - train - INFO - step: 000038 - done (%): 38.0 - loss: 0.744 - lr: 7.3e-05 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3595.3 - avg_words_per_second: 3194.1 - ETA: >2024-05-24 19:32:43\n",
            "2024-05-24 19:11:49 (UTC) - 0:13:37 - train - INFO - step: 000039 - done (%): 39.0 - loss: 0.816 - lr: 7.2e-05 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3607.8 - avg_words_per_second: 3203.5 - ETA: >2024-05-24 19:32:37\n",
            "2024-05-24 19:12:07 (UTC) - 0:13:55 - train - INFO - step: 000040 - done (%): 40.0 - loss: 0.786 - lr: 7.0e-05 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3691.9 - avg_words_per_second: 3214.1 - ETA: >2024-05-24 19:32:30\n",
            "2024-05-24 19:12:25 (UTC) - 0:14:13 - train - INFO - step: 000041 - done (%): 41.0 - loss: 0.804 - lr: 6.9e-05 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3600.7 - avg_words_per_second: 3222.6 - ETA: >2024-05-24 19:32:25\n",
            "2024-05-24 19:12:43 (UTC) - 0:14:32 - train - INFO - step: 000042 - done (%): 42.0 - loss: 0.845 - lr: 6.7e-05 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3616.1 - avg_words_per_second: 3230.9 - ETA: >2024-05-24 19:32:19\n",
            "2024-05-24 19:13:01 (UTC) - 0:14:49 - train - INFO - step: 000043 - done (%): 43.0 - loss: 0.864 - lr: 6.5e-05 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3688.4 - avg_words_per_second: 3240.3 - ETA: >2024-05-24 19:32:14\n",
            "2024-05-24 19:13:19 (UTC) - 0:15:07 - train - INFO - step: 000044 - done (%): 44.0 - loss: 0.862 - lr: 6.4e-05 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3616.8 - avg_words_per_second: 3248.0 - ETA: >2024-05-24 19:32:09\n",
            "2024-05-24 19:13:37 (UTC) - 0:15:25 - train - INFO - step: 000045 - done (%): 45.0 - loss: 0.862 - lr: 6.2e-05 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3690.6 - avg_words_per_second: 3256.7 - ETA: >2024-05-24 19:32:03\n",
            "2024-05-24 19:13:55 (UTC) - 0:15:43 - train - INFO - step: 000046 - done (%): 46.0 - loss: 0.829 - lr: 6.1e-05 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3601.5 - avg_words_per_second: 3263.5 - ETA: >2024-05-24 19:31:59\n",
            "2024-05-24 19:14:13 (UTC) - 0:16:02 - train - INFO - step: 000047 - done (%): 47.0 - loss: 0.812 - lr: 5.9e-05 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3590.5 - avg_words_per_second: 3269.8 - ETA: >2024-05-24 19:31:55\n",
            "2024-05-24 19:14:31 (UTC) - 0:16:19 - train - INFO - step: 000048 - done (%): 48.0 - loss: 0.818 - lr: 5.7e-05 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3704.5 - avg_words_per_second: 3277.8 - ETA: >2024-05-24 19:31:50\n",
            "2024-05-24 19:14:49 (UTC) - 0:16:38 - train - INFO - step: 000049 - done (%): 49.0 - loss: 0.817 - lr: 5.6e-05 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3608.4 - avg_words_per_second: 3283.9 - ETA: >2024-05-24 19:31:47\n",
            "2024-05-24 19:15:07 (UTC) - 0:16:56 - train - INFO - step: 000050 - done (%): 50.0 - loss: 0.888 - lr: 5.4e-05 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3619.2 - avg_words_per_second: 3290.0 - ETA: >2024-05-24 19:31:43\n",
            "2024-05-24 19:15:25 (UTC) - 0:17:13 - train - INFO - step: 000051 - done (%): 51.0 - loss: 0.777 - lr: 5.2e-05 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3690.4 - avg_words_per_second: 3297.0 - ETA: >2024-05-24 19:31:39\n",
            "2024-05-24 19:15:43 (UTC) - 0:17:32 - train - INFO - step: 000052 - done (%): 52.0 - loss: 0.804 - lr: 5.1e-05 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3589.4 - avg_words_per_second: 3302.2 - ETA: >2024-05-24 19:31:36\n",
            "2024-05-24 19:16:01 (UTC) - 0:17:49 - train - INFO - step: 000053 - done (%): 53.0 - loss: 0.800 - lr: 4.9e-05 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3692.1 - avg_words_per_second: 3308.8 - ETA: >2024-05-24 19:31:32\n",
            "2024-05-24 19:16:19 (UTC) - 0:18:08 - train - INFO - step: 000054 - done (%): 54.0 - loss: 0.804 - lr: 4.8e-05 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3588.5 - avg_words_per_second: 3313.6 - ETA: >2024-05-24 19:31:29\n",
            "2024-05-24 19:16:37 (UTC) - 0:18:26 - train - INFO - step: 000055 - done (%): 55.0 - loss: 0.854 - lr: 4.6e-05 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3571.2 - avg_words_per_second: 3317.9 - ETA: >2024-05-24 19:31:26\n",
            "2024-05-24 19:16:55 (UTC) - 0:18:44 - train - INFO - step: 000056 - done (%): 56.0 - loss: 0.819 - lr: 4.4e-05 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3696.4 - avg_words_per_second: 3324.0 - ETA: >2024-05-24 19:31:23\n",
            "2024-05-24 19:17:13 (UTC) - 0:19:02 - train - INFO - step: 000057 - done (%): 57.0 - loss: 0.844 - lr: 4.3e-05 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3606.8 - avg_words_per_second: 3328.6 - ETA: >2024-05-24 19:31:20\n",
            "2024-05-24 19:17:31 (UTC) - 0:19:20 - train - INFO - step: 000058 - done (%): 58.0 - loss: 0.840 - lr: 4.1e-05 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3706.0 - avg_words_per_second: 3334.5 - ETA: >2024-05-24 19:31:16\n",
            "2024-05-24 19:17:49 (UTC) - 0:19:38 - train - INFO - step: 000059 - done (%): 59.0 - loss: 0.836 - lr: 3.9e-05 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3616.5 - avg_words_per_second: 3338.9 - ETA: >2024-05-24 19:31:14\n",
            "2024-05-24 19:18:07 (UTC) - 0:19:56 - train - INFO - step: 000060 - done (%): 60.0 - loss: 0.852 - lr: 3.8e-05 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3610.4 - avg_words_per_second: 3343.1 - ETA: >2024-05-24 19:31:11\n",
            "2024-05-24 19:18:25 (UTC) - 0:20:14 - train - INFO - step: 000061 - done (%): 61.0 - loss: 0.837 - lr: 3.6e-05 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3704.4 - avg_words_per_second: 3348.4 - ETA: >2024-05-24 19:31:08\n",
            "2024-05-24 19:18:43 (UTC) - 0:20:32 - train - INFO - step: 000062 - done (%): 62.0 - loss: 0.839 - lr: 3.5e-05 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3607.6 - avg_words_per_second: 3352.3 - ETA: >2024-05-24 19:31:06\n",
            "2024-05-24 19:19:01 (UTC) - 0:20:50 - train - INFO - step: 000063 - done (%): 63.0 - loss: 0.813 - lr: 3.3e-05 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3604.6 - avg_words_per_second: 3356.0 - ETA: >2024-05-24 19:31:04\n",
            "2024-05-24 19:19:19 (UTC) - 0:21:08 - train - INFO - step: 000064 - done (%): 64.0 - loss: 0.784 - lr: 3.1e-05 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3689.6 - avg_words_per_second: 3360.8 - ETA: >2024-05-24 19:31:01\n",
            "2024-05-24 19:19:37 (UTC) - 0:21:26 - train - INFO - step: 000065 - done (%): 65.0 - loss: 0.797 - lr: 3.0e-05 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3604.0 - avg_words_per_second: 3364.3 - ETA: >2024-05-24 19:30:59\n",
            "2024-05-24 19:19:55 (UTC) - 0:21:44 - train - INFO - step: 000066 - done (%): 66.0 - loss: 0.788 - lr: 2.8e-05 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3697.6 - avg_words_per_second: 3368.9 - ETA: >2024-05-24 19:30:56\n",
            "2024-05-24 19:20:13 (UTC) - 0:22:02 - train - INFO - step: 000067 - done (%): 67.0 - loss: 0.902 - lr: 2.7e-05 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3588.6 - avg_words_per_second: 3372.0 - ETA: >2024-05-24 19:30:55\n",
            "2024-05-24 19:20:32 (UTC) - 0:22:20 - train - INFO - step: 000068 - done (%): 68.0 - loss: 0.783 - lr: 2.5e-05 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3565.3 - avg_words_per_second: 3374.6 - ETA: >2024-05-24 19:30:53\n",
            "2024-05-24 19:20:49 (UTC) - 0:22:38 - train - INFO - step: 000069 - done (%): 69.0 - loss: 0.889 - lr: 2.4e-05 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3689.3 - avg_words_per_second: 3378.8 - ETA: >2024-05-24 19:30:51\n",
            "2024-05-24 19:21:08 (UTC) - 0:22:56 - train - INFO - step: 000070 - done (%): 70.0 - loss: 0.810 - lr: 2.3e-05 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3601.3 - avg_words_per_second: 3381.8 - ETA: >2024-05-24 19:30:49\n",
            "2024-05-24 19:21:26 (UTC) - 0:23:14 - train - INFO - step: 000071 - done (%): 71.0 - loss: 0.796 - lr: 2.1e-05 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3621.7 - avg_words_per_second: 3385.0 - ETA: >2024-05-24 19:30:47\n",
            "2024-05-24 19:21:43 (UTC) - 0:23:32 - train - INFO - step: 000072 - done (%): 72.0 - loss: 0.927 - lr: 2.0e-05 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3681.6 - avg_words_per_second: 3388.8 - ETA: >2024-05-24 19:30:45\n",
            "2024-05-24 19:22:02 (UTC) - 0:23:50 - train - INFO - step: 000073 - done (%): 73.0 - loss: 0.811 - lr: 1.9e-05 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3585.4 - avg_words_per_second: 3391.3 - ETA: >2024-05-24 19:30:44\n",
            "2024-05-24 19:22:20 (UTC) - 0:24:08 - train - INFO - step: 000074 - done (%): 74.0 - loss: 0.828 - lr: 1.7e-05 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3678.2 - avg_words_per_second: 3394.9 - ETA: >2024-05-24 19:30:42\n",
            "2024-05-24 19:22:38 (UTC) - 0:24:26 - train - INFO - step: 000075 - done (%): 75.0 - loss: 0.908 - lr: 1.6e-05 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3584.0 - avg_words_per_second: 3397.3 - ETA: >2024-05-24 19:30:40\n",
            "2024-05-24 19:22:56 (UTC) - 0:24:45 - train - INFO - step: 000076 - done (%): 76.0 - loss: 0.881 - lr: 1.5e-05 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3575.5 - avg_words_per_second: 3399.5 - ETA: >2024-05-24 19:30:39\n",
            "2024-05-24 19:23:14 (UTC) - 0:25:03 - train - INFO - step: 000077 - done (%): 77.0 - loss: 0.819 - lr: 1.4e-05 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3706.5 - avg_words_per_second: 3403.2 - ETA: >2024-05-24 19:30:37\n",
            "2024-05-24 19:23:32 (UTC) - 0:25:21 - train - INFO - step: 000078 - done (%): 78.0 - loss: 0.867 - lr: 1.3e-05 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3600.9 - avg_words_per_second: 3405.6 - ETA: >2024-05-24 19:30:35\n",
            "2024-05-24 19:23:50 (UTC) - 0:25:38 - train - INFO - step: 000079 - done (%): 79.0 - loss: 0.913 - lr: 1.2e-05 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3694.4 - avg_words_per_second: 3408.9 - ETA: >2024-05-24 19:30:34\n",
            "2024-05-24 19:24:08 (UTC) - 0:25:57 - train - INFO - step: 000080 - done (%): 80.0 - loss: 0.826 - lr: 1.1e-05 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3603.8 - avg_words_per_second: 3411.2 - ETA: >2024-05-24 19:30:32\n",
            "2024-05-24 19:24:26 (UTC) - 0:26:15 - train - INFO - step: 000081 - done (%): 81.0 - loss: 0.835 - lr: 9.5e-06 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3611.0 - avg_words_per_second: 3413.6 - ETA: >2024-05-24 19:30:31\n",
            "2024-05-24 19:24:44 (UTC) - 0:26:33 - train - INFO - step: 000082 - done (%): 82.0 - loss: 0.854 - lr: 8.6e-06 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3686.5 - avg_words_per_second: 3416.7 - ETA: >2024-05-24 19:30:29\n",
            "2024-05-24 19:25:02 (UTC) - 0:26:51 - train - INFO - step: 000083 - done (%): 83.0 - loss: 0.772 - lr: 7.7e-06 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3596.4 - avg_words_per_second: 3418.7 - ETA: >2024-05-24 19:30:28\n",
            "2024-05-24 19:25:20 (UTC) - 0:27:09 - train - INFO - step: 000084 - done (%): 84.0 - loss: 0.813 - lr: 6.8e-06 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3607.9 - avg_words_per_second: 3420.9 - ETA: >2024-05-24 19:30:27\n",
            "2024-05-24 19:25:38 (UTC) - 0:27:27 - train - INFO - step: 000085 - done (%): 85.0 - loss: 0.836 - lr: 6.0e-06 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3690.1 - avg_words_per_second: 3423.8 - ETA: >2024-05-24 19:30:25\n",
            "2024-05-24 19:25:56 (UTC) - 0:27:45 - train - INFO - step: 000086 - done (%): 86.0 - loss: 0.866 - lr: 5.3e-06 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3607.4 - avg_words_per_second: 3425.8 - ETA: >2024-05-24 19:30:24\n",
            "2024-05-24 19:26:14 (UTC) - 0:28:03 - train - INFO - step: 000087 - done (%): 87.0 - loss: 0.845 - lr: 4.6e-06 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3706.4 - avg_words_per_second: 3428.8 - ETA: >2024-05-24 19:30:22\n",
            "2024-05-24 19:26:32 (UTC) - 0:28:21 - train - INFO - step: 000088 - done (%): 88.0 - loss: 0.777 - lr: 3.9e-06 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3591.4 - avg_words_per_second: 3430.6 - ETA: >2024-05-24 19:30:21\n",
            "2024-05-24 19:26:51 (UTC) - 0:28:39 - train - INFO - step: 000089 - done (%): 89.0 - loss: 0.813 - lr: 3.3e-06 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3569.4 - avg_words_per_second: 3432.1 - ETA: >2024-05-24 19:30:21\n",
            "2024-05-24 19:27:08 (UTC) - 0:28:57 - train - INFO - step: 000090 - done (%): 90.0 - loss: 0.841 - lr: 2.7e-06 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3697.1 - avg_words_per_second: 3434.8 - ETA: >2024-05-24 19:30:19\n",
            "2024-05-24 19:27:26 (UTC) - 0:29:15 - train - INFO - step: 000091 - done (%): 91.0 - loss: 0.807 - lr: 2.2e-06 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3611.5 - avg_words_per_second: 3436.6 - ETA: >2024-05-24 19:30:18\n",
            "2024-05-24 19:27:45 (UTC) - 0:29:33 - train - INFO - step: 000092 - done (%): 92.0 - loss: 0.807 - lr: 1.7e-06 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3607.2 - avg_words_per_second: 3438.4 - ETA: >2024-05-24 19:30:17\n",
            "2024-05-24 19:28:02 (UTC) - 0:29:51 - train - INFO - step: 000093 - done (%): 93.0 - loss: 0.827 - lr: 1.3e-06 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3696.3 - avg_words_per_second: 3441.0 - ETA: >2024-05-24 19:30:16\n",
            "2024-05-24 19:28:21 (UTC) - 0:30:09 - train - INFO - step: 000094 - done (%): 94.0 - loss: 0.816 - lr: 9.8e-07 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3583.1 - avg_words_per_second: 3442.5 - ETA: >2024-05-24 19:30:15\n",
            "2024-05-24 19:28:38 (UTC) - 0:30:27 - train - INFO - step: 000095 - done (%): 95.0 - loss: 0.825 - lr: 6.8e-07 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3695.6 - avg_words_per_second: 3444.9 - ETA: >2024-05-24 19:30:13\n",
            "2024-05-24 19:28:57 (UTC) - 0:30:45 - train - INFO - step: 000096 - done (%): 96.0 - loss: 0.794 - lr: 4.4e-07 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3580.1 - avg_words_per_second: 3446.3 - ETA: >2024-05-24 19:30:13\n",
            "2024-05-24 19:29:15 (UTC) - 0:31:04 - train - INFO - step: 000097 - done (%): 97.0 - loss: 0.884 - lr: 2.5e-07 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3574.7 - avg_words_per_second: 3447.6 - ETA: >2024-05-24 19:30:12\n",
            "2024-05-24 19:29:33 (UTC) - 0:31:21 - train - INFO - step: 000098 - done (%): 98.0 - loss: 0.826 - lr: 1.1e-07 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3698.8 - avg_words_per_second: 3450.0 - ETA: >2024-05-24 19:30:11\n",
            "2024-05-24 19:29:51 (UTC) - 0:31:40 - train - INFO - step: 000099 - done (%): 99.0 - loss: 0.771 - lr: 2.8e-08 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3604.2 - avg_words_per_second: 3451.5 - ETA: >2024-05-24 19:30:10\n",
            "2024-05-24 19:30:09 (UTC) - 0:31:57 - eval - INFO - Start eval...\n",
            "2024-05-24 19:32:04 (UTC) - 0:33:53 - eval - INFO - Eval finished!\n",
            "2024-05-24 19:32:04 (UTC) - 0:33:53 - train - INFO - step: 000100 - eval_perplexity: 1.779 - eval_loss: 0.831 - train_loss: 0.762\n",
            "2024-05-24 19:32:04 (UTC) - 0:33:53 - train - INFO - step: 000100 - done (%): 100.0 - loss: 0.762 - lr: 4.0e-10 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 492.8 - avg_words_per_second: 3256.0 - ETA: >2024-05-24 19:32:04\n",
            "2024-05-24 19:32:04 (UTC) - 0:33:53 - checkpointing - INFO - Dumping checkpoint in /content/test_ultra/checkpoints/checkpoint_000100/consolidated using tmp name: tmp.consolidated\n",
            "2024-05-24 19:32:05 (UTC) - 0:33:53 - checkpointing - INFO - Done dumping checkpoint in /content/test_ultra/checkpoints/checkpoint_000100/consolidated for step: 100\n",
            "2024-05-24 19:32:05 (UTC) - 0:33:53 - checkpointing - INFO - Done deleting checkpoints \n",
            "2024-05-24 19:32:05 (UTC) - 0:33:53 - checkpointing - INFO - Done!\n",
            "2024-05-24 19:32:05 (UTC) - 0:33:53 - train - INFO - done!\n",
            "2024-05-24 19:32:05 (UTC) - 0:33:53 - utils - INFO - Closing: eval_logger\n",
            "2024-05-24 19:32:05 (UTC) - 0:33:53 - utils - INFO - Closed: eval_logger\n",
            "2024-05-24 19:32:05 (UTC) - 0:33:53 - utils - INFO - Closing: metrics_logger\n",
            "2024-05-24 19:32:05 (UTC) - 0:33:53 - utils - INFO - Closed: metrics_logger\n",
            "2024-05-24 19:32:05 (UTC) - 0:33:53 - train - INFO - Closed everything!\n"
          ]
        }
      ]
    },
    {
      "cell_type": "markdown",
      "source": [
        "## Inference"
      ],
      "metadata": {
        "id": "ruJ29JFn98zE"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "!pip install mistral_inference"
      ],
      "metadata": {
        "id": "7BWNGKt9-Kxz",
        "outputId": "61479b03-c608-455b-e99b-32d96ada9ca6",
        "colab": {
          "base_uri": "https://localhost:8080/"
        }
      },
      "execution_count": 24,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Collecting mistral_inference\n",
            "  Downloading mistral_inference-1.1.0-py3-none-any.whl (21 kB)\n",
            "Requirement already satisfied: fire>=0.6.0 in /usr/local/lib/python3.10/dist-packages (from mistral_inference) (0.6.0)\n",
            "Requirement already satisfied: mistral_common<2.0.0,>=1.0.0 in /usr/local/lib/python3.10/dist-packages (from mistral_inference) (1.1.0)\n",
            "Requirement already satisfied: safetensors>=0.4.0 in /usr/local/lib/python3.10/dist-packages (from mistral_inference) (0.4.3)\n",
            "Requirement already satisfied: simple-parsing>=0.1.5 in /usr/local/lib/python3.10/dist-packages (from mistral_inference) (0.1.5)\n",
            "Requirement already satisfied: xformers>=0.0.24 in /usr/local/lib/python3.10/dist-packages (from mistral_inference) (0.0.24)\n",
            "Requirement already satisfied: six in /usr/local/lib/python3.10/dist-packages (from fire>=0.6.0->mistral_inference) (1.16.0)\n",
            "Requirement already satisfied: termcolor in /usr/local/lib/python3.10/dist-packages (from fire>=0.6.0->mistral_inference) (2.4.0)\n",
            "Requirement already satisfied: jsonschema==4.21.1 in /usr/local/lib/python3.10/dist-packages (from mistral_common<2.0.0,>=1.0.0->mistral_inference) (4.21.1)\n",
            "Requirement already satisfied: pydantic==2.6.1 in /usr/local/lib/python3.10/dist-packages (from mistral_common<2.0.0,>=1.0.0->mistral_inference) (2.6.1)\n",
            "Requirement already satisfied: sentencepiece==0.1.99 in /usr/local/lib/python3.10/dist-packages (from mistral_common<2.0.0,>=1.0.0->mistral_inference) (0.1.99)\n",
            "Requirement already satisfied: typing-extensions<5.0.0,>=4.11.0 in /usr/local/lib/python3.10/dist-packages (from mistral_common<2.0.0,>=1.0.0->mistral_inference) (4.11.0)\n",
            "Requirement already satisfied: attrs>=22.2.0 in /usr/local/lib/python3.10/dist-packages (from jsonschema==4.21.1->mistral_common<2.0.0,>=1.0.0->mistral_inference) (23.2.0)\n",
            "Requirement already satisfied: jsonschema-specifications>=2023.03.6 in /usr/local/lib/python3.10/dist-packages (from jsonschema==4.21.1->mistral_common<2.0.0,>=1.0.0->mistral_inference) (2023.12.1)\n",
            "Requirement already satisfied: referencing>=0.28.4 in /usr/local/lib/python3.10/dist-packages (from jsonschema==4.21.1->mistral_common<2.0.0,>=1.0.0->mistral_inference) (0.35.1)\n",
            "Requirement already satisfied: rpds-py>=0.7.1 in /usr/local/lib/python3.10/dist-packages (from jsonschema==4.21.1->mistral_common<2.0.0,>=1.0.0->mistral_inference) (0.18.1)\n",
            "Requirement already satisfied: annotated-types>=0.4.0 in /usr/local/lib/python3.10/dist-packages (from pydantic==2.6.1->mistral_common<2.0.0,>=1.0.0->mistral_inference) (0.7.0)\n",
            "Requirement already satisfied: pydantic-core==2.16.2 in /usr/local/lib/python3.10/dist-packages (from pydantic==2.6.1->mistral_common<2.0.0,>=1.0.0->mistral_inference) (2.16.2)\n",
            "Requirement already satisfied: docstring-parser~=0.15 in /usr/local/lib/python3.10/dist-packages (from simple-parsing>=0.1.5->mistral_inference) (0.16)\n",
            "Requirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (from xformers>=0.0.24->mistral_inference) (1.25.2)\n",
            "Requirement already satisfied: torch==2.2.0 in /usr/local/lib/python3.10/dist-packages (from xformers>=0.0.24->mistral_inference) (2.2.0)\n",
            "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from torch==2.2.0->xformers>=0.0.24->mistral_inference) (3.14.0)\n",
            "Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from torch==2.2.0->xformers>=0.0.24->mistral_inference) (1.12)\n",
            "Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch==2.2.0->xformers>=0.0.24->mistral_inference) (3.3)\n",
            "Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch==2.2.0->xformers>=0.0.24->mistral_inference) (3.1.4)\n",
            "Requirement already satisfied: fsspec in /usr/local/lib/python3.10/dist-packages (from torch==2.2.0->xformers>=0.0.24->mistral_inference) (2023.6.0)\n",
            "Requirement already satisfied: nvidia-cuda-nvrtc-cu12==12.1.105 in /usr/local/lib/python3.10/dist-packages (from torch==2.2.0->xformers>=0.0.24->mistral_inference) (12.1.105)\n",
            "Requirement already satisfied: nvidia-cuda-runtime-cu12==12.1.105 in /usr/local/lib/python3.10/dist-packages (from torch==2.2.0->xformers>=0.0.24->mistral_inference) (12.1.105)\n",
            "Requirement already satisfied: nvidia-cuda-cupti-cu12==12.1.105 in /usr/local/lib/python3.10/dist-packages (from torch==2.2.0->xformers>=0.0.24->mistral_inference) (12.1.105)\n",
            "Requirement already satisfied: nvidia-cudnn-cu12==8.9.2.26 in /usr/local/lib/python3.10/dist-packages (from torch==2.2.0->xformers>=0.0.24->mistral_inference) (8.9.2.26)\n",
            "Requirement already satisfied: nvidia-cublas-cu12==12.1.3.1 in /usr/local/lib/python3.10/dist-packages (from torch==2.2.0->xformers>=0.0.24->mistral_inference) (12.1.3.1)\n",
            "Requirement already satisfied: nvidia-cufft-cu12==11.0.2.54 in /usr/local/lib/python3.10/dist-packages (from torch==2.2.0->xformers>=0.0.24->mistral_inference) (11.0.2.54)\n",
            "Requirement already satisfied: nvidia-curand-cu12==10.3.2.106 in /usr/local/lib/python3.10/dist-packages (from torch==2.2.0->xformers>=0.0.24->mistral_inference) (10.3.2.106)\n",
            "Requirement already satisfied: nvidia-cusolver-cu12==11.4.5.107 in /usr/local/lib/python3.10/dist-packages (from torch==2.2.0->xformers>=0.0.24->mistral_inference) (11.4.5.107)\n",
            "Requirement already satisfied: nvidia-cusparse-cu12==12.1.0.106 in /usr/local/lib/python3.10/dist-packages (from torch==2.2.0->xformers>=0.0.24->mistral_inference) (12.1.0.106)\n",
            "Requirement already satisfied: nvidia-nccl-cu12==2.19.3 in /usr/local/lib/python3.10/dist-packages (from torch==2.2.0->xformers>=0.0.24->mistral_inference) (2.19.3)\n",
            "Requirement already satisfied: nvidia-nvtx-cu12==12.1.105 in /usr/local/lib/python3.10/dist-packages (from torch==2.2.0->xformers>=0.0.24->mistral_inference) (12.1.105)\n",
            "Requirement already satisfied: triton==2.2.0 in /usr/local/lib/python3.10/dist-packages (from torch==2.2.0->xformers>=0.0.24->mistral_inference) (2.2.0)\n",
            "Requirement already satisfied: nvidia-nvjitlink-cu12 in /usr/local/lib/python3.10/dist-packages (from nvidia-cusolver-cu12==11.4.5.107->torch==2.2.0->xformers>=0.0.24->mistral_inference) (12.5.40)\n",
            "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch==2.2.0->xformers>=0.0.24->mistral_inference) (2.1.5)\n",
            "Requirement already satisfied: mpmath>=0.19 in /usr/local/lib/python3.10/dist-packages (from sympy->torch==2.2.0->xformers>=0.0.24->mistral_inference) (1.3.0)\n",
            "Installing collected packages: mistral_inference\n",
            "Successfully installed mistral_inference-1.1.0\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "from mistral_inference.model import Transformer\n",
        "from mistral_inference.generate import generate\n",
        "\n",
        "from mistral_common.tokens.tokenizers.mistral import MistralTokenizer\n",
        "from mistral_common.protocol.instruct.messages import UserMessage\n",
        "from mistral_common.protocol.instruct.request import ChatCompletionRequest\n",
        "\n",
        "\n",
        "tokenizer = MistralTokenizer.from_file(\"/content/mistral_models/tokenizer.model.v3\")  # change to extracted tokenizer file\n",
        "model = Transformer.from_folder(\"/content/mistral_models\")  # change to extracted model dir\n",
        "model.load_lora(\"/content/test_ultra/checkpoints/checkpoint_000100/consolidated/lora.safetensors\")\n",
        "\n",
        "completion_request = ChatCompletionRequest(messages=[UserMessage(content=\"Explain Machine Learning to me in a nutshell.\")])\n",
        "\n",
        "tokens = tokenizer.encode_chat_completion(completion_request).tokens\n",
        "\n",
        "out_tokens, _ = generate([tokens], model, max_tokens=64, temperature=0.0, eos_id=tokenizer.instruct_tokenizer.tokenizer.eos_id)\n",
        "result = tokenizer.instruct_tokenizer.tokenizer.decode(out_tokens[0])\n",
        "\n",
        "print(result)"
      ],
      "metadata": {
        "id": "F-xLs2Ot9-il",
        "outputId": "f0c6f171-b14c-4d0c-d5e9-cb24a7f07653",
        "colab": {
          "base_uri": "https://localhost:8080/"
        }
      },
      "execution_count": 25,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Machine learning is a subset of artificial intelligence that involves the use of algorithms to learn from data and make predictions or decisions without being explicitly programmed. It is a type of computer science that enables machines to learn and improve from experience without being explicitly programmed. Machine learning algorithms can learn from data and make predictions or decisions based\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [],
      "metadata": {
        "id": "Vd8A8JP4Fx3C"
      },
      "execution_count": null,
      "outputs": []
    }
  ]
}