"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "import matplotlib.pyplot as plt\n",
+ "from numpy import argmax\n",
+ "\n",
+ "# Line plot of each score column in perf_df against epoch\n",
+ "ax = perf_df.set_index(\"epoch\").plot(\n",
+ " figsize=(10, 5), title=\"METEOR score vs epoch\", grid=True\n",
+ ")\n",
+ "\n",
+ "# Loop through each line and annotate every point with its value\n",
+ "for line_index, line in enumerate(ax.lines):\n",
+ " # Get the data\n",
+ " xdata, ydata = line.get_data()\n",
+ " for index in range(xdata.size):\n",
+ " ax.annotate(\n",
+ " f\"{ydata[index]:.3f}\",\n",
+ " xy=(xdata[index], ydata[index]),\n",
+ " textcoords=\"offset points\",\n",
+ " xytext=(0, 10 if line_index % 2 == 0 else -10),\n",
+ " ha=\"center\",\n",
+ " )\n",
+ "\n",
+ "plt.tight_layout()\n",
+ "plt.show()"
+ ]
+ }
+ ],
+ "metadata": {
+ "accelerator": "GPU",
+ "application/vnd.databricks.v1+notebook": {
+ "dashboards": [],
+ "environmentMetadata": null,
+ "language": "python",
+ "notebookMetadata": {
+ "mostRecentlyExecutedCommandWithImplicitDF": {
+ "commandId": 2652223487066960,
+ "dataframes": [
+ "_sqldf"
+ ]
+ },
+ "pythonIndentUnit": 4
+ },
+ "notebookName": "00_Data_Analysis",
+ "widgets": {}
+ },
+ "colab": {
+ "gpuType": "T4",
+ "provenance": []
+ },
+ "kernelspec": {
+ "display_name": "Python 3",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.11.9"
+ },
+ "widgets": {
+ "application/vnd.jupyter.widget-state+json": {
+ "036fc5746f43416db18c19ad8fd36677": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "06e806c82c7b4cbea31c5358dd9c3434": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "087b76a8b7514269b1f0ab29b062e444": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_a069d2ab23824f29aa320ac256e2cfe9",
+ "placeholder": "",
+ "style": "IPY_MODEL_06e806c82c7b4cbea31c5358dd9c3434",
+ "value": "Map (num_proc=2): 100%"
+ }
+ },
+ "09b76013aa9e45efb6deb23a7a0d0925": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_dea41c5260884aa6879b5e1d1697b14f",
+ "placeholder": "",
+ "style": "IPY_MODEL_89965917796a4f81b899fdc7685f33df",
+ "value": "config.json: 100%"
+ }
+ },
+ "0a92c56bfa134ef583220d7ef0b13e17": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "0c34be936c8145d3ab41282f30a70713": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "0f8b6bfe16894500838793f2491d403f": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "177c78fce95d4b4ab33057c5a048d693": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "1f44c9ce1adf470cbb19784493ed209f": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_0c34be936c8145d3ab41282f30a70713",
+ "placeholder": "",
+ "style": "IPY_MODEL_0a92c56bfa134ef583220d7ef0b13e17",
+ "value": "model.safetensors: 100%"
+ }
+ },
+ "201b59ccd9f845e197029b57e424aefc": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "2157f01726d748f8a9ae4a00664430da": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "21db8a77b00d4a4e82fdfa608657531f": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "26e4202cca81496a90d15a0dd4ca9cf1": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_ba90fdb8822d47dab7ba203bee297f37",
+ "IPY_MODEL_61560ff6a36b44f4a9dfdae5c52791d4",
+ "IPY_MODEL_95fbe66647904c06a20f640630d6dc0e"
+ ],
+ "layout": "IPY_MODEL_57182a263d324a3dbf1471c74290a0d5"
+ }
+ },
+ "27155728b6b84cb199c91c940095d0a8": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_6b91feeed5464877991ac2c207aebe7c",
+ "IPY_MODEL_cca8113c54c0495daedce1327bf9c68b",
+ "IPY_MODEL_2e63a29e2f7247bba5beede9a568c99f"
+ ],
+ "layout": "IPY_MODEL_5c9d781c28944f3eb86e2a6d44efdf18"
+ }
+ },
+ "271ddaa553a042d09b6db7b450643d8f": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "2a58d04b428c46f4b3dbadd3bc6cd529": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "2d18ddf6482c4d97829ac0e5a7b9868f": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_9f679ad3ec7f4fe8ad0510ffb57bc2ab",
+ "IPY_MODEL_f2df530d22c74977b249dd9fb5f4829b",
+ "IPY_MODEL_89b2ef0dbfea47ab8e6f8d659e3351d1"
+ ],
+ "layout": "IPY_MODEL_3056b148aa9f4e6e8aa3b61d26886255"
+ }
+ },
+ "2e5087c76f98437cb5dc729230358cba": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "2e63a29e2f7247bba5beede9a568c99f": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_b993eaec6b224440bf80c0958c6fb536",
+ "placeholder": "",
+ "style": "IPY_MODEL_de868e26e7154f62aa86223a539ad421",
+ "value": " 464/464 [00:00<00:00, 27.1kB/s]"
+ }
+ },
+ "2f6c70dd266c4816bfad3fd3d192929a": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "30307300bc4e4baf96560e30969a82b6": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_e36a3f9eff0e4cf68834d66b0213ae96",
+ "placeholder": "",
+ "style": "IPY_MODEL_a0037bdccf254159becde630bee3d1db",
+ "value": "generation_config.json: 100%"
+ }
+ },
+ "3056b148aa9f4e6e8aa3b61d26886255": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "30cdc32298134cb0be4d41615b9e5774": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "3572201bd4d74a58b7a665f9bdfdcdba": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "35b0e8c26d6640e9bd0ed7b242a423d8": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_2e5087c76f98437cb5dc729230358cba",
+ "max": 51760,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_036fc5746f43416db18c19ad8fd36677",
+ "value": 51760
+ }
+ },
+ "36166c7bcb854b34aca1f41a5d6ea50b": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "370692d819df41828b48c4ad446f977b": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "39b29a75374b45c0a22506010be2b84e": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_30cdc32298134cb0be4d41615b9e5774",
+ "max": 1179,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_47928317548c454bba6358ab132e8dee",
+ "value": 1179
+ }
+ },
+ "3cf2dd993b5e4d3daecf61e4bab5a404": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_087b76a8b7514269b1f0ab29b062e444",
+ "IPY_MODEL_35b0e8c26d6640e9bd0ed7b242a423d8",
+ "IPY_MODEL_54ad89e05fd74576b9b8b5b5a10eaf8d"
+ ],
+ "layout": "IPY_MODEL_a41dc44766444a998bec2d777f249d23"
+ }
+ },
+ "43dec2ede91341f5af60eb522e18e984": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "4463edd481c1467f914c7dcd6c6e6ffc": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "47928317548c454bba6358ab132e8dee": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "49277aeeac16434a865a4d12308b1abc": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "4ae7e449e4ea4c729b5f34607c18ebae": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "4b2061b8a73c43ffb0c2f83daf0d0183": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "4c4c88d4c701450692fa0f6b0c5764b0": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "4c666f4ace3943f8b80ecd20e7503236": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "4ccedf0d93094e63b57a0f8a434fba06": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_4463edd481c1467f914c7dcd6c6e6ffc",
+ "max": 44307561,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_6d3b9a05db0b4dadb638c686faa0c40a",
+ "value": 44307561
+ }
+ },
+ "4dcf6ff672d24983a1877a8431709aa9": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_5807d5fb827d490fb3bc698f801ffff5",
+ "placeholder": "",
+ "style": "IPY_MODEL_c4f2b06a82fd4987b8b659524a7b503b",
+ "value": "Generating train split: 100%"
+ }
+ },
+ "4ea63adfce694725bdba878aef709dd3": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "5234566b1bfc4655b8d582ea5b46ed9f": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "54ad89e05fd74576b9b8b5b5a10eaf8d": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_fdb1941405ed4e4aa06019933892deb3",
+ "placeholder": "",
+ "style": "IPY_MODEL_668d5377ca56426a99753867e6e24862",
+ "value": " 51760/51760 [01:02<00:00, 1131.51 examples/s]"
+ }
+ },
+ "56aee4853b7740e6a977254f5d1fa66d": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "57182a263d324a3dbf1471c74290a0d5": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "5807d5fb827d490fb3bc698f801ffff5": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "5c9d781c28944f3eb86e2a6d44efdf18": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "5f40db8173dd4d76b6ef5ed6d9ec8b6e": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "61560ff6a36b44f4a9dfdae5c52791d4": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_db19fc8d37db4e45a5790a876836d8c4",
+ "max": 11610,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_36166c7bcb854b34aca1f41a5d6ea50b",
+ "value": 11610
+ }
+ },
+ "6578fd7acdb54c4c93528ea431fd0144": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_370692d819df41828b48c4ad446f977b",
+ "placeholder": "",
+ "style": "IPY_MODEL_a0bf9160eb2647409b3200270914b90f",
+ "value": " 50.6k/50.6k [00:00<00:00, 2.71MB/s]"
+ }
+ },
+ "668d5377ca56426a99753867e6e24862": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "697f027529b54ee9956bae78a11e0611": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "69ac12aec0714318bf2c83d4f4e745f5": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "6b2012c3f88547af8884a9ea90e3164b": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_938f45f1b3e24118b815d96ae34ba86a",
+ "placeholder": "",
+ "style": "IPY_MODEL_9367047a800747f79c6b225d92397846",
+ "value": " 44.3M/44.3M [00:01<00:00, 31.0MB/s]"
+ }
+ },
+ "6b91feeed5464877991ac2c207aebe7c": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_4b2061b8a73c43ffb0c2f83daf0d0183",
+ "placeholder": "",
+ "style": "IPY_MODEL_69ac12aec0714318bf2c83d4f4e745f5",
+ "value": "special_tokens_map.json: 100%"
+ }
+ },
+ "6d3b9a05db0b4dadb638c686faa0c40a": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "6dbbedeca9314e66ae50e44ffa31a414": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "6e34619b45934040b6092e6fb01ea7fe": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "71ce208e20d6483abb9ed923510c86d7": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_d69dc491b3ab44d7852b21873ed7bb7f",
+ "placeholder": "",
+ "style": "IPY_MODEL_f401d53bf28e44eb906bce6c05412662",
+ "value": " 51760/51760 [00:01<00:00, 45512.81 examples/s]"
+ }
+ },
+ "7358cdad832342c983e31efb8754ab78": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "73e352a3404f4c7dad0737f57d29e92f": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_988a0e8c1f89446086858da0a891a79c",
+ "IPY_MODEL_4ccedf0d93094e63b57a0f8a434fba06",
+ "IPY_MODEL_6b2012c3f88547af8884a9ea90e3164b"
+ ],
+ "layout": "IPY_MODEL_7e29cb8dd4df4d5b94407cd8fd3f2011"
+ }
+ },
+ "74501720ac7e4dbb911a4a99b3633bc6": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "78e5400bff924a92a4cc61c4ff18b182": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_b9b313fd861948f5aba25b24b1518d30",
+ "placeholder": "",
+ "style": "IPY_MODEL_4c666f4ace3943f8b80ecd20e7503236",
+ "value": " 1.18k/1.18k [00:00<00:00, 31.3kB/s]"
+ }
+ },
+ "7975adbc2ec5489ea7fa0167e620d85c": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_6e34619b45934040b6092e6fb01ea7fe",
+ "max": 51760,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_271ddaa553a042d09b6db7b450643d8f",
+ "value": 51760
+ }
+ },
+ "7e29cb8dd4df4d5b94407cd8fd3f2011": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "810ff6c0e17d4fa09a30fef27eacff90": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "89965917796a4f81b899fdc7685f33df": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "89b2ef0dbfea47ab8e6f8d659e3351d1": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_b8908fa0df3743ecb9d12983a739104f",
+ "placeholder": "",
+ "style": "IPY_MODEL_177c78fce95d4b4ab33057c5a048d693",
+ "value": " 9.09M/9.09M [00:00<00:00, 32.6MB/s]"
+ }
+ },
+ "8b3505352a5a42bf910428c40ce40465": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_49277aeeac16434a865a4d12308b1abc",
+ "placeholder": "",
+ "style": "IPY_MODEL_2157f01726d748f8a9ae4a00664430da",
+ "value": " 5.70G/5.70G [01:02<00:00, 30.1MB/s]"
+ }
+ },
+ "8fc142b628fb40568730234de1cafde2": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_4ae7e449e4ea4c729b5f34607c18ebae",
+ "max": 172,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_3572201bd4d74a58b7a665f9bdfdcdba",
+ "value": 172
+ }
+ },
+ "9367047a800747f79c6b225d92397846": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "938f45f1b3e24118b815d96ae34ba86a": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "95fbe66647904c06a20f640630d6dc0e": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_b0a370dc20654b279b9680692e34418e",
+ "placeholder": "",
+ "style": "IPY_MODEL_cfeb365ddf7548d58b2557f22737fcf5",
+ "value": " 11.6k/11.6k [00:00<00:00, 716kB/s]"
+ }
+ },
+ "988a0e8c1f89446086858da0a891a79c": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_ad2be500fc164c0f86f33e914ef8e6a0",
+ "placeholder": "",
+ "style": "IPY_MODEL_5234566b1bfc4655b8d582ea5b46ed9f",
+ "value": "Downloading data: 100%"
+ }
+ },
+ "98c58f23f4d549518832cb2d18f796e8": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_09b76013aa9e45efb6deb23a7a0d0925",
+ "IPY_MODEL_39b29a75374b45c0a22506010be2b84e",
+ "IPY_MODEL_78e5400bff924a92a4cc61c4ff18b182"
+ ],
+ "layout": "IPY_MODEL_2a58d04b428c46f4b3dbadd3bc6cd529"
+ }
+ },
+ "99fdbb0300c14c139d1937c646f0cfe7": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_7358cdad832342c983e31efb8754ab78",
+ "placeholder": "",
+ "style": "IPY_MODEL_e9adf418296e436fb48bb9f78885598b",
+ "value": " 51760/51760 [00:01<00:00, 38665.95 examples/s]"
+ }
+ },
+ "9f679ad3ec7f4fe8ad0510ffb57bc2ab": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_4ea63adfce694725bdba878aef709dd3",
+ "placeholder": "",
+ "style": "IPY_MODEL_74501720ac7e4dbb911a4a99b3633bc6",
+ "value": "tokenizer.json: 100%"
+ }
+ },
+ "a0037bdccf254159becde630bee3d1db": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "a069d2ab23824f29aa320ac256e2cfe9": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "a0bf9160eb2647409b3200270914b90f": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "a41dc44766444a998bec2d777f249d23": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "a8464a4c711e4e00aafdfc919b60d07e": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_fb995c740590427b882572c81d4e848c",
+ "placeholder": "",
+ "style": "IPY_MODEL_201b59ccd9f845e197029b57e424aefc",
+ "value": " 172/172 [00:00<00:00, 12.0kB/s]"
+ }
+ },
+ "a9f0cc51fc3d4d7b874c32dcf1c5bdf2": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "ad2be500fc164c0f86f33e914ef8e6a0": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "b0240cd9a4554b29ae11f8051984a1c6": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_edaf890370314a218f138015faa0b05d",
+ "placeholder": "",
+ "style": "IPY_MODEL_697f027529b54ee9956bae78a11e0611",
+ "value": "Map: 100%"
+ }
+ },
+ "b0a370dc20654b279b9680692e34418e": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "b518dcee69074b87be73957cd810e7ed": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_d891f8d0b1fc462f8008d02bb2a15692",
+ "placeholder": "",
+ "style": "IPY_MODEL_cced8fd7e998472794f3f3e3018956a5",
+ "value": "tokenizer_config.json: 100%"
+ }
+ },
+ "b8908fa0df3743ecb9d12983a739104f": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "b993eaec6b224440bf80c0958c6fb536": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "b9b313fd861948f5aba25b24b1518d30": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "ba90fdb8822d47dab7ba203bee297f37": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_0f8b6bfe16894500838793f2491d403f",
+ "placeholder": "",
+ "style": "IPY_MODEL_bb19f6c747754682a514373a3a0535ba",
+ "value": "Downloading readme: 100%"
+ }
+ },
+ "bb19f6c747754682a514373a3a0535ba": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "bc883d4cf13e4f8b8a4fe5f410cb6efd": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_e9159e03e61f4f56978ece9c3bca49b2",
+ "max": 51760,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_810ff6c0e17d4fa09a30fef27eacff90",
+ "value": 51760
+ }
+ },
+ "c161d94df0f04feba9542237e0856c22": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "c22f71b1f85843209d7e5321506b9cb9": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_1f44c9ce1adf470cbb19784493ed209f",
+ "IPY_MODEL_f1addc4479d849879e743cf9089e6540",
+ "IPY_MODEL_8b3505352a5a42bf910428c40ce40465"
+ ],
+ "layout": "IPY_MODEL_4c4c88d4c701450692fa0f6b0c5764b0"
+ }
+ },
+ "c4f2b06a82fd4987b8b659524a7b503b": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "cca8113c54c0495daedce1327bf9c68b": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_e02f9b7849c64531835eb77b860d1c93",
+ "max": 464,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_56aee4853b7740e6a977254f5d1fa66d",
+ "value": 464
+ }
+ },
+ "cced8fd7e998472794f3f3e3018956a5": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "cf245afeb1c04f29a24d291608c3d157": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_b518dcee69074b87be73957cd810e7ed",
+ "IPY_MODEL_e29104486d594b2992d7285e0ef77371",
+ "IPY_MODEL_6578fd7acdb54c4c93528ea431fd0144"
+ ],
+ "layout": "IPY_MODEL_d35db8148a354c56aaac56dbae22536f"
+ }
+ },
+ "cfe8cae0e22b495bafa221a63d13b283": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "cfeb365ddf7548d58b2557f22737fcf5": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "d1b47d39450d4019ae85c9b2f943eeaf": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_4dcf6ff672d24983a1877a8431709aa9",
+ "IPY_MODEL_7975adbc2ec5489ea7fa0167e620d85c",
+ "IPY_MODEL_71ce208e20d6483abb9ed923510c86d7"
+ ],
+ "layout": "IPY_MODEL_cfe8cae0e22b495bafa221a63d13b283"
+ }
+ },
+ "d35db8148a354c56aaac56dbae22536f": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "d69dc491b3ab44d7852b21873ed7bb7f": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "d891f8d0b1fc462f8008d02bb2a15692": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "d8e5318cead340c4adbeaccc05d39225": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "daf4cd890b35422683d22fd30bc71e83": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_b0240cd9a4554b29ae11f8051984a1c6",
+ "IPY_MODEL_bc883d4cf13e4f8b8a4fe5f410cb6efd",
+ "IPY_MODEL_99fdbb0300c14c139d1937c646f0cfe7"
+ ],
+ "layout": "IPY_MODEL_c161d94df0f04feba9542237e0856c22"
+ }
+ },
+ "db19fc8d37db4e45a5790a876836d8c4": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "de868e26e7154f62aa86223a539ad421": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "dea41c5260884aa6879b5e1d1697b14f": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "e02f9b7849c64531835eb77b860d1c93": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "e29104486d594b2992d7285e0ef77371": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_a9f0cc51fc3d4d7b874c32dcf1c5bdf2",
+ "max": 50641,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_2f6c70dd266c4816bfad3fd3d192929a",
+ "value": 50641
+ }
+ },
+ "e36a3f9eff0e4cf68834d66b0213ae96": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "e9159e03e61f4f56978ece9c3bca49b2": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "e9adf418296e436fb48bb9f78885598b": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "edaf890370314a218f138015faa0b05d": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "f1addc4479d849879e743cf9089e6540": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_43dec2ede91341f5af60eb522e18e984",
+ "max": 5702746405,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_d8e5318cead340c4adbeaccc05d39225",
+ "value": 5702746405
+ }
+ },
+ "f2df530d22c74977b249dd9fb5f4829b": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_21db8a77b00d4a4e82fdfa608657531f",
+ "max": 9085698,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_6dbbedeca9314e66ae50e44ffa31a414",
+ "value": 9085698
+ }
+ },
+ "f401d53bf28e44eb906bce6c05412662": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "fb995c740590427b882572c81d4e848c": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "fce7a61c25ec4390af43d92b7c473a45": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_30307300bc4e4baf96560e30969a82b6",
+ "IPY_MODEL_8fc142b628fb40568730234de1cafde2",
+ "IPY_MODEL_a8464a4c711e4e00aafdfc919b60d07e"
+ ],
+ "layout": "IPY_MODEL_5f40db8173dd4d76b6ef5ed6d9ec8b6e"
+ }
+ },
+ "fdb1941405ed4e4aa06019933892deb3": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ }
+ }
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
+}
diff --git a/notebooks/01_Qwen2-0.5B_Unsloth.ipynb b/notebooks/01_Qwen2-0.5B_Unsloth.ipynb
new file mode 100644
index 0000000000000000000000000000000000000000..7e9bcb68563636d461948106258c4814520fb330
--- /dev/null
+++ b/notebooks/01_Qwen2-0.5B_Unsloth.ipynb
@@ -0,0 +1,5072 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "application/vnd.databricks.v1+cell": {
+ "cellMetadata": {},
+ "inputWidgets": {},
+ "nuid": "8fe0634c-c88a-4c89-b956-1f4247c2d503",
+ "showTitle": false,
+ "title": ""
+ }
+ },
+ "outputs": [],
+ "source": [
+ "%load_ext autoreload\n",
+ "%autoreload 2"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "application/vnd.databricks.v1+cell": {
+ "cellMetadata": {},
+ "inputWidgets": {},
+ "nuid": "e2fc659d-8a38-4a94-bf3c-81b2778c780a",
+ "showTitle": false,
+ "title": ""
+ }
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "workding dir: /home/inflaton/code/projects/courses/novel-translation\n"
+ ]
+ }
+ ],
+ "source": [
+ "import os\n",
+ "import sys\n",
+ "from pathlib import Path\n",
+ "\n",
+ "workding_dir = str(Path.cwd().parent)\n",
+ "os.chdir(workding_dir)\n",
+ "sys.path.append(workding_dir)\n",
+ "print(\"workding dir:\", workding_dir)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "application/vnd.databricks.v1+cell": {
+ "cellMetadata": {},
+ "inputWidgets": {},
+ "nuid": "6601e3ff-e856-4353-98d8-42fcb158f230",
+ "showTitle": false,
+ "title": ""
+ }
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "loading env vars from: /home/inflaton/code/projects/courses/novel-translation/.env\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "True"
+ ]
+ },
+ "execution_count": 8,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "from dotenv import find_dotenv, load_dotenv\n",
+ "\n",
+ "found_dotenv = find_dotenv(\".env\")\n",
+ "\n",
+ "if len(found_dotenv) == 0:\n",
+ " found_dotenv = find_dotenv(\".env.example\")\n",
+ "print(f\"loading env vars from: {found_dotenv}\")\n",
+ "load_dotenv(found_dotenv, override=True)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "application/vnd.databricks.v1+cell": {
+ "cellMetadata": {},
+ "inputWidgets": {},
+ "nuid": "f30d283f-4759-403b-8cc4-94e360a76c04",
+ "showTitle": false,
+ "title": ""
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "('unsloth/Qwen2-0.5B-Instruct',\n",
+ " True,\n",
+ " 'models/Qwen2-0.5B-Instruct-MAC-',\n",
+ " 'Qwen2-0.5B-Instruct-MAC-',\n",
+ " 2048,\n",
+ " 10,\n",
+ " None,\n",
+ " 'datasets/mac/mac.tsv')"
+ ]
+ },
+ "execution_count": 9,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "import os\n",
+ "\n",
+ "model_name = os.getenv(\"MODEL_NAME\")\n",
+ "token = os.getenv(\"HF_TOKEN\") or None\n",
+ "load_in_4bit = os.getenv(\"LOAD_IN_4BIT\") == \"true\"\n",
+ "local_model = os.getenv(\"LOCAL_MODEL\")\n",
+ "hub_model = os.getenv(\"HUB_MODEL\")\n",
+ "num_train_epochs = int(os.getenv(\"NUM_TRAIN_EPOCHS\") or 0)\n",
+ "data_path = os.getenv(\"DATA_PATH\")\n",
+ "\n",
+ "max_seq_length = 2048 # Choose any! We auto support RoPE Scaling internally!\n",
+ "dtype = (\n",
+ " None # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+\n",
+ ")\n",
+ "\n",
+ "model_name, load_in_4bit, local_model, hub_model, max_seq_length, num_train_epochs, dtype, data_path"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "application/vnd.databricks.v1+cell": {
+ "cellMetadata": {},
+ "inputWidgets": {},
+ "nuid": "6f80b3a1-a6e6-43c7-b54f-da10ef37df32",
+ "showTitle": false,
+ "title": ""
+ },
+ "id": "r2v_X2fA0Df5"
+ },
+ "source": [
+ "* We support Llama, Mistral, Phi-3, Gemma, Yi, DeepSeek, Qwen, TinyLlama, Vicuna, Open Hermes etc\n",
+ "* We support 16bit LoRA or 4bit QLoRA. Both 2x faster.\n",
+ "* `max_seq_length` can be set to anything, since we do automatic RoPE Scaling via [kaiokendev's](https://kaiokendev.github.io/til) method.\n",
+ "* With [PR 26037](https://github.com/huggingface/transformers/pull/26037), we support downloading 4bit models **4x faster**! [Our repo](https://huggingface.co/unsloth) has Llama, Mistral 4bit models.\n",
+ "* [**NEW**] We make Phi-3 Medium / Mini **2x faster**! See our [Phi-3 Medium notebook](https://colab.research.google.com/drive/1hhdhBa1j_hsymiW9m-WzxQtgqTH_NHqi?usp=sharing)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "application/vnd.databricks.v1+cell": {
+ "cellMetadata": {},
+ "inputWidgets": {},
+ "nuid": "9df0e65d-07a4-4d5e-8848-c41872280e6f",
+ "showTitle": false,
+ "title": ""
+ },
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 353,
+ "referenced_widgets": [
+ "98c58f23f4d549518832cb2d18f796e8",
+ "09b76013aa9e45efb6deb23a7a0d0925",
+ "39b29a75374b45c0a22506010be2b84e",
+ "78e5400bff924a92a4cc61c4ff18b182",
+ "2a58d04b428c46f4b3dbadd3bc6cd529",
+ "dea41c5260884aa6879b5e1d1697b14f",
+ "89965917796a4f81b899fdc7685f33df",
+ "30cdc32298134cb0be4d41615b9e5774",
+ "47928317548c454bba6358ab132e8dee",
+ "b9b313fd861948f5aba25b24b1518d30",
+ "4c666f4ace3943f8b80ecd20e7503236",
+ "c22f71b1f85843209d7e5321506b9cb9",
+ "1f44c9ce1adf470cbb19784493ed209f",
+ "f1addc4479d849879e743cf9089e6540",
+ "8b3505352a5a42bf910428c40ce40465",
+ "4c4c88d4c701450692fa0f6b0c5764b0",
+ "0c34be936c8145d3ab41282f30a70713",
+ "0a92c56bfa134ef583220d7ef0b13e17",
+ "43dec2ede91341f5af60eb522e18e984",
+ "d8e5318cead340c4adbeaccc05d39225",
+ "49277aeeac16434a865a4d12308b1abc",
+ "2157f01726d748f8a9ae4a00664430da",
+ "fce7a61c25ec4390af43d92b7c473a45",
+ "30307300bc4e4baf96560e30969a82b6",
+ "8fc142b628fb40568730234de1cafde2",
+ "a8464a4c711e4e00aafdfc919b60d07e",
+ "5f40db8173dd4d76b6ef5ed6d9ec8b6e",
+ "e36a3f9eff0e4cf68834d66b0213ae96",
+ "a0037bdccf254159becde630bee3d1db",
+ "4ae7e449e4ea4c729b5f34607c18ebae",
+ "3572201bd4d74a58b7a665f9bdfdcdba",
+ "fb995c740590427b882572c81d4e848c",
+ "201b59ccd9f845e197029b57e424aefc",
+ "cf245afeb1c04f29a24d291608c3d157",
+ "b518dcee69074b87be73957cd810e7ed",
+ "e29104486d594b2992d7285e0ef77371",
+ "6578fd7acdb54c4c93528ea431fd0144",
+ "d35db8148a354c56aaac56dbae22536f",
+ "d891f8d0b1fc462f8008d02bb2a15692",
+ "cced8fd7e998472794f3f3e3018956a5",
+ "a9f0cc51fc3d4d7b874c32dcf1c5bdf2",
+ "2f6c70dd266c4816bfad3fd3d192929a",
+ "370692d819df41828b48c4ad446f977b",
+ "a0bf9160eb2647409b3200270914b90f",
+ "2d18ddf6482c4d97829ac0e5a7b9868f",
+ "9f679ad3ec7f4fe8ad0510ffb57bc2ab",
+ "f2df530d22c74977b249dd9fb5f4829b",
+ "89b2ef0dbfea47ab8e6f8d659e3351d1",
+ "3056b148aa9f4e6e8aa3b61d26886255",
+ "4ea63adfce694725bdba878aef709dd3",
+ "74501720ac7e4dbb911a4a99b3633bc6",
+ "21db8a77b00d4a4e82fdfa608657531f",
+ "6dbbedeca9314e66ae50e44ffa31a414",
+ "b8908fa0df3743ecb9d12983a739104f",
+ "177c78fce95d4b4ab33057c5a048d693",
+ "27155728b6b84cb199c91c940095d0a8",
+ "6b91feeed5464877991ac2c207aebe7c",
+ "cca8113c54c0495daedce1327bf9c68b",
+ "2e63a29e2f7247bba5beede9a568c99f",
+ "5c9d781c28944f3eb86e2a6d44efdf18",
+ "4b2061b8a73c43ffb0c2f83daf0d0183",
+ "69ac12aec0714318bf2c83d4f4e745f5",
+ "e02f9b7849c64531835eb77b860d1c93",
+ "56aee4853b7740e6a977254f5d1fa66d",
+ "b993eaec6b224440bf80c0958c6fb536",
+ "de868e26e7154f62aa86223a539ad421"
+ ]
+ },
+ "id": "QmUBVEnvCDJv",
+ "outputId": "a0e2d781-4934-415a-90b4-35165b9e44c5"
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.\n",
+ "==((====))== Unsloth: Fast Qwen2 patching release 2024.5\n",
+ " \\\\ /| GPU: NVIDIA GeForce RTX 4080 Laptop GPU. Max memory: 11.994 GB. Platform = Linux.\n",
+ "O^O/ \\_/ \\ Pytorch: 2.2.2+cu121. CUDA = 8.9. CUDA Toolkit = 12.1.\n",
+ "\\ / Bfloat16 = TRUE. Xformers = 0.0.25.post1. FA = False.\n",
+ " \"-____-\" Free Apache license: http://github.com/unslothai/unsloth\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n",
+ "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "CPU times: user 10.6 s, sys: 2.07 s, total: 12.6 s\n",
+ "Wall time: 51.9 s\n"
+ ]
+ }
+ ],
+ "source": [
+ "%%time\n",
+ "\n",
+ "from llm_toolkit.translation_engine import *\n",
+ "\n",
+ "model, tokenizer = load_model(model_name, load_in_4bit)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "application/vnd.databricks.v1+cell": {
+ "cellMetadata": {},
+ "inputWidgets": {},
+ "nuid": "adfadd0f-8c01-4f67-b643-cff930c1ce00",
+ "showTitle": false,
+ "title": ""
+ },
+ "id": "SXd9bTZd1aaL"
+ },
+ "source": [
+ "We now add LoRA adapters so we only need to update 1 to 10% of all parameters!"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "application/vnd.databricks.v1+cell": {
+ "cellMetadata": {},
+ "inputWidgets": {},
+ "nuid": "2cd85242-237f-4cca-a706-b7664ec9d3e5",
+ "showTitle": false,
+ "title": ""
+ },
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "6bZsfBuZDeCL",
+ "outputId": "bc6d9ce7-f82a-4191-d0c5-ec8247d9b9eb"
+ },
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Unsloth 2024.5 patched 24 layers with 0 QKV layers, 24 O layers and 24 MLP layers.\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "CPU times: user 9.31 s, sys: 0 ns, total: 9.31 s\n",
+ "Wall time: 2.12 s\n"
+ ]
+ }
+ ],
+ "source": [
+ "%%time\n",
+ "\n",
+ "model = FastLanguageModel.get_peft_model(\n",
+ " model,\n",
+ " r=16, # Choose any number > 0 ! Suggested 8, 16, 32, 64, 128\n",
+ " target_modules=[\n",
+ " \"q_proj\",\n",
+ " \"k_proj\",\n",
+ " \"v_proj\",\n",
+ " \"o_proj\",\n",
+ " \"gate_proj\",\n",
+ " \"up_proj\",\n",
+ " \"down_proj\",\n",
+ " ],\n",
+ " lora_alpha=16,\n",
+ " lora_dropout=0, # Supports any, but = 0 is optimized\n",
+ " bias=\"none\", # Supports any, but = \"none\" is optimized\n",
+ " # [NEW] \"unsloth\" uses 30% less VRAM, fits 2x larger batch sizes!\n",
+ " use_gradient_checkpointing=\"unsloth\", # True or \"unsloth\" for very long context\n",
+ " random_state=3407,\n",
+ " use_rslora=False, # We support rank stabilized LoRA\n",
+ " loftq_config=None, # And LoftQ\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "application/vnd.databricks.v1+cell": {
+ "cellMetadata": {},
+ "inputWidgets": {},
+ "nuid": "2c3fdf26-130f-4ce7-9c51-d62e1ce17629",
+ "showTitle": false,
+ "title": ""
+ },
+ "id": "vITh0KVJ10qX"
+ },
+ "source": [
+ " \n",
+ "### Data Prep\n",
+ "We now fine-tune on a Chinese-to-English literary translation dataset (MAC, loaded from `datasets/mac/mac.tsv` via `load_translation_dataset`, which formats each pair into a chat-style prompt). You can replace this code section with your own data prep.\n",
+ "\n",
+ "**[NOTE]** To train only on completions (ignoring the user's input) read TRL's docs [here](https://huggingface.co/docs/trl/sft_trainer#train-on-completions-only).\n",
+ "\n",
+ "**[NOTE]** Remember to add the **EOS_TOKEN** to the tokenized output!! Otherwise you'll get infinite generations!\n",
+ "\n",
+ "If you want to use the `llama-3` template for ShareGPT datasets, try our conversational [notebook](https://colab.research.google.com/drive/1XamvWYinY6FOSX9GLvnqSjjsNflxdhNc?usp=sharing).\n",
+ "\n",
+ "For text completions like novel writing, try this [notebook](https://colab.research.google.com/drive/1ef-tab5bhkvWmBOObepl1WgJvfvSzn5Q?usp=sharing)."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "application/vnd.databricks.v1+cell": {
+ "cellMetadata": {},
+ "inputWidgets": {},
+ "nuid": "a378fd31-1620-42bc-b97a-82f4ffbdcb11",
+ "showTitle": false,
+ "title": ""
+ }
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "loading train/test data files\n",
+ "DatasetDict({\n",
+ " train: Dataset({\n",
+ " features: ['chinese', 'english', 'text', 'prompt'],\n",
+ " num_rows: 4528\n",
+ " })\n",
+ " test: Dataset({\n",
+ " features: ['chinese', 'english', 'text', 'prompt'],\n",
+ " num_rows: 1133\n",
+ " })\n",
+ "})\n"
+ ]
+ }
+ ],
+ "source": [
+ "import os\n",
+ "from llm_toolkit.translation_engine import *\n",
+ "\n",
+ "datasets = load_translation_dataset(data_path, tokenizer)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "application/vnd.databricks.v1+cell": {
+ "cellMetadata": {},
+ "inputWidgets": {},
+ "nuid": "6ac9ac82-aaf3-482b-8cdb-8eab19fde5ae",
+ "showTitle": false,
+ "title": ""
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "({'chinese': '全仗着狐仙搭救。',\n",
+ " 'english': 'Because I was protected by a fox fairy.',\n",
+ " 'text': '<|im_start|>system\\nYou are an expert in translating Chinese into English.<|im_end|>\\n<|im_start|>user\\nTranslate from Chinese to English.\\n全仗着狐仙搭救。<|im_end|>\\n<|im_start|>assistant\\nBecause I was protected by a fox fairy.<|im_end|>',\n",
+ " 'prompt': '<|im_start|>system\\nYou are an expert in translating Chinese into English.<|im_end|>\\n<|im_start|>user\\nTranslate from Chinese to English.\\n全仗着狐仙搭救。<|im_end|>\\n<|im_start|>assistant\\n'},\n",
+ " {'chinese': '老耿端起枪,眯缝起一只三角眼,一搂扳机响了枪,冰雹般的金麻雀劈哩啪啦往下落,铁砂子在柳枝间飞迸着,嚓嚓有声。',\n",
+ " 'english': 'Old Geng picked up his shotgun, squinted, and pulled the trigger. Two sparrows crashed to the ground like hailstones as shotgun pellets tore noisily through the branches.',\n",
+ " 'text': '<|im_start|>system\\nYou are an expert in translating Chinese into English.<|im_end|>\\n<|im_start|>user\\nTranslate from Chinese to English.\\n老耿端起枪,眯缝起一只三角眼,一搂扳机响了枪,冰雹般的金麻雀劈哩啪啦往下落,铁砂子在柳枝间飞迸着,嚓嚓有声。<|im_end|>\\n<|im_start|>assistant\\nOld Geng picked up his shotgun, squinted, and pulled the trigger. Two sparrows crashed to the ground like hailstones as shotgun pellets tore noisily through the branches.<|im_end|>',\n",
+ " 'prompt': '<|im_start|>system\\nYou are an expert in translating Chinese into English.<|im_end|>\\n<|im_start|>user\\nTranslate from Chinese to English.\\n老耿端起枪,眯缝起一只三角眼,一搂扳机响了枪,冰雹般的金麻雀劈哩啪啦往下落,铁砂子在柳枝间飞迸着,嚓嚓有声。<|im_end|>\\n<|im_start|>assistant\\n'})"
+ ]
+ },
+ "execution_count": 13,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "datasets[\"train\"][0], datasets[\"test\"][0]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "application/vnd.databricks.v1+cell": {
+ "cellMetadata": {},
+ "inputWidgets": {},
+ "nuid": "87f73aab-12df-4e4e-b758-ee055e17ed58",
+ "showTitle": false,
+ "title": ""
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "({'chinese': '周瑞家的道:“太太说:‘他们原不是一家子; 当年他们的祖和太老爷在一处做官,因连了宗的。',\n",
+ " 'english': \"'She said they don't really belong to the family but were adopted into the clan years ago when your grandfather and theirs were working in the same office.\",\n",
+ " 'text': \"<|im_start|>system\\nYou are an expert in translating Chinese into English.<|im_end|>\\n<|im_start|>user\\nTranslate from Chinese to English.\\n周瑞家的道:“太太说:‘他们原不是一家子; 当年他们的祖和太老爷在一处做官,因连了宗的。<|im_end|>\\n<|im_start|>assistant\\n'She said they don't really belong to the family but were adopted into the clan years ago when your grandfather and theirs were working in the same office.<|im_end|>\",\n",
+ " 'prompt': '<|im_start|>system\\nYou are an expert in translating Chinese into English.<|im_end|>\\n<|im_start|>user\\nTranslate from Chinese to English.\\n周瑞家的道:“太太说:‘他们原不是一家子; 当年他们的祖和太老爷在一处做官,因连了宗的。<|im_end|>\\n<|im_start|>assistant\\n'},\n",
+ " {'chinese': '“听到了吗?',\n",
+ " 'english': \"'Did you hear that?'\",\n",
+ " 'text': \"<|im_start|>system\\nYou are an expert in translating Chinese into English.<|im_end|>\\n<|im_start|>user\\nTranslate from Chinese to English.\\n“听到了吗?<|im_end|>\\n<|im_start|>assistant\\n'Did you hear that?'<|im_end|>\",\n",
+ " 'prompt': '<|im_start|>system\\nYou are an expert in translating Chinese into English.<|im_end|>\\n<|im_start|>user\\nTranslate from Chinese to English.\\n“听到了吗?<|im_end|>\\n<|im_start|>assistant\\n'})"
+ ]
+ },
+ "execution_count": 14,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "datasets[\"train\"][1000], datasets[\"test\"][1000]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "application/vnd.databricks.v1+cell": {
+ "cellMetadata": {},
+ "inputWidgets": {},
+ "nuid": "dd425707-88cc-43db-8bc0-e858c8084e16",
+ "showTitle": false,
+ "title": ""
+ },
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 145,
+ "referenced_widgets": [
+ "26e4202cca81496a90d15a0dd4ca9cf1",
+ "ba90fdb8822d47dab7ba203bee297f37",
+ "61560ff6a36b44f4a9dfdae5c52791d4",
+ "95fbe66647904c06a20f640630d6dc0e",
+ "57182a263d324a3dbf1471c74290a0d5",
+ "0f8b6bfe16894500838793f2491d403f",
+ "bb19f6c747754682a514373a3a0535ba",
+ "db19fc8d37db4e45a5790a876836d8c4",
+ "36166c7bcb854b34aca1f41a5d6ea50b",
+ "b0a370dc20654b279b9680692e34418e",
+ "cfeb365ddf7548d58b2557f22737fcf5",
+ "73e352a3404f4c7dad0737f57d29e92f",
+ "988a0e8c1f89446086858da0a891a79c",
+ "4ccedf0d93094e63b57a0f8a434fba06",
+ "6b2012c3f88547af8884a9ea90e3164b",
+ "7e29cb8dd4df4d5b94407cd8fd3f2011",
+ "ad2be500fc164c0f86f33e914ef8e6a0",
+ "5234566b1bfc4655b8d582ea5b46ed9f",
+ "4463edd481c1467f914c7dcd6c6e6ffc",
+ "6d3b9a05db0b4dadb638c686faa0c40a",
+ "938f45f1b3e24118b815d96ae34ba86a",
+ "9367047a800747f79c6b225d92397846",
+ "d1b47d39450d4019ae85c9b2f943eeaf",
+ "4dcf6ff672d24983a1877a8431709aa9",
+ "7975adbc2ec5489ea7fa0167e620d85c",
+ "71ce208e20d6483abb9ed923510c86d7",
+ "cfe8cae0e22b495bafa221a63d13b283",
+ "5807d5fb827d490fb3bc698f801ffff5",
+ "c4f2b06a82fd4987b8b659524a7b503b",
+ "6e34619b45934040b6092e6fb01ea7fe",
+ "271ddaa553a042d09b6db7b450643d8f",
+ "d69dc491b3ab44d7852b21873ed7bb7f",
+ "f401d53bf28e44eb906bce6c05412662",
+ "daf4cd890b35422683d22fd30bc71e83",
+ "b0240cd9a4554b29ae11f8051984a1c6",
+ "bc883d4cf13e4f8b8a4fe5f410cb6efd",
+ "99fdbb0300c14c139d1937c646f0cfe7",
+ "c161d94df0f04feba9542237e0856c22",
+ "edaf890370314a218f138015faa0b05d",
+ "697f027529b54ee9956bae78a11e0611",
+ "e9159e03e61f4f56978ece9c3bca49b2",
+ "810ff6c0e17d4fa09a30fef27eacff90",
+ "7358cdad832342c983e31efb8754ab78",
+ "e9adf418296e436fb48bb9f78885598b"
+ ]
+ },
+ "id": "LjY75GoYUCB8",
+ "outputId": "7e2045fb-9ce9-49b1-b6e7-d5c9bc92455c"
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "<|im_start|>system\n",
+ "You are an expert in translating Chinese into English.<|im_end|>\n",
+ "<|im_start|>user\n",
+ "Translate from Chinese to English.\n",
+ "“听到了吗?<|im_end|>\n",
+ "<|im_start|>assistant\n",
+ "\n",
+ "----------------------------------------\n",
+ "<|im_start|>system\n",
+ "You are an expert in translating Chinese into English.<|im_end|>\n",
+ "<|im_start|>user\n",
+ "Translate from Chinese to English.\n",
+ "“听到了吗?<|im_end|>\n",
+ "<|im_start|>assistant\n",
+ "Did you hear it?<|im_end|>\n",
+ "CPU times: user 1.62 s, sys: 160 ms, total: 1.78 s\n",
+ "Wall time: 1.8 s\n"
+ ]
+ }
+ ],
+ "source": [
+ "%%time\n",
+ "\n",
+ "prompt1 = datasets[\"test\"][\"prompt\"][1000]\n",
+ "print(prompt1)\n",
+ "print(\"--\" * 20)\n",
+ "test_model(model, tokenizer, prompt1)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "application/vnd.databricks.v1+cell": {
+ "cellMetadata": {},
+ "inputWidgets": {},
+ "nuid": "53cc91a1-1623-4197-bf82-78cdadad933e",
+ "showTitle": false,
+ "title": ""
+ },
+ "id": "idAEIeSQ3xdS"
+ },
+ "source": [
+ " \n",
+ "### Train the model\n",
+ "Now let's use Huggingface TRL's `SFTTrainer`! More docs here: [TRL SFT docs](https://huggingface.co/docs/trl/sft_trainer). We train for `num_train_epochs` full epochs (read from the environment; 10 in this run) rather than capping `max_steps`. We also support TRL's `DPOTrainer`!"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "application/vnd.databricks.v1+cell": {
+ "cellMetadata": {},
+ "inputWidgets": {},
+ "nuid": "f1053974-9253-4d4d-a172-1f5fea046745",
+ "showTitle": false,
+ "title": ""
+ },
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 122,
+ "referenced_widgets": [
+ "3cf2dd993b5e4d3daecf61e4bab5a404",
+ "087b76a8b7514269b1f0ab29b062e444",
+ "35b0e8c26d6640e9bd0ed7b242a423d8",
+ "54ad89e05fd74576b9b8b5b5a10eaf8d",
+ "a41dc44766444a998bec2d777f249d23",
+ "a069d2ab23824f29aa320ac256e2cfe9",
+ "06e806c82c7b4cbea31c5358dd9c3434",
+ "2e5087c76f98437cb5dc729230358cba",
+ "036fc5746f43416db18c19ad8fd36677",
+ "fdb1941405ed4e4aa06019933892deb3",
+ "668d5377ca56426a99753867e6e24862"
+ ]
+ },
+ "id": "95_Nn-89DhsL",
+ "outputId": "bce9db22-b022-4e43-de3f-c7ea4c9c3c4e"
+ },
+ "outputs": [
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "c9df9c466cc24f5e8715c02eb6764c3c",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "Map (num_proc=2): 0%| | 0/4528 [00:00, ? examples/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "from trl import SFTTrainer\n",
+ "from llm_toolkit.transformers import TrainingArguments\n",
+ "from unsloth import is_bfloat16_supported\n",
+ "\n",
+ "trainer = SFTTrainer(\n",
+ " model=model,\n",
+ " tokenizer=tokenizer,\n",
+ " train_dataset=datasets[\"train\"],\n",
+ " dataset_text_field=\"text\",\n",
+ " max_seq_length=max_seq_length,\n",
+ " dataset_num_proc=2,\n",
+ " packing=False, # Can make training 5x faster for short sequences.\n",
+ " args=TrainingArguments(\n",
+ " per_device_train_batch_size=2,\n",
+ " gradient_accumulation_steps=4,\n",
+ " warmup_steps=5,\n",
+ " num_train_epochs=num_train_epochs,\n",
+ " learning_rate=2e-4,\n",
+ " fp16=not is_bfloat16_supported(),\n",
+ " bf16=is_bfloat16_supported(),\n",
+ " logging_steps=100,\n",
+ " optim=\"adamw_8bit\",\n",
+ " weight_decay=0.01,\n",
+ " lr_scheduler_type=\"linear\",\n",
+ " seed=3407,\n",
+ " output_dir=\"outputs\",\n",
+ " ),\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "application/vnd.databricks.v1+cell": {
+ "cellMetadata": {},
+ "inputWidgets": {},
+ "nuid": "db089cc7-e333-42ae-a468-bed571bb4214",
+ "showTitle": false,
+ "title": ""
+ },
+ "cellView": "form",
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "2ejIt2xSNKKp",
+ "outputId": "c73d8dfa-f4a1-4a01-a6dc-018bf82516a2"
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n",
+ "1.199 GB of memory reserved.\n"
+ ]
+ }
+ ],
+ "source": [
+ "# @title Show current memory stats\n",
+ "import torch\n",
+ "\n",
+ "gpu_stats = torch.cuda.get_device_properties(0)\n",
+ "start_gpu_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)\n",
+ "max_memory = round(gpu_stats.total_memory / 1024 / 1024 / 1024, 3)\n",
+ "print(f\"GPU = {gpu_stats.name}. Max memory = {max_memory} GB.\")\n",
+ "print(f\"{start_gpu_memory} GB of memory reserved.\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "application/vnd.databricks.v1+cell": {
+ "cellMetadata": {},
+ "inputWidgets": {},
+ "nuid": "412a7db0-6980-4834-bd87-fb15b794eb75",
+ "showTitle": false,
+ "title": ""
+ },
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 1000
+ },
+ "id": "yqxqAZ7KJ4oL",
+ "outputId": "69117b9b-b6f8-4d0e-c262-6998ba2c46bd"
+ },
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "==((====))== Unsloth - 2x faster free finetuning | Num GPUs = 1\n",
+ " \\\\ /| Num examples = 4,528 | Num Epochs = 10\n",
+ "O^O/ \\_/ \\ Batch size per device = 2 | Gradient Accumulation steps = 4\n",
+ "\\ / Total batch size = 8 | Total steps = 5,660\n",
+ " \"-____-\" Number of trainable parameters = 8,798,208\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " \n",
+ " \n",
+ "
\n",
+ " [5660/5660 1:02:58, Epoch 10/10]\n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " Step \n",
+ " Training Loss \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " 100 \n",
+ " 2.137700 \n",
+ " \n",
+ " \n",
+ " 200 \n",
+ " 2.001500 \n",
+ " \n",
+ " \n",
+ " 300 \n",
+ " 1.938200 \n",
+ " \n",
+ " \n",
+ " 400 \n",
+ " 1.935400 \n",
+ " \n",
+ " \n",
+ " 500 \n",
+ " 1.899800 \n",
+ " \n",
+ " \n",
+ " 600 \n",
+ " 1.819500 \n",
+ " \n",
+ " \n",
+ " 700 \n",
+ " 1.689600 \n",
+ " \n",
+ " \n",
+ " 800 \n",
+ " 1.737300 \n",
+ " \n",
+ " \n",
+ " 900 \n",
+ " 1.665900 \n",
+ " \n",
+ " \n",
+ " 1000 \n",
+ " 1.664600 \n",
+ " \n",
+ " \n",
+ " 1100 \n",
+ " 1.723000 \n",
+ " \n",
+ " \n",
+ " 1200 \n",
+ " 1.520200 \n",
+ " \n",
+ " \n",
+ " 1300 \n",
+ " 1.381000 \n",
+ " \n",
+ " \n",
+ " 1400 \n",
+ " 1.423000 \n",
+ " \n",
+ " \n",
+ " 1500 \n",
+ " 1.419400 \n",
+ " \n",
+ " \n",
+ " 1600 \n",
+ " 1.436500 \n",
+ " \n",
+ " \n",
+ " 1700 \n",
+ " 1.401500 \n",
+ " \n",
+ " \n",
+ " 1800 \n",
+ " 1.119500 \n",
+ " \n",
+ " \n",
+ " 1900 \n",
+ " 1.130700 \n",
+ " \n",
+ " \n",
+ " 2000 \n",
+ " 1.139100 \n",
+ " \n",
+ " \n",
+ " 2100 \n",
+ " 1.120000 \n",
+ " \n",
+ " \n",
+ " 2200 \n",
+ " 1.166200 \n",
+ " \n",
+ " \n",
+ " 2300 \n",
+ " 1.062000 \n",
+ " \n",
+ " \n",
+ " 2400 \n",
+ " 0.858400 \n",
+ " \n",
+ " \n",
+ " 2500 \n",
+ " 0.846800 \n",
+ " \n",
+ " \n",
+ " 2600 \n",
+ " 0.892000 \n",
+ " \n",
+ " \n",
+ " 2700 \n",
+ " 0.887700 \n",
+ " \n",
+ " \n",
+ " 2800 \n",
+ " 0.907300 \n",
+ " \n",
+ " \n",
+ " 2900 \n",
+ " 0.728300 \n",
+ " \n",
+ " \n",
+ " 3000 \n",
+ " 0.644400 \n",
+ " \n",
+ " \n",
+ " 3100 \n",
+ " 0.652400 \n",
+ " \n",
+ " \n",
+ " 3200 \n",
+ " 0.683000 \n",
+ " \n",
+ " \n",
+ " 3300 \n",
+ " 0.673200 \n",
+ " \n",
+ " \n",
+ " 3400 \n",
+ " 0.670000 \n",
+ " \n",
+ " \n",
+ " 3500 \n",
+ " 0.460900 \n",
+ " \n",
+ " \n",
+ " 3600 \n",
+ " 0.487600 \n",
+ " \n",
+ " \n",
+ " 3700 \n",
+ " 0.501000 \n",
+ " \n",
+ " \n",
+ " 3800 \n",
+ " 0.491400 \n",
+ " \n",
+ " \n",
+ " 3900 \n",
+ " 0.501700 \n",
+ " \n",
+ " \n",
+ " 4000 \n",
+ " 0.452200 \n",
+ " \n",
+ " \n",
+ " 4100 \n",
+ " 0.352000 \n",
+ " \n",
+ " \n",
+ " 4200 \n",
+ " 0.368500 \n",
+ " \n",
+ " \n",
+ " 4300 \n",
+ " 0.368500 \n",
+ " \n",
+ " \n",
+ " 4400 \n",
+ " 0.360600 \n",
+ " \n",
+ " \n",
+ " 4500 \n",
+ " 0.374900 \n",
+ " \n",
+ " \n",
+ " 4600 \n",
+ " 0.294500 \n",
+ " \n",
+ " \n",
+ " 4700 \n",
+ " 0.270000 \n",
+ " \n",
+ " \n",
+ " 4800 \n",
+ " 0.270800 \n",
+ " \n",
+ " \n",
+ " 4900 \n",
+ " 0.285300 \n",
+ " \n",
+ " \n",
+ " 5000 \n",
+ " 0.282200 \n",
+ " \n",
+ " \n",
+ " 5100 \n",
+ " 0.285100 \n",
+ " \n",
+ " \n",
+ " 5200 \n",
+ " 0.216900 \n",
+ " \n",
+ " \n",
+ " 5300 \n",
+ " 0.228700 \n",
+ " \n",
+ " \n",
+ " 5400 \n",
+ " 0.223900 \n",
+ " \n",
+ " \n",
+ " 5500 \n",
+ " 0.226100 \n",
+ " \n",
+ " \n",
+ " 5600 \n",
+ " 0.229100 \n",
+ " \n",
+ " \n",
+ "
"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "trainer_stats = trainer.train()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "application/vnd.databricks.v1+cell": {
+ "cellMetadata": {},
+ "inputWidgets": {},
+ "nuid": "ee4dfe12-ac7d-4af0-b8ee-9a3361eb9a48",
+ "showTitle": false,
+ "title": ""
+ },
+ "cellView": "form",
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "pCqnaKmlO1U9",
+ "outputId": "98f78253-86cf-4673-ff2b-923460c2b3fd"
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "3779.1598 seconds used for training.\n",
+ "62.99 minutes used for training.\n",
+ "Peak reserved memory = 1.855 GB.\n",
+ "Peak reserved memory for training = 0.656 GB.\n",
+ "Peak reserved memory % of max memory = 15.466 %.\n",
+ "Peak reserved memory for training % of max memory = 5.469 %.\n"
+ ]
+ }
+ ],
+ "source": [
+ "# @title Show final memory and time stats\n",
+ "used_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)\n",
+ "used_memory_for_lora = round(used_memory - start_gpu_memory, 3)\n",
+ "used_percentage = round(used_memory / max_memory * 100, 3)\n",
+ "lora_percentage = round(used_memory_for_lora / max_memory * 100, 3)\n",
+ "print(f\"{trainer_stats.metrics['train_runtime']} seconds used for training.\")\n",
+ "print(\n",
+ " f\"{round(trainer_stats.metrics['train_runtime']/60, 2)} minutes used for training.\"\n",
+ ")\n",
+ "print(f\"Peak reserved memory = {used_memory} GB.\")\n",
+ "print(f\"Peak reserved memory for training = {used_memory_for_lora} GB.\")\n",
+ "print(f\"Peak reserved memory % of max memory = {used_percentage} %.\")\n",
+ "print(f\"Peak reserved memory for training % of max memory = {lora_percentage} %.\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "application/vnd.databricks.v1+cell": {
+ "cellMetadata": {},
+ "inputWidgets": {},
+ "nuid": "4de3b67e-eeda-4c45-9472-35f105b4c30e",
+ "showTitle": false,
+ "title": ""
+ }
+ },
+ "source": [
+ " \n",
+ "### Inference\n",
+ "Let's run the model! You can change the instruction and input - leave the output blank!"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "application/vnd.databricks.v1+cell": {
+ "cellMetadata": {},
+ "inputWidgets": {},
+ "nuid": "12c0141c-6a60-4fcd-a285-27eb59aa002b",
+ "showTitle": false,
+ "title": ""
+ }
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "<|im_start|>system\n",
+ "You are an expert in translating Chinese into English.<|im_end|>\n",
+ "<|im_start|>user\n",
+ "Translate from Chinese to English.\n",
+ "老耿端起枪,眯缝起一只三角眼,一搂扳机响了枪,冰雹般的金麻雀劈哩啪啦往下落,铁砂子在柳枝间飞迸着,嚓嚓有声。<|im_end|>\n",
+ "<|im_start|>assistant\n",
+ "\n",
+ "----------------------------------------\n",
+ "<|im_start|>system\n",
+ "You are an expert in translating Chinese into English.<|im_end|>\n",
+ "<|im_start|>user\n",
+ "Translate from Chinese to English.\n",
+ "老耿端起枪,眯缝起一只三角眼,一搂扳机响了枪,冰雹般的金麻雀劈哩啪啦往下落,铁砂子在柳枝间飞迸着,嚓嚓有声。<|im_end|>\n",
+ "<|im_start|>assistant\n",
+ "Old Geng lifted his gun and squinted over his shoulder, because the pistol started firing.<|im_end|>\n",
+ "CPU times: user 1.71 s, sys: 161 ms, total: 1.87 s\n",
+ "Wall time: 1.86 s\n"
+ ]
+ }
+ ],
+ "source": [
+ "%%time\n",
+ "\n",
+ "prompt1 = datasets[\"test\"][\"prompt\"][0]\n",
+ "print(prompt1)\n",
+ "print(\"--\" * 20)\n",
+ "test_model(model, tokenizer, prompt1)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "application/vnd.databricks.v1+cell": {
+ "cellMetadata": {},
+ "inputWidgets": {},
+ "nuid": "1ea3d5bc-314e-4c80-9419-bb5dfdd0172a",
+ "showTitle": false,
+ "title": ""
+ }
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Old Geng picked up his shotgun, squinted, and pulled the trigger. Two sparrows crashed to the ground like hailstones as shotgun pellets tore noisily through the branches.\n"
+ ]
+ }
+ ],
+ "source": [
+ "print(datasets[\"test\"][\"english\"][0])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "application/vnd.databricks.v1+cell": {
+ "cellMetadata": {},
+ "inputWidgets": {},
+ "nuid": "f620a94b-4106-4cc9-a2cd-10ddc5f53b7a",
+ "showTitle": false,
+ "title": ""
+ }
+ },
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "100%|██████████| 1133/1133 [26:28<00:00, 1.40s/it]"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "CPU times: user 23min 55s, sys: 2min 33s, total: 26min 28s\n",
+ "Wall time: 26min 28s\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "\n"
+ ]
+ }
+ ],
+ "source": [
+ "%%time\n",
+ "\n",
+ "predictions = eval_model(model, tokenizer, datasets[\"test\"])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "application/vnd.databricks.v1+cell": {
+ "cellMetadata": {},
+ "inputWidgets": {},
+ "nuid": "a4da8fd3-c055-429b-812f-0988fb7cd228",
+ "showTitle": false,
+ "title": ""
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "{'accuracy': 0.00088261253309797,\n",
+ " 'correct_ids': [147],\n",
+ " 'bleu_scores': {'bleu': 0.06508609399238363,\n",
+ " 'precisions': [0.3407579117113485,\n",
+ " 0.09377291935878182,\n",
+ " 0.03598822203642444,\n",
+ " 0.01652015762352228],\n",
+ " 'brevity_penalty': 0.9858565320713017,\n",
+ " 'length_ratio': 0.9859556144418682,\n",
+ " 'translation_length': 29766,\n",
+ " 'reference_length': 30190},\n",
+ " 'rouge_scores': {'rouge1': 0.32340459562777546,\n",
+ " 'rouge2': 0.11259712507132531,\n",
+ " 'rougeL': 0.2671219091010598,\n",
+ " 'rougeLsum': 0.2670685844265569}}"
+ ]
+ },
+ "execution_count": 25,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "calc_metrics(datasets[\"test\"][\"english\"], predictions, debug=True)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "application/vnd.databricks.v1+cell": {
+ "cellMetadata": {},
+ "inputWidgets": {},
+ "nuid": "c65f8c79-a8fe-4256-9a9d-2451ee7164ba",
+ "showTitle": false,
+ "title": ""
+ }
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " chinese \\\n",
+ "0 老耿端起枪,眯缝起一只三角眼,一搂扳机响了枪,冰雹般的金麻雀劈哩啪啦往下落,铁砂子在柳枝间飞... \n",
+ "\n",
+ " english \\\n",
+ "0 Old Geng picked up his shotgun, squinted, and ... \n",
+ "\n",
+ " unsloth/Qwen2-0.5B-Instruct(finetuned) \n",
+ "0 Old Geng lifted his rifle and narrowed his eye... \n"
+ ]
+ }
+ ],
+ "source": [
+ "from llm_toolkit.translation_engine import save_results\n",
+ "\n",
+ "save_results(model_name + \"(finetuned)\", \"results/mac-results.tsv\", datasets[\"test\"], predictions, debug=True)"
+ ]
+ }
+ ],
+ "metadata": {
+ "accelerator": "GPU",
+ "application/vnd.databricks.v1+notebook": {
+ "dashboards": [],
+ "environmentMetadata": null,
+ "language": "python",
+ "notebookMetadata": {},
+ "notebookName": "05_MAC_+_Qwen2-0.5B_Unsloth_train",
+ "widgets": {}
+ },
+ "colab": {
+ "gpuType": "T4",
+ "provenance": []
+ },
+ "kernelspec": {
+ "display_name": "Python 3",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.11.9"
+ },
+ "widgets": {
+ "application/vnd.jupyter.widget-state+json": {
+ "036fc5746f43416db18c19ad8fd36677": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "06e806c82c7b4cbea31c5358dd9c3434": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "087b76a8b7514269b1f0ab29b062e444": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_a069d2ab23824f29aa320ac256e2cfe9",
+ "placeholder": "",
+ "style": "IPY_MODEL_06e806c82c7b4cbea31c5358dd9c3434",
+ "value": "Map (num_proc=2): 100%"
+ }
+ },
+ "09b76013aa9e45efb6deb23a7a0d0925": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_dea41c5260884aa6879b5e1d1697b14f",
+ "placeholder": "",
+ "style": "IPY_MODEL_89965917796a4f81b899fdc7685f33df",
+ "value": "config.json: 100%"
+ }
+ },
+ "0a92c56bfa134ef583220d7ef0b13e17": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "0c34be936c8145d3ab41282f30a70713": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "0f8b6bfe16894500838793f2491d403f": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "177c78fce95d4b4ab33057c5a048d693": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "1f44c9ce1adf470cbb19784493ed209f": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_0c34be936c8145d3ab41282f30a70713",
+ "placeholder": "",
+ "style": "IPY_MODEL_0a92c56bfa134ef583220d7ef0b13e17",
+ "value": "model.safetensors: 100%"
+ }
+ },
+ "201b59ccd9f845e197029b57e424aefc": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "2157f01726d748f8a9ae4a00664430da": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "21db8a77b00d4a4e82fdfa608657531f": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "26e4202cca81496a90d15a0dd4ca9cf1": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_ba90fdb8822d47dab7ba203bee297f37",
+ "IPY_MODEL_61560ff6a36b44f4a9dfdae5c52791d4",
+ "IPY_MODEL_95fbe66647904c06a20f640630d6dc0e"
+ ],
+ "layout": "IPY_MODEL_57182a263d324a3dbf1471c74290a0d5"
+ }
+ },
+ "27155728b6b84cb199c91c940095d0a8": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_6b91feeed5464877991ac2c207aebe7c",
+ "IPY_MODEL_cca8113c54c0495daedce1327bf9c68b",
+ "IPY_MODEL_2e63a29e2f7247bba5beede9a568c99f"
+ ],
+ "layout": "IPY_MODEL_5c9d781c28944f3eb86e2a6d44efdf18"
+ }
+ },
+ "271ddaa553a042d09b6db7b450643d8f": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "2a58d04b428c46f4b3dbadd3bc6cd529": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "2d18ddf6482c4d97829ac0e5a7b9868f": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_9f679ad3ec7f4fe8ad0510ffb57bc2ab",
+ "IPY_MODEL_f2df530d22c74977b249dd9fb5f4829b",
+ "IPY_MODEL_89b2ef0dbfea47ab8e6f8d659e3351d1"
+ ],
+ "layout": "IPY_MODEL_3056b148aa9f4e6e8aa3b61d26886255"
+ }
+ },
+ "2e5087c76f98437cb5dc729230358cba": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "2e63a29e2f7247bba5beede9a568c99f": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_b993eaec6b224440bf80c0958c6fb536",
+ "placeholder": "",
+ "style": "IPY_MODEL_de868e26e7154f62aa86223a539ad421",
+ "value": " 464/464 [00:00<00:00, 27.1kB/s]"
+ }
+ },
+ "2f6c70dd266c4816bfad3fd3d192929a": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "30307300bc4e4baf96560e30969a82b6": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_e36a3f9eff0e4cf68834d66b0213ae96",
+ "placeholder": "",
+ "style": "IPY_MODEL_a0037bdccf254159becde630bee3d1db",
+ "value": "generation_config.json: 100%"
+ }
+ },
+ "3056b148aa9f4e6e8aa3b61d26886255": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "30cdc32298134cb0be4d41615b9e5774": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "3572201bd4d74a58b7a665f9bdfdcdba": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "35b0e8c26d6640e9bd0ed7b242a423d8": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_2e5087c76f98437cb5dc729230358cba",
+ "max": 51760,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_036fc5746f43416db18c19ad8fd36677",
+ "value": 51760
+ }
+ },
+ "36166c7bcb854b34aca1f41a5d6ea50b": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "370692d819df41828b48c4ad446f977b": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "39b29a75374b45c0a22506010be2b84e": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_30cdc32298134cb0be4d41615b9e5774",
+ "max": 1179,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_47928317548c454bba6358ab132e8dee",
+ "value": 1179
+ }
+ },
+ "3cf2dd993b5e4d3daecf61e4bab5a404": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_087b76a8b7514269b1f0ab29b062e444",
+ "IPY_MODEL_35b0e8c26d6640e9bd0ed7b242a423d8",
+ "IPY_MODEL_54ad89e05fd74576b9b8b5b5a10eaf8d"
+ ],
+ "layout": "IPY_MODEL_a41dc44766444a998bec2d777f249d23"
+ }
+ },
+ "43dec2ede91341f5af60eb522e18e984": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "4463edd481c1467f914c7dcd6c6e6ffc": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "47928317548c454bba6358ab132e8dee": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "49277aeeac16434a865a4d12308b1abc": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "4ae7e449e4ea4c729b5f34607c18ebae": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "4b2061b8a73c43ffb0c2f83daf0d0183": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "4c4c88d4c701450692fa0f6b0c5764b0": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "4c666f4ace3943f8b80ecd20e7503236": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "4ccedf0d93094e63b57a0f8a434fba06": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_4463edd481c1467f914c7dcd6c6e6ffc",
+ "max": 44307561,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_6d3b9a05db0b4dadb638c686faa0c40a",
+ "value": 44307561
+ }
+ },
+ "4dcf6ff672d24983a1877a8431709aa9": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_5807d5fb827d490fb3bc698f801ffff5",
+ "placeholder": "",
+ "style": "IPY_MODEL_c4f2b06a82fd4987b8b659524a7b503b",
+ "value": "Generating train split: 100%"
+ }
+ },
+ "4ea63adfce694725bdba878aef709dd3": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "5234566b1bfc4655b8d582ea5b46ed9f": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "54ad89e05fd74576b9b8b5b5a10eaf8d": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_fdb1941405ed4e4aa06019933892deb3",
+ "placeholder": "",
+ "style": "IPY_MODEL_668d5377ca56426a99753867e6e24862",
+ "value": " 51760/51760 [01:02<00:00, 1131.51 examples/s]"
+ }
+ },
+ "56aee4853b7740e6a977254f5d1fa66d": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "57182a263d324a3dbf1471c74290a0d5": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "5807d5fb827d490fb3bc698f801ffff5": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "5c9d781c28944f3eb86e2a6d44efdf18": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "5f40db8173dd4d76b6ef5ed6d9ec8b6e": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "61560ff6a36b44f4a9dfdae5c52791d4": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_db19fc8d37db4e45a5790a876836d8c4",
+ "max": 11610,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_36166c7bcb854b34aca1f41a5d6ea50b",
+ "value": 11610
+ }
+ },
+ "6578fd7acdb54c4c93528ea431fd0144": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_370692d819df41828b48c4ad446f977b",
+ "placeholder": "",
+ "style": "IPY_MODEL_a0bf9160eb2647409b3200270914b90f",
+ "value": " 50.6k/50.6k [00:00<00:00, 2.71MB/s]"
+ }
+ },
+ "668d5377ca56426a99753867e6e24862": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "697f027529b54ee9956bae78a11e0611": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "69ac12aec0714318bf2c83d4f4e745f5": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "6b2012c3f88547af8884a9ea90e3164b": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_938f45f1b3e24118b815d96ae34ba86a",
+ "placeholder": "",
+ "style": "IPY_MODEL_9367047a800747f79c6b225d92397846",
+ "value": " 44.3M/44.3M [00:01<00:00, 31.0MB/s]"
+ }
+ },
+ "6b91feeed5464877991ac2c207aebe7c": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_4b2061b8a73c43ffb0c2f83daf0d0183",
+ "placeholder": "",
+ "style": "IPY_MODEL_69ac12aec0714318bf2c83d4f4e745f5",
+ "value": "special_tokens_map.json: 100%"
+ }
+ },
+ "6d3b9a05db0b4dadb638c686faa0c40a": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "6dbbedeca9314e66ae50e44ffa31a414": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "6e34619b45934040b6092e6fb01ea7fe": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "71ce208e20d6483abb9ed923510c86d7": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_d69dc491b3ab44d7852b21873ed7bb7f",
+ "placeholder": "",
+ "style": "IPY_MODEL_f401d53bf28e44eb906bce6c05412662",
+ "value": " 51760/51760 [00:01<00:00, 45512.81 examples/s]"
+ }
+ },
+ "7358cdad832342c983e31efb8754ab78": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "73e352a3404f4c7dad0737f57d29e92f": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_988a0e8c1f89446086858da0a891a79c",
+ "IPY_MODEL_4ccedf0d93094e63b57a0f8a434fba06",
+ "IPY_MODEL_6b2012c3f88547af8884a9ea90e3164b"
+ ],
+ "layout": "IPY_MODEL_7e29cb8dd4df4d5b94407cd8fd3f2011"
+ }
+ },
+ "74501720ac7e4dbb911a4a99b3633bc6": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "78e5400bff924a92a4cc61c4ff18b182": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_b9b313fd861948f5aba25b24b1518d30",
+ "placeholder": "",
+ "style": "IPY_MODEL_4c666f4ace3943f8b80ecd20e7503236",
+ "value": " 1.18k/1.18k [00:00<00:00, 31.3kB/s]"
+ }
+ },
+ "7975adbc2ec5489ea7fa0167e620d85c": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_6e34619b45934040b6092e6fb01ea7fe",
+ "max": 51760,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_271ddaa553a042d09b6db7b450643d8f",
+ "value": 51760
+ }
+ },
+ "7e29cb8dd4df4d5b94407cd8fd3f2011": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "810ff6c0e17d4fa09a30fef27eacff90": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "89965917796a4f81b899fdc7685f33df": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "89b2ef0dbfea47ab8e6f8d659e3351d1": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_b8908fa0df3743ecb9d12983a739104f",
+ "placeholder": "",
+ "style": "IPY_MODEL_177c78fce95d4b4ab33057c5a048d693",
+ "value": " 9.09M/9.09M [00:00<00:00, 32.6MB/s]"
+ }
+ },
+ "8b3505352a5a42bf910428c40ce40465": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_49277aeeac16434a865a4d12308b1abc",
+ "placeholder": "",
+ "style": "IPY_MODEL_2157f01726d748f8a9ae4a00664430da",
+ "value": " 5.70G/5.70G [01:02<00:00, 30.1MB/s]"
+ }
+ },
+ "8fc142b628fb40568730234de1cafde2": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_4ae7e449e4ea4c729b5f34607c18ebae",
+ "max": 172,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_3572201bd4d74a58b7a665f9bdfdcdba",
+ "value": 172
+ }
+ },
+ "9367047a800747f79c6b225d92397846": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "938f45f1b3e24118b815d96ae34ba86a": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "95fbe66647904c06a20f640630d6dc0e": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_b0a370dc20654b279b9680692e34418e",
+ "placeholder": "",
+ "style": "IPY_MODEL_cfeb365ddf7548d58b2557f22737fcf5",
+ "value": " 11.6k/11.6k [00:00<00:00, 716kB/s]"
+ }
+ },
+ "988a0e8c1f89446086858da0a891a79c": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_ad2be500fc164c0f86f33e914ef8e6a0",
+ "placeholder": "",
+ "style": "IPY_MODEL_5234566b1bfc4655b8d582ea5b46ed9f",
+ "value": "Downloading data: 100%"
+ }
+ },
+ "98c58f23f4d549518832cb2d18f796e8": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_09b76013aa9e45efb6deb23a7a0d0925",
+ "IPY_MODEL_39b29a75374b45c0a22506010be2b84e",
+ "IPY_MODEL_78e5400bff924a92a4cc61c4ff18b182"
+ ],
+ "layout": "IPY_MODEL_2a58d04b428c46f4b3dbadd3bc6cd529"
+ }
+ },
+ "99fdbb0300c14c139d1937c646f0cfe7": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_7358cdad832342c983e31efb8754ab78",
+ "placeholder": "",
+ "style": "IPY_MODEL_e9adf418296e436fb48bb9f78885598b",
+ "value": " 51760/51760 [00:01<00:00, 38665.95 examples/s]"
+ }
+ },
+ "9f679ad3ec7f4fe8ad0510ffb57bc2ab": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_4ea63adfce694725bdba878aef709dd3",
+ "placeholder": "",
+ "style": "IPY_MODEL_74501720ac7e4dbb911a4a99b3633bc6",
+ "value": "tokenizer.json: 100%"
+ }
+ },
+ "a0037bdccf254159becde630bee3d1db": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "a069d2ab23824f29aa320ac256e2cfe9": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "a0bf9160eb2647409b3200270914b90f": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "a41dc44766444a998bec2d777f249d23": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "a8464a4c711e4e00aafdfc919b60d07e": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_fb995c740590427b882572c81d4e848c",
+ "placeholder": "",
+ "style": "IPY_MODEL_201b59ccd9f845e197029b57e424aefc",
+ "value": " 172/172 [00:00<00:00, 12.0kB/s]"
+ }
+ },
+ "a9f0cc51fc3d4d7b874c32dcf1c5bdf2": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "ad2be500fc164c0f86f33e914ef8e6a0": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "b0240cd9a4554b29ae11f8051984a1c6": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_edaf890370314a218f138015faa0b05d",
+ "placeholder": "",
+ "style": "IPY_MODEL_697f027529b54ee9956bae78a11e0611",
+ "value": "Map: 100%"
+ }
+ },
+ "b0a370dc20654b279b9680692e34418e": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "b518dcee69074b87be73957cd810e7ed": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_d891f8d0b1fc462f8008d02bb2a15692",
+ "placeholder": "",
+ "style": "IPY_MODEL_cced8fd7e998472794f3f3e3018956a5",
+ "value": "tokenizer_config.json: 100%"
+ }
+ },
+ "b8908fa0df3743ecb9d12983a739104f": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "b993eaec6b224440bf80c0958c6fb536": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "b9b313fd861948f5aba25b24b1518d30": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "ba90fdb8822d47dab7ba203bee297f37": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_0f8b6bfe16894500838793f2491d403f",
+ "placeholder": "",
+ "style": "IPY_MODEL_bb19f6c747754682a514373a3a0535ba",
+ "value": "Downloading readme: 100%"
+ }
+ },
+ "bb19f6c747754682a514373a3a0535ba": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "bc883d4cf13e4f8b8a4fe5f410cb6efd": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_e9159e03e61f4f56978ece9c3bca49b2",
+ "max": 51760,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_810ff6c0e17d4fa09a30fef27eacff90",
+ "value": 51760
+ }
+ },
+ "c161d94df0f04feba9542237e0856c22": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "c22f71b1f85843209d7e5321506b9cb9": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_1f44c9ce1adf470cbb19784493ed209f",
+ "IPY_MODEL_f1addc4479d849879e743cf9089e6540",
+ "IPY_MODEL_8b3505352a5a42bf910428c40ce40465"
+ ],
+ "layout": "IPY_MODEL_4c4c88d4c701450692fa0f6b0c5764b0"
+ }
+ },
+ "c4f2b06a82fd4987b8b659524a7b503b": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "cca8113c54c0495daedce1327bf9c68b": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_e02f9b7849c64531835eb77b860d1c93",
+ "max": 464,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_56aee4853b7740e6a977254f5d1fa66d",
+ "value": 464
+ }
+ },
+ "cced8fd7e998472794f3f3e3018956a5": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "cf245afeb1c04f29a24d291608c3d157": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_b518dcee69074b87be73957cd810e7ed",
+ "IPY_MODEL_e29104486d594b2992d7285e0ef77371",
+ "IPY_MODEL_6578fd7acdb54c4c93528ea431fd0144"
+ ],
+ "layout": "IPY_MODEL_d35db8148a354c56aaac56dbae22536f"
+ }
+ },
+ "cfe8cae0e22b495bafa221a63d13b283": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "cfeb365ddf7548d58b2557f22737fcf5": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "d1b47d39450d4019ae85c9b2f943eeaf": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_4dcf6ff672d24983a1877a8431709aa9",
+ "IPY_MODEL_7975adbc2ec5489ea7fa0167e620d85c",
+ "IPY_MODEL_71ce208e20d6483abb9ed923510c86d7"
+ ],
+ "layout": "IPY_MODEL_cfe8cae0e22b495bafa221a63d13b283"
+ }
+ },
+ "d35db8148a354c56aaac56dbae22536f": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "d69dc491b3ab44d7852b21873ed7bb7f": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "d891f8d0b1fc462f8008d02bb2a15692": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "d8e5318cead340c4adbeaccc05d39225": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "daf4cd890b35422683d22fd30bc71e83": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_b0240cd9a4554b29ae11f8051984a1c6",
+ "IPY_MODEL_bc883d4cf13e4f8b8a4fe5f410cb6efd",
+ "IPY_MODEL_99fdbb0300c14c139d1937c646f0cfe7"
+ ],
+ "layout": "IPY_MODEL_c161d94df0f04feba9542237e0856c22"
+ }
+ },
+ "db19fc8d37db4e45a5790a876836d8c4": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "de868e26e7154f62aa86223a539ad421": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "dea41c5260884aa6879b5e1d1697b14f": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "e02f9b7849c64531835eb77b860d1c93": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "e29104486d594b2992d7285e0ef77371": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_a9f0cc51fc3d4d7b874c32dcf1c5bdf2",
+ "max": 50641,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_2f6c70dd266c4816bfad3fd3d192929a",
+ "value": 50641
+ }
+ },
+ "e36a3f9eff0e4cf68834d66b0213ae96": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "e9159e03e61f4f56978ece9c3bca49b2": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "e9adf418296e436fb48bb9f78885598b": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "edaf890370314a218f138015faa0b05d": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "f1addc4479d849879e743cf9089e6540": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_43dec2ede91341f5af60eb522e18e984",
+ "max": 5702746405,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_d8e5318cead340c4adbeaccc05d39225",
+ "value": 5702746405
+ }
+ },
+ "f2df530d22c74977b249dd9fb5f4829b": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_21db8a77b00d4a4e82fdfa608657531f",
+ "max": 9085698,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_6dbbedeca9314e66ae50e44ffa31a414",
+ "value": 9085698
+ }
+ },
+ "f401d53bf28e44eb906bce6c05412662": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "fb995c740590427b882572c81d4e848c": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "fce7a61c25ec4390af43d92b7c473a45": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_30307300bc4e4baf96560e30969a82b6",
+ "IPY_MODEL_8fc142b628fb40568730234de1cafde2",
+ "IPY_MODEL_a8464a4c711e4e00aafdfc919b60d07e"
+ ],
+ "layout": "IPY_MODEL_5f40db8173dd4d76b6ef5ed6d9ec8b6e"
+ }
+ },
+ "fdb1941405ed4e4aa06019933892deb3": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ }
+ }
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
+}
diff --git a/notebooks/02_Qwen2-1.5B_Unsloth.ipynb b/notebooks/02_Qwen2-1.5B_Unsloth.ipynb
new file mode 100644
index 0000000000000000000000000000000000000000..dec98d4f0151f2bdd177702b0d9c6d7beb7d0025
--- /dev/null
+++ b/notebooks/02_Qwen2-1.5B_Unsloth.ipynb
@@ -0,0 +1,5632 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "application/vnd.databricks.v1+cell": {
+ "cellMetadata": {},
+ "inputWidgets": {},
+ "nuid": "5e06060e-f3d7-4e1e-b97e-dc57d8d17ce5",
+ "showTitle": false,
+ "title": ""
+ }
+ },
+ "outputs": [],
+ "source": [
+ "%load_ext autoreload\n",
+ "%autoreload 2"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "application/vnd.databricks.v1+cell": {
+ "cellMetadata": {},
+ "inputWidgets": {},
+ "nuid": "6831b89a-0776-4014-a3db-9e1860a4c80c",
+ "showTitle": false,
+ "title": ""
+ }
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "workding dir: /home/inflaton/code/projects/courses/novel-translation\n"
+ ]
+ }
+ ],
+ "source": [
+ "import os\n",
+ "import sys\n",
+ "from pathlib import Path\n",
+ "\n",
+ "workding_dir = str(Path.cwd().parent)\n",
+ "os.chdir(workding_dir)\n",
+ "sys.path.append(workding_dir)\n",
+ "print(\"workding dir:\", workding_dir)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "application/vnd.databricks.v1+cell": {
+ "cellMetadata": {},
+ "inputWidgets": {},
+ "nuid": "1bdd4cdb-cb26-4527-862d-66ea2a7a1f05",
+ "showTitle": false,
+ "title": ""
+ }
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "loading env vars from: /home/inflaton/code/projects/courses/novel-translation/.env\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "True"
+ ]
+ },
+ "execution_count": 3,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "from dotenv import find_dotenv, load_dotenv\n",
+ "\n",
+ "found_dotenv = find_dotenv(\".env\")\n",
+ "\n",
+ "if len(found_dotenv) == 0:\n",
+ " found_dotenv = find_dotenv(\".env.example\")\n",
+ "print(f\"loading env vars from: {found_dotenv}\")\n",
+ "load_dotenv(found_dotenv, override=True)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "application/vnd.databricks.v1+cell": {
+ "cellMetadata": {},
+ "inputWidgets": {},
+ "nuid": "14807e21-2648-48a3-9916-6c576fc61d2e",
+ "showTitle": false,
+ "title": ""
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "('unsloth/Qwen2-1.5B-Instruct',\n",
+ " True,\n",
+ " 'models/Qwen2-1.5B-Instruct-MAC-',\n",
+ " 'Qwen2-1.5B-Instruct-MAC-',\n",
+ " 2048,\n",
+ " 10,\n",
+ " None,\n",
+ " 'datasets/mac/mac.tsv')"
+ ]
+ },
+ "execution_count": 4,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "import os\n",
+ "\n",
+ "model_name = os.getenv(\"MODEL_NAME\")\n",
+ "token = os.getenv(\"HF_TOKEN\") or None\n",
+ "load_in_4bit = os.getenv(\"LOAD_IN_4BIT\") == \"true\"\n",
+ "local_model = os.getenv(\"LOCAL_MODEL\")\n",
+ "hub_model = os.getenv(\"HUB_MODEL\")\n",
+ "num_train_epochs = int(os.getenv(\"NUM_TRAIN_EPOCHS\") or 0)\n",
+ "data_path = os.getenv(\"DATA_PATH\")\n",
+ "\n",
+ "max_seq_length = 2048 # Choose any! We auto support RoPE Scaling internally!\n",
+ "dtype = (\n",
+ " None # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+\n",
+ ")\n",
+ "\n",
+ "model_name, load_in_4bit, local_model, hub_model, max_seq_length, num_train_epochs, dtype, data_path"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "application/vnd.databricks.v1+cell": {
+ "cellMetadata": {},
+ "inputWidgets": {},
+ "nuid": "bc44b98b-6394-4b2c-af2f-8caa40b28453",
+ "showTitle": false,
+ "title": ""
+ },
+ "id": "r2v_X2fA0Df5"
+ },
+ "source": [
+ "* We support Llama, Mistral, Phi-3, Gemma, Yi, DeepSeek, Qwen, TinyLlama, Vicuna, Open Hermes etc\n",
+ "* We support 16bit LoRA or 4bit QLoRA. Both 2x faster.\n",
+ "* `max_seq_length` can be set to anything, since we do automatic RoPE Scaling via [kaiokendev's](https://kaiokendev.github.io/til) method.\n",
+ "* With [PR 26037](https://github.com/huggingface/transformers/pull/26037), we support downloading 4bit models **4x faster**! [Our repo](https://huggingface.co/unsloth) has Llama, Mistral 4bit models.\n",
+ "* [**NEW**] We make Phi-3 Medium / Mini **2x faster**! See our [Phi-3 Medium notebook](https://colab.research.google.com/drive/1hhdhBa1j_hsymiW9m-WzxQtgqTH_NHqi?usp=sharing)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "application/vnd.databricks.v1+cell": {
+ "cellMetadata": {},
+ "inputWidgets": {},
+ "nuid": "b952e9b9-edf1-4bb8-b52b-bb714852c721",
+ "showTitle": false,
+ "title": ""
+ },
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 353,
+ "referenced_widgets": [
+ "98c58f23f4d549518832cb2d18f796e8",
+ "09b76013aa9e45efb6deb23a7a0d0925",
+ "39b29a75374b45c0a22506010be2b84e",
+ "78e5400bff924a92a4cc61c4ff18b182",
+ "2a58d04b428c46f4b3dbadd3bc6cd529",
+ "dea41c5260884aa6879b5e1d1697b14f",
+ "89965917796a4f81b899fdc7685f33df",
+ "30cdc32298134cb0be4d41615b9e5774",
+ "47928317548c454bba6358ab132e8dee",
+ "b9b313fd861948f5aba25b24b1518d30",
+ "4c666f4ace3943f8b80ecd20e7503236",
+ "c22f71b1f85843209d7e5321506b9cb9",
+ "1f44c9ce1adf470cbb19784493ed209f",
+ "f1addc4479d849879e743cf9089e6540",
+ "8b3505352a5a42bf910428c40ce40465",
+ "4c4c88d4c701450692fa0f6b0c5764b0",
+ "0c34be936c8145d3ab41282f30a70713",
+ "0a92c56bfa134ef583220d7ef0b13e17",
+ "43dec2ede91341f5af60eb522e18e984",
+ "d8e5318cead340c4adbeaccc05d39225",
+ "49277aeeac16434a865a4d12308b1abc",
+ "2157f01726d748f8a9ae4a00664430da",
+ "fce7a61c25ec4390af43d92b7c473a45",
+ "30307300bc4e4baf96560e30969a82b6",
+ "8fc142b628fb40568730234de1cafde2",
+ "a8464a4c711e4e00aafdfc919b60d07e",
+ "5f40db8173dd4d76b6ef5ed6d9ec8b6e",
+ "e36a3f9eff0e4cf68834d66b0213ae96",
+ "a0037bdccf254159becde630bee3d1db",
+ "4ae7e449e4ea4c729b5f34607c18ebae",
+ "3572201bd4d74a58b7a665f9bdfdcdba",
+ "fb995c740590427b882572c81d4e848c",
+ "201b59ccd9f845e197029b57e424aefc",
+ "cf245afeb1c04f29a24d291608c3d157",
+ "b518dcee69074b87be73957cd810e7ed",
+ "e29104486d594b2992d7285e0ef77371",
+ "6578fd7acdb54c4c93528ea431fd0144",
+ "d35db8148a354c56aaac56dbae22536f",
+ "d891f8d0b1fc462f8008d02bb2a15692",
+ "cced8fd7e998472794f3f3e3018956a5",
+ "a9f0cc51fc3d4d7b874c32dcf1c5bdf2",
+ "2f6c70dd266c4816bfad3fd3d192929a",
+ "370692d819df41828b48c4ad446f977b",
+ "a0bf9160eb2647409b3200270914b90f",
+ "2d18ddf6482c4d97829ac0e5a7b9868f",
+ "9f679ad3ec7f4fe8ad0510ffb57bc2ab",
+ "f2df530d22c74977b249dd9fb5f4829b",
+ "89b2ef0dbfea47ab8e6f8d659e3351d1",
+ "3056b148aa9f4e6e8aa3b61d26886255",
+ "4ea63adfce694725bdba878aef709dd3",
+ "74501720ac7e4dbb911a4a99b3633bc6",
+ "21db8a77b00d4a4e82fdfa608657531f",
+ "6dbbedeca9314e66ae50e44ffa31a414",
+ "b8908fa0df3743ecb9d12983a739104f",
+ "177c78fce95d4b4ab33057c5a048d693",
+ "27155728b6b84cb199c91c940095d0a8",
+ "6b91feeed5464877991ac2c207aebe7c",
+ "cca8113c54c0495daedce1327bf9c68b",
+ "2e63a29e2f7247bba5beede9a568c99f",
+ "5c9d781c28944f3eb86e2a6d44efdf18",
+ "4b2061b8a73c43ffb0c2f83daf0d0183",
+ "69ac12aec0714318bf2c83d4f4e745f5",
+ "e02f9b7849c64531835eb77b860d1c93",
+ "56aee4853b7740e6a977254f5d1fa66d",
+ "b993eaec6b224440bf80c0958c6fb536",
+ "de868e26e7154f62aa86223a539ad421"
+ ]
+ },
+ "id": "QmUBVEnvCDJv",
+ "outputId": "a0e2d781-4934-415a-90b4-35165b9e44c5"
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.\n"
+ ]
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "a44529371839466cae7797d068873634",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "config.json: 0%| | 0.00/707 [00:00, ?B/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "==((====))== Unsloth: Fast Qwen2 patching release 2024.5\n",
+ " \\\\ /| GPU: NVIDIA GeForce RTX 4080 Laptop GPU. Max memory: 11.994 GB. Platform = Linux.\n",
+ "O^O/ \\_/ \\ Pytorch: 2.2.2+cu121. CUDA = 8.9. CUDA Toolkit = 12.1.\n",
+ "\\ / Bfloat16 = TRUE. Xformers = 0.0.25.post1. FA = False.\n",
+ " \"-____-\" Free Apache license: http://github.com/unslothai/unsloth\n"
+ ]
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "6303708b46824ec791429f29c5fc9e3c",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "model.safetensors: 0%| | 0.00/3.09G [00:00, ?B/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "1f7bee3044444f50bb516e950154cd8a",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "generation_config.json: 0%| | 0.00/242 [00:00, ?B/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "3400b41d20884eed8a36b4b7abe91035",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "tokenizer_config.json: 0%| | 0.00/1.32k [00:00, ?B/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "443feea33b4a4ed5b703b6963c79e7c5",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "vocab.json: 0%| | 0.00/2.78M [00:00, ?B/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "53f5f46aa4de429b81ecaa8c0af52630",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "merges.txt: 0%| | 0.00/1.67M [00:00, ?B/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "15ab203e3e8c4c3da4591ecb09d71d77",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "added_tokens.json: 0%| | 0.00/80.0 [00:00, ?B/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "09a9a9b536c5472b950db964893a1176",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "special_tokens_map.json: 0%| | 0.00/367 [00:00, ?B/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "9fb16fa647f341d19545f4ce5d7c7816",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "tokenizer.json: 0%| | 0.00/7.03M [00:00, ?B/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n",
+ "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "CPU times: user 25.5 s, sys: 17.6 s, total: 43.1 s\n",
+ "Wall time: 4min 14s\n"
+ ]
+ }
+ ],
+ "source": [
+ "%%time\n",
+ "\n",
+ "from llm_toolkit.translation_engine import *\n",
+ "\n",
+ "model, tokenizer = load_model(model_name, load_in_4bit)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "application/vnd.databricks.v1+cell": {
+ "cellMetadata": {},
+ "inputWidgets": {},
+ "nuid": "28049473-3b0f-4aa6-bcad-11b8954d8066",
+ "showTitle": false,
+ "title": ""
+ },
+ "id": "SXd9bTZd1aaL"
+ },
+ "source": [
+ "We now add LoRA adapters so we only need to update 1 to 10% of all parameters!"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "application/vnd.databricks.v1+cell": {
+ "cellMetadata": {},
+ "inputWidgets": {},
+ "nuid": "f1615e9f-a306-472f-9fa3-7c78b0edc319",
+ "showTitle": false,
+ "title": ""
+ },
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "6bZsfBuZDeCL",
+ "outputId": "bc6d9ce7-f82a-4191-d0c5-ec8247d9b9eb"
+ },
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Unsloth 2024.5 patched 28 layers with 0 QKV layers, 28 O layers and 28 MLP layers.\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "CPU times: user 12.6 s, sys: 0 ns, total: 12.6 s\n",
+ "Wall time: 1.88 s\n"
+ ]
+ }
+ ],
+ "source": [
+ "%%time\n",
+ "\n",
+ "model = FastLanguageModel.get_peft_model(\n",
+ " model,\n",
+ " r=16, # Choose any number > 0 ! Suggested 8, 16, 32, 64, 128\n",
+ " target_modules=[\n",
+ " \"q_proj\",\n",
+ " \"k_proj\",\n",
+ " \"v_proj\",\n",
+ " \"o_proj\",\n",
+ " \"gate_proj\",\n",
+ " \"up_proj\",\n",
+ " \"down_proj\",\n",
+ " ],\n",
+ " lora_alpha=16,\n",
+ " lora_dropout=0, # Supports any, but = 0 is optimized\n",
+ " bias=\"none\", # Supports any, but = \"none\" is optimized\n",
+ " # [NEW] \"unsloth\" uses 30% less VRAM, fits 2x larger batch sizes!\n",
+ " use_gradient_checkpointing=\"unsloth\", # True or \"unsloth\" for very long context\n",
+ " random_state=3407,\n",
+ " use_rslora=False, # We support rank stabilized LoRA\n",
+ " loftq_config=None, # And LoftQ\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "application/vnd.databricks.v1+cell": {
+ "cellMetadata": {},
+ "inputWidgets": {},
+ "nuid": "16e3c2ff-36ff-4895-bfd0-59ab1b2130cc",
+ "showTitle": false,
+ "title": ""
+ },
+ "id": "vITh0KVJ10qX"
+ },
+ "source": [
+ " \n",
+ "### Data Prep\n",
+    "We now load the MAC Chinese-to-English translation dataset (`datasets/mac/mac.tsv`) and format each pair with the Qwen2 chat template. (This section was adapted from the Unsloth Alpaca example — you can replace this code section with your own data prep.)\n",
+ "\n",
+ "**[NOTE]** To train only on completions (ignoring the user's input) read TRL's docs [here](https://huggingface.co/docs/trl/sft_trainer#train-on-completions-only).\n",
+ "\n",
+ "**[NOTE]** Remember to add the **EOS_TOKEN** to the tokenized output!! Otherwise you'll get infinite generations!\n",
+ "\n",
+ "If you want to use the `llama-3` template for ShareGPT datasets, try our conversational [notebook](https://colab.research.google.com/drive/1XamvWYinY6FOSX9GLvnqSjjsNflxdhNc?usp=sharing).\n",
+ "\n",
+ "For text completions like novel writing, try this [notebook](https://colab.research.google.com/drive/1ef-tab5bhkvWmBOObepl1WgJvfvSzn5Q?usp=sharing)."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "application/vnd.databricks.v1+cell": {
+ "cellMetadata": {},
+ "inputWidgets": {},
+ "nuid": "4426fdab-78f7-4a28-abf7-dc55b19db864",
+ "showTitle": false,
+ "title": ""
+ }
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "loading train/test data files\n"
+ ]
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "95a1b6aa815f461a8281e33633a28a9b",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "Map: 0%| | 0/4528 [00:00, ? examples/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "06c43290ece44320a77fae8dd24fe380",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "Map: 0%| | 0/1133 [00:00, ? examples/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "DatasetDict({\n",
+ " train: Dataset({\n",
+ " features: ['chinese', 'english', 'text', 'prompt'],\n",
+ " num_rows: 4528\n",
+ " })\n",
+ " test: Dataset({\n",
+ " features: ['chinese', 'english', 'text', 'prompt'],\n",
+ " num_rows: 1133\n",
+ " })\n",
+ "})\n"
+ ]
+ }
+ ],
+ "source": [
+ "import os\n",
+ "from llm_toolkit.translation_engine import *\n",
+ "\n",
+ "datasets = load_translation_dataset(data_path, tokenizer)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "application/vnd.databricks.v1+cell": {
+ "cellMetadata": {},
+ "inputWidgets": {},
+ "nuid": "14384095-a677-4439-b906-bd4f545775cd",
+ "showTitle": false,
+ "title": ""
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "({'chinese': '全仗着狐仙搭救。',\n",
+ " 'english': 'Because I was protected by a fox fairy.',\n",
+ " 'text': '<|im_start|>system\\nYou are an expert in translating Chinese into English.<|im_end|>\\n<|im_start|>user\\nTranslate from Chinese to English.\\n全仗着狐仙搭救。<|im_end|>\\n<|im_start|>assistant\\nBecause I was protected by a fox fairy.<|im_end|>',\n",
+ " 'prompt': '<|im_start|>system\\nYou are an expert in translating Chinese into English.<|im_end|>\\n<|im_start|>user\\nTranslate from Chinese to English.\\n全仗着狐仙搭救。<|im_end|>\\n<|im_start|>assistant\\n'},\n",
+ " {'chinese': '老耿端起枪,眯缝起一只三角眼,一搂扳机响了枪,冰雹般的金麻雀劈哩啪啦往下落,铁砂子在柳枝间飞迸着,嚓嚓有声。',\n",
+ " 'english': 'Old Geng picked up his shotgun, squinted, and pulled the trigger. Two sparrows crashed to the ground like hailstones as shotgun pellets tore noisily through the branches.',\n",
+ " 'text': '<|im_start|>system\\nYou are an expert in translating Chinese into English.<|im_end|>\\n<|im_start|>user\\nTranslate from Chinese to English.\\n老耿端起枪,眯缝起一只三角眼,一搂扳机响了枪,冰雹般的金麻雀劈哩啪啦往下落,铁砂子在柳枝间飞迸着,嚓嚓有声。<|im_end|>\\n<|im_start|>assistant\\nOld Geng picked up his shotgun, squinted, and pulled the trigger. Two sparrows crashed to the ground like hailstones as shotgun pellets tore noisily through the branches.<|im_end|>',\n",
+ " 'prompt': '<|im_start|>system\\nYou are an expert in translating Chinese into English.<|im_end|>\\n<|im_start|>user\\nTranslate from Chinese to English.\\n老耿端起枪,眯缝起一只三角眼,一搂扳机响了枪,冰雹般的金麻雀劈哩啪啦往下落,铁砂子在柳枝间飞迸着,嚓嚓有声。<|im_end|>\\n<|im_start|>assistant\\n'})"
+ ]
+ },
+ "execution_count": 8,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "datasets[\"train\"][0], datasets[\"test\"][0]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "application/vnd.databricks.v1+cell": {
+ "cellMetadata": {},
+ "inputWidgets": {},
+ "nuid": "3e839830-d2da-48e3-b6f4-63da7a7b9dab",
+ "showTitle": false,
+ "title": ""
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "({'chinese': '周瑞家的道:“太太说:‘他们原不是一家子; 当年他们的祖和太老爷在一处做官,因连了宗的。',\n",
+ " 'english': \"'She said they don't really belong to the family but were adopted into the clan years ago when your grandfather and theirs were working in the same office.\",\n",
+ " 'text': \"<|im_start|>system\\nYou are an expert in translating Chinese into English.<|im_end|>\\n<|im_start|>user\\nTranslate from Chinese to English.\\n周瑞家的道:“太太说:‘他们原不是一家子; 当年他们的祖和太老爷在一处做官,因连了宗的。<|im_end|>\\n<|im_start|>assistant\\n'She said they don't really belong to the family but were adopted into the clan years ago when your grandfather and theirs were working in the same office.<|im_end|>\",\n",
+ " 'prompt': '<|im_start|>system\\nYou are an expert in translating Chinese into English.<|im_end|>\\n<|im_start|>user\\nTranslate from Chinese to English.\\n周瑞家的道:“太太说:‘他们原不是一家子; 当年他们的祖和太老爷在一处做官,因连了宗的。<|im_end|>\\n<|im_start|>assistant\\n'},\n",
+ " {'chinese': '“听到了吗?',\n",
+ " 'english': \"'Did you hear that?'\",\n",
+ " 'text': \"<|im_start|>system\\nYou are an expert in translating Chinese into English.<|im_end|>\\n<|im_start|>user\\nTranslate from Chinese to English.\\n“听到了吗?<|im_end|>\\n<|im_start|>assistant\\n'Did you hear that?'<|im_end|>\",\n",
+ " 'prompt': '<|im_start|>system\\nYou are an expert in translating Chinese into English.<|im_end|>\\n<|im_start|>user\\nTranslate from Chinese to English.\\n“听到了吗?<|im_end|>\\n<|im_start|>assistant\\n'})"
+ ]
+ },
+ "execution_count": 9,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "datasets[\"train\"][1000], datasets[\"test\"][1000]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "application/vnd.databricks.v1+cell": {
+ "cellMetadata": {},
+ "inputWidgets": {},
+ "nuid": "03a3c02c-d3d9-49f4-87b5-2e568c174175",
+ "showTitle": false,
+ "title": ""
+ },
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 145,
+ "referenced_widgets": [
+ "26e4202cca81496a90d15a0dd4ca9cf1",
+ "ba90fdb8822d47dab7ba203bee297f37",
+ "61560ff6a36b44f4a9dfdae5c52791d4",
+ "95fbe66647904c06a20f640630d6dc0e",
+ "57182a263d324a3dbf1471c74290a0d5",
+ "0f8b6bfe16894500838793f2491d403f",
+ "bb19f6c747754682a514373a3a0535ba",
+ "db19fc8d37db4e45a5790a876836d8c4",
+ "36166c7bcb854b34aca1f41a5d6ea50b",
+ "b0a370dc20654b279b9680692e34418e",
+ "cfeb365ddf7548d58b2557f22737fcf5",
+ "73e352a3404f4c7dad0737f57d29e92f",
+ "988a0e8c1f89446086858da0a891a79c",
+ "4ccedf0d93094e63b57a0f8a434fba06",
+ "6b2012c3f88547af8884a9ea90e3164b",
+ "7e29cb8dd4df4d5b94407cd8fd3f2011",
+ "ad2be500fc164c0f86f33e914ef8e6a0",
+ "5234566b1bfc4655b8d582ea5b46ed9f",
+ "4463edd481c1467f914c7dcd6c6e6ffc",
+ "6d3b9a05db0b4dadb638c686faa0c40a",
+ "938f45f1b3e24118b815d96ae34ba86a",
+ "9367047a800747f79c6b225d92397846",
+ "d1b47d39450d4019ae85c9b2f943eeaf",
+ "4dcf6ff672d24983a1877a8431709aa9",
+ "7975adbc2ec5489ea7fa0167e620d85c",
+ "71ce208e20d6483abb9ed923510c86d7",
+ "cfe8cae0e22b495bafa221a63d13b283",
+ "5807d5fb827d490fb3bc698f801ffff5",
+ "c4f2b06a82fd4987b8b659524a7b503b",
+ "6e34619b45934040b6092e6fb01ea7fe",
+ "271ddaa553a042d09b6db7b450643d8f",
+ "d69dc491b3ab44d7852b21873ed7bb7f",
+ "f401d53bf28e44eb906bce6c05412662",
+ "daf4cd890b35422683d22fd30bc71e83",
+ "b0240cd9a4554b29ae11f8051984a1c6",
+ "bc883d4cf13e4f8b8a4fe5f410cb6efd",
+ "99fdbb0300c14c139d1937c646f0cfe7",
+ "c161d94df0f04feba9542237e0856c22",
+ "edaf890370314a218f138015faa0b05d",
+ "697f027529b54ee9956bae78a11e0611",
+ "e9159e03e61f4f56978ece9c3bca49b2",
+ "810ff6c0e17d4fa09a30fef27eacff90",
+ "7358cdad832342c983e31efb8754ab78",
+ "e9adf418296e436fb48bb9f78885598b"
+ ]
+ },
+ "id": "LjY75GoYUCB8",
+ "outputId": "7e2045fb-9ce9-49b1-b6e7-d5c9bc92455c"
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "<|im_start|>system\n",
+ "You are an expert in translating Chinese into English.<|im_end|>\n",
+ "<|im_start|>user\n",
+ "Translate from Chinese to English.\n",
+ "“听到了吗?<|im_end|>\n",
+ "<|im_start|>assistant\n",
+ "\n",
+ "----------------------------------------\n",
+ "<|im_start|>system\n",
+ "You are an expert in translating Chinese into English.<|im_end|>\n",
+ "<|im_start|>user\n",
+ "Translate from Chinese to English.\n",
+ "“听到了吗?<|im_end|>\n",
+ "<|im_start|>assistant\n",
+ "Did you hear that?<|im_end|>\n",
+ "CPU times: user 1.8 s, sys: 873 ms, total: 2.68 s\n",
+ "Wall time: 2.72 s\n"
+ ]
+ }
+ ],
+ "source": [
+ "%%time\n",
+ "\n",
+ "prompt1 = datasets[\"test\"][\"prompt\"][1000]\n",
+ "print(prompt1)\n",
+ "print(\"--\" * 20)\n",
+ "test_model(model, tokenizer, prompt1)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "application/vnd.databricks.v1+cell": {
+ "cellMetadata": {},
+ "inputWidgets": {},
+ "nuid": "22ad05ed-04e7-420f-82bf-8f990efce37c",
+ "showTitle": false,
+ "title": ""
+ }
+ },
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "100%|██████████| 1133/1133 [30:01<00:00, 1.59s/it]"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "CPU times: user 27min 10s, sys: 2min 52s, total: 30min 2s\n",
+ "Wall time: 30min 1s\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "\n"
+ ]
+ }
+ ],
+ "source": [
+ "%%time\n",
+ "\n",
+ "predictions = eval_model(model, tokenizer, datasets[\"test\"])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "application/vnd.databricks.v1+cell": {
+ "cellMetadata": {},
+ "inputWidgets": {},
+ "nuid": "eeba4278-d952-4391-8f63-c123e6098ffd",
+ "showTitle": false,
+ "title": ""
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "{'accuracy': 0.00176522506619594,\n",
+ " 'correct_ids': [658, 659],\n",
+ " 'bleu_scores': {'bleu': 0.08285381577653864,\n",
+ " 'precisions': [0.40636974021865224,\n",
+ " 0.12583290620194773,\n",
+ " 0.051405438435685916,\n",
+ " 0.02290685609386224],\n",
+ " 'brevity_penalty': 0.9405675222192741,\n",
+ " 'length_ratio': 0.9422656508777741,\n",
+ " 'translation_length': 28447,\n",
+ " 'reference_length': 30190},\n",
+ " 'rouge_scores': {'rouge1': 0.38844471682897896,\n",
+ " 'rouge2': 0.14120062297432684,\n",
+ " 'rougeL': 0.3280668137668106,\n",
+ " 'rougeLsum': 0.3280344032501499}}"
+ ]
+ },
+ "execution_count": 12,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "calc_metrics(datasets[\"test\"][\"english\"], predictions, debug=True)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "application/vnd.databricks.v1+cell": {
+ "cellMetadata": {},
+ "inputWidgets": {},
+ "nuid": "2485caac-9b06-42f5-a4da-213d3e522a06",
+ "showTitle": false,
+ "title": ""
+ }
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " Unnamed: 0 chinese \\\n",
+ "0 0 老耿端起枪,眯缝起一只三角眼,一搂扳机响了枪,冰雹般的金麻雀劈哩啪啦往下落,铁砂子在柳枝间飞... \n",
+ "\n",
+ " english \\\n",
+ "0 Old Geng picked up his shotgun, squinted, and ... \n",
+ "\n",
+ " unsloth/Qwen2-0.5B-Instruct(finetuned) \\\n",
+ "0 Old Geng lifted his rifle and narrowed his eye... \n",
+ "\n",
+ " unsloth/Qwen2-1.5B-Instruct \n",
+ "0 Old Geng took up his gun, squinted one of its ... \n"
+ ]
+ }
+ ],
+ "source": [
+ "save_results(\n",
+ " model_name,\n",
+ " \"results/mac-results.csv\",\n",
+ " datasets[\"test\"],\n",
+ " predictions,\n",
+ " debug=True,\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "application/vnd.databricks.v1+cell": {
+ "cellMetadata": {},
+ "inputWidgets": {},
+ "nuid": "5c3f9939-9068-4edf-b057-e4898efeb94e",
+ "showTitle": false,
+ "title": ""
+ },
+ "id": "idAEIeSQ3xdS"
+ },
+ "source": [
+ " \n",
+ "### Train the model\n",
+ "Now let's use Huggingface TRL's `SFTTrainer`! More docs here: [TRL SFT docs](https://huggingface.co/docs/trl/sft_trainer). Here we train for `num_train_epochs` full epochs; for a quick smoke test you can instead pass a small `max_steps` (e.g. `max_steps=60`) to `TrainingArguments`. We also support TRL's `DPOTrainer`!"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "application/vnd.databricks.v1+cell": {
+ "cellMetadata": {},
+ "inputWidgets": {},
+ "nuid": "053bd880-409c-4ae0-a5a5-06084ada19d5",
+ "showTitle": false,
+ "title": ""
+ },
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 122,
+ "referenced_widgets": [
+ "3cf2dd993b5e4d3daecf61e4bab5a404",
+ "087b76a8b7514269b1f0ab29b062e444",
+ "35b0e8c26d6640e9bd0ed7b242a423d8",
+ "54ad89e05fd74576b9b8b5b5a10eaf8d",
+ "a41dc44766444a998bec2d777f249d23",
+ "a069d2ab23824f29aa320ac256e2cfe9",
+ "06e806c82c7b4cbea31c5358dd9c3434",
+ "2e5087c76f98437cb5dc729230358cba",
+ "036fc5746f43416db18c19ad8fd36677",
+ "fdb1941405ed4e4aa06019933892deb3",
+ "668d5377ca56426a99753867e6e24862"
+ ]
+ },
+ "id": "95_Nn-89DhsL",
+ "outputId": "bce9db22-b022-4e43-de3f-c7ea4c9c3c4e"
+ },
+ "outputs": [
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "6b952d520d494e58811bae80cf5ae883",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "Map (num_proc=2):   0%|          | 0/4528 [00:00<?, ? examples/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "from trl import SFTTrainer\n",
+ "from transformers import TrainingArguments\n",
+ "from unsloth import is_bfloat16_supported\n",
+ "\n",
+ "trainer = SFTTrainer(\n",
+ " model=model,\n",
+ " tokenizer=tokenizer,\n",
+ " train_dataset=datasets[\"train\"],\n",
+ " dataset_text_field=\"text\",\n",
+ " max_seq_length=max_seq_length,\n",
+ " dataset_num_proc=2,\n",
+ " packing=False, # Can make training 5x faster for short sequences.\n",
+ " args=TrainingArguments(\n",
+ " per_device_train_batch_size=2,\n",
+ " gradient_accumulation_steps=4,\n",
+ " warmup_steps=5,\n",
+ " num_train_epochs=num_train_epochs,\n",
+ " learning_rate=2e-4,\n",
+ " fp16=not is_bfloat16_supported(),\n",
+ " bf16=is_bfloat16_supported(),\n",
+ " logging_steps=100,\n",
+ " optim=\"adamw_8bit\",\n",
+ " weight_decay=0.01,\n",
+ " lr_scheduler_type=\"linear\",\n",
+ " seed=3407,\n",
+ " output_dir=\"outputs\",\n",
+ " ),\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "application/vnd.databricks.v1+cell": {
+ "cellMetadata": {},
+ "inputWidgets": {},
+ "nuid": "b7b322ec-e1bb-467e-9a24-7a9cff6c2402",
+ "showTitle": false,
+ "title": ""
+ },
+ "cellView": "form",
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "2ejIt2xSNKKp",
+ "outputId": "c73d8dfa-f4a1-4a01-a6dc-018bf82516a2"
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n",
+ "3.633 GB of memory reserved.\n"
+ ]
+ }
+ ],
+ "source": [
+ "# @title Show current memory stats\n",
+ "import torch\n",
+ "\n",
+ "gpu_stats = torch.cuda.get_device_properties(0)\n",
+ "start_gpu_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)\n",
+ "max_memory = round(gpu_stats.total_memory / 1024 / 1024 / 1024, 3)\n",
+ "print(f\"GPU = {gpu_stats.name}. Max memory = {max_memory} GB.\")\n",
+ "print(f\"{start_gpu_memory} GB of memory reserved.\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "application/vnd.databricks.v1+cell": {
+ "cellMetadata": {},
+ "inputWidgets": {},
+ "nuid": "31565b22-348c-4ebd-a478-ecac933086a6",
+ "showTitle": false,
+ "title": ""
+ },
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 1000
+ },
+ "id": "yqxqAZ7KJ4oL",
+ "outputId": "69117b9b-b6f8-4d0e-c262-6998ba2c46bd"
+ },
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "==((====))== Unsloth - 2x faster free finetuning | Num GPUs = 1\n",
+ " \\\\ /| Num examples = 4,528 | Num Epochs = 10\n",
+ "O^O/ \\_/ \\ Batch size per device = 2 | Gradient Accumulation steps = 4\n",
+ "\\ / Total batch size = 8 | Total steps = 5,660\n",
+ " \"-____-\" Number of trainable parameters = 18,464,768\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "    <div>\n",
+ "      \n",
+ "      <progress value='5660' max='5660' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
+ "      [5660/5660 1:32:43, Epoch 10/10]\n",
+ "    </div>\n",
+ "    <table border=\"1\" class=\"dataframe\">\n",
+ "  <thead>\n",
+ " <tr style=\"text-align: left;\">\n",
+ "      <th>Step</th>\n",
+ "      <th>Training Loss</th>\n",
+ "    </tr>\n",
+ "  </thead>\n",
+ "  <tbody>\n",
+ "    <tr>\n",
+ "      <td>100</td>\n",
+ "      <td>1.919100</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <td>200</td>\n",
+ "      <td>1.774900</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <td>300</td>\n",
+ "      <td>1.722600</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <td>400</td>\n",
+ "      <td>1.721900</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <td>500</td>\n",
+ "      <td>1.695700</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <td>600</td>\n",
+ "      <td>1.612500</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <td>700</td>\n",
+ "      <td>1.473700</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <td>800</td>\n",
+ "      <td>1.518000</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <td>900</td>\n",
+ "      <td>1.452100</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <td>1000</td>\n",
+ "      <td>1.454900</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <td>1100</td>\n",
+ "      <td>1.509600</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <td>1200</td>\n",
+ "      <td>1.272200</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <td>1300</td>\n",
+ "      <td>1.128400</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <td>1400</td>\n",
+ "      <td>1.161200</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <td>1500</td>\n",
+ "      <td>1.165600</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <td>1600</td>\n",
+ "      <td>1.169700</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <td>1700</td>\n",
+ "      <td>1.140900</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <td>1800</td>\n",
+ "      <td>0.796500</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <td>1900</td>\n",
+ "      <td>0.812800</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <td>2000</td>\n",
+ "      <td>0.815000</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <td>2100</td>\n",
+ "      <td>0.806600</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <td>2200</td>\n",
+ "      <td>0.850100</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <td>2300</td>\n",
+ "      <td>0.737200</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <td>2400</td>\n",
+ "      <td>0.533900</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <td>2500</td>\n",
+ "      <td>0.521600</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <td>2600</td>\n",
+ "      <td>0.562600</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <td>2700</td>\n",
+ "      <td>0.557700</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <td>2800</td>\n",
+ "      <td>0.563000</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <td>2900</td>\n",
+ "      <td>0.418500</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <td>3000</td>\n",
+ "      <td>0.343000</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <td>3100</td>\n",
+ "      <td>0.353900</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <td>3200</td>\n",
+ "      <td>0.368300</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <td>3300</td>\n",
+ "      <td>0.367600</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <td>3400</td>\n",
+ "      <td>0.361000</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <td>3500</td>\n",
+ "      <td>0.230000</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <td>3600</td>\n",
+ "      <td>0.244000</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <td>3700</td>\n",
+ "      <td>0.246400</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <td>3800</td>\n",
+ "      <td>0.245400</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <td>3900</td>\n",
+ "      <td>0.256800</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <td>4000</td>\n",
+ "      <td>0.232000</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <td>4100</td>\n",
+ "      <td>0.178700</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <td>4200</td>\n",
+ "      <td>0.186600</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <td>4300</td>\n",
+ "      <td>0.189200</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <td>4400</td>\n",
+ "      <td>0.189600</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <td>4500</td>\n",
+ "      <td>0.190100</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <td>4600</td>\n",
+ "      <td>0.160900</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <td>4700</td>\n",
+ "      <td>0.155000</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <td>4800</td>\n",
+ "      <td>0.155300</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <td>4900</td>\n",
+ "      <td>0.157400</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <td>5000</td>\n",
+ "      <td>0.159500</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <td>5100</td>\n",
+ "      <td>0.157000</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <td>5200</td>\n",
+ "      <td>0.138300</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <td>5300</td>\n",
+ "      <td>0.138600</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <td>5400</td>\n",
+ "      <td>0.139500</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <td>5500</td>\n",
+ "      <td>0.141400</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <td>5600</td>\n",
+ "      <td>0.144900</td>\n",
+ "    </tr>\n",
+ "  </tbody>\n",
+ "</table><p>"
+ ],
+ "text/plain": [
+ "<IPython.core.display.HTML object>"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "CPU times: user 1h 23min 59s, sys: 8min 44s, total: 1h 32min 43s\n",
+ "Wall time: 1h 32min 45s\n"
+ ]
+ }
+ ],
+ "source": [
+ "%%time\n",
+ "\n",
+ "trainer_stats = trainer.train()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "application/vnd.databricks.v1+cell": {
+ "cellMetadata": {},
+ "inputWidgets": {},
+ "nuid": "e843842b-295f-4020-accf-393934732322",
+ "showTitle": false,
+ "title": ""
+ },
+ "cellView": "form",
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "pCqnaKmlO1U9",
+ "outputId": "98f78253-86cf-4673-ff2b-923460c2b3fd"
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "5564.5261 seconds used for training.\n",
+ "92.74 minutes used for training.\n",
+ "Peak reserved memory = 4.152 GB.\n",
+ "Peak reserved memory for training = 0.519 GB.\n",
+ "Peak reserved memory % of max memory = 34.617 %.\n",
+ "Peak reserved memory for training % of max memory = 4.327 %.\n"
+ ]
+ }
+ ],
+ "source": [
+ "# @title Show final memory and time stats\n",
+ "used_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)\n",
+ "used_memory_for_lora = round(used_memory - start_gpu_memory, 3)\n",
+ "used_percentage = round(used_memory / max_memory * 100, 3)\n",
+ "lora_percentage = round(used_memory_for_lora / max_memory * 100, 3)\n",
+ "print(f\"{trainer_stats.metrics['train_runtime']} seconds used for training.\")\n",
+ "print(\n",
+ " f\"{round(trainer_stats.metrics['train_runtime']/60, 2)} minutes used for training.\"\n",
+ ")\n",
+ "print(f\"Peak reserved memory = {used_memory} GB.\")\n",
+ "print(f\"Peak reserved memory for training = {used_memory_for_lora} GB.\")\n",
+ "print(f\"Peak reserved memory % of max memory = {used_percentage} %.\")\n",
+ "print(f\"Peak reserved memory for training % of max memory = {lora_percentage} %.\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "application/vnd.databricks.v1+cell": {
+ "cellMetadata": {},
+ "inputWidgets": {},
+ "nuid": "72e8aeca-cd4c-44ee-82cd-04fa6728b40a",
+ "showTitle": false,
+ "title": ""
+ }
+ },
+ "source": [
+ " \n",
+ "### Inference\n",
+ "Let's run the model! You can change the instruction and input - leave the output blank!"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "application/vnd.databricks.v1+cell": {
+ "cellMetadata": {},
+ "inputWidgets": {},
+ "nuid": "3a76b619-1a84-4852-9be7-0f9b2bfa4c05",
+ "showTitle": false,
+ "title": ""
+ }
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "<|im_start|>system\n",
+ "You are an expert in translating Chinese into English.<|im_end|>\n",
+ "<|im_start|>user\n",
+ "Translate from Chinese to English.\n",
+ "老耿端起枪,眯缝起一只三角眼,一搂扳机响了枪,冰雹般的金麻雀劈哩啪啦往下落,铁砂子在柳枝间飞迸着,嚓嚓有声。<|im_end|>\n",
+ "<|im_start|>assistant\n",
+ "\n",
+ "----------------------------------------\n",
+ "<|im_start|>system\n",
+ "You are an expert in translating Chinese into English.<|im_end|>\n",
+ "<|im_start|>user\n",
+ "Translate from Chinese to English.\n",
+ "老耿端起枪,眯缝起一只三角眼,一搂扳机响了枪,冰雹般的金麻雀劈哩啪啦往下落,铁砂子在柳枝间飞迸着,嚓嚓有声。<|im_end|>\n",
+ "<|im_start|>assistant\n",
+ "Old Geng raised the pistol, squinted one eye, squeezed the trigger, and let a shower of jumbo pigeons drop down from skyward, coursing through the willows as though carried on silkworm tails, tossing tin cans in the air as they fell.<|im_end|>\n",
+ "CPU times: user 3.71 s, sys: 352 ms, total: 4.07 s\n",
+ "Wall time: 4.04 s\n"
+ ]
+ }
+ ],
+ "source": [
+ "%%time\n",
+ "\n",
+ "prompt1 = datasets[\"test\"][\"prompt\"][0]\n",
+ "print(prompt1)\n",
+ "print(\"--\" * 20)\n",
+ "test_model(model, tokenizer, prompt1)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "application/vnd.databricks.v1+cell": {
+ "cellMetadata": {},
+ "inputWidgets": {},
+ "nuid": "55d0f54b-a9fc-4eb5-970e-0e9c118619bf",
+ "showTitle": false,
+ "title": ""
+ }
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Old Geng picked up his shotgun, squinted, and pulled the trigger. Two sparrows crashed to the ground like hailstones as shotgun pellets tore noisily through the branches.\n"
+ ]
+ }
+ ],
+ "source": [
+ "print(datasets[\"test\"][\"english\"][0])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "application/vnd.databricks.v1+cell": {
+ "cellMetadata": {},
+ "inputWidgets": {},
+ "nuid": "1aa24900-40c4-45de-b8af-3d1da7070ff7",
+ "showTitle": false,
+ "title": ""
+ }
+ },
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "100%|██████████| 1133/1133 [34:09<00:00, 1.81s/it] "
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "CPU times: user 30min 48s, sys: 3min 21s, total: 34min 10s\n",
+ "Wall time: 34min 9s\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "\n"
+ ]
+ }
+ ],
+ "source": [
+ "%%time\n",
+ "\n",
+ "predictions = eval_model(model, tokenizer, datasets[\"test\"])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "application/vnd.databricks.v1+cell": {
+ "cellMetadata": {},
+ "inputWidgets": {},
+ "nuid": "68c4520d-e356-491c-9aa9-ecc57316d177",
+ "showTitle": false,
+ "title": ""
+ }
+ },
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "[nltk_data] Downloading package wordnet to /home/inflaton/nltk_data...\n",
+ "[nltk_data] Package wordnet is already up-to-date!\n",
+ "[nltk_data] Downloading package punkt to /home/inflaton/nltk_data...\n",
+ "[nltk_data] Package punkt is already up-to-date!\n",
+ "[nltk_data] Downloading package omw-1.4 to /home/inflaton/nltk_data...\n",
+ "[nltk_data] Package omw-1.4 is already up-to-date!\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "{'accuracy': 0.00264783759929391,\n",
+ " 'correct_ids': [147, 170, 194],\n",
+ " 'meteor': {'meteor': 0.35503843183028994},\n",
+ " 'bleu_scores': {'bleu': 0.09734851870184895,\n",
+ " 'precisions': [0.38486636126948554,\n",
+ " 0.12903115371448134,\n",
+ " 0.05879839025606325,\n",
+ " 0.030757244091566802],\n",
+ " 'brevity_penalty': 1.0,\n",
+ " 'length_ratio': 1.0050679032792316,\n",
+ " 'translation_length': 30343,\n",
+ " 'reference_length': 30190},\n",
+ " 'rouge_scores': {'rouge1': 0.3809259470501297,\n",
+ " 'rouge2': 0.1543849804952549,\n",
+ " 'rougeL': 0.32312000381943484,\n",
+ " 'rougeLsum': 0.32320284655253784}}"
+ ]
+ },
+ "execution_count": 21,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "calc_metrics(datasets[\"test\"][\"english\"], predictions, debug=True)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "application/vnd.databricks.v1+cell": {
+ "cellMetadata": {},
+ "inputWidgets": {},
+ "nuid": "580351cd-ed04-47eb-82d7-d9fdda0dbeea",
+ "showTitle": false,
+ "title": ""
+ }
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " index chinese \\\n",
+ "0 0 老耿端起枪,眯缝起一只三角眼,一搂扳机响了枪,冰雹般的金麻雀劈哩啪啦往下落,铁砂子在柳枝间飞... \n",
+ "\n",
+ " english \\\n",
+ "0 Old Geng picked up his shotgun, squinted, and ... \n",
+ "\n",
+ " unsloth/Qwen2-0.5B-Instruct(finetuned) \\\n",
+ "0 Old Geng lifted his rifle and narrowed his eye... \n",
+ "\n",
+ " unsloth/Qwen2-1.5B-Instruct \\\n",
+ "0 Old Geng took up his gun, squinted one of its ... \n",
+ "\n",
+ " unsloth/Qwen2-1.5B-Instruct(finetuned) \n",
+ "0 Old Geng raised the rifle, squeezed one tiny t... \n"
+ ]
+ }
+ ],
+ "source": [
+ "from llm_toolkit.translation_engine import save_results\n",
+ "\n",
+ "save_results(model_name + \"(finetuned)\", \"results/mac-results.csv\", datasets[\"test\"], predictions, debug=True)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "application/vnd.databricks.v1+cell": {
+ "cellMetadata": {},
+ "inputWidgets": {},
+ "nuid": "c510db20-d3c0-4fcc-a5db-ccda6b022f68",
+ "showTitle": false,
+ "title": ""
+ },
+ "id": "uMuVrWbjAzhc"
+ },
+ "source": [
+ " \n",
+ "### Saving, uploading finetuned models\n",
+ "To save the final model as LoRA adapters, either use Huggingface's `push_to_hub` for an online save or `save_pretrained` for a local save."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "application/vnd.databricks.v1+cell": {
+ "cellMetadata": {},
+ "inputWidgets": {},
+ "nuid": "5c8ebb0f-88a4-4fd6-ba0d-d3fe2ebcca51",
+ "showTitle": false,
+ "title": ""
+ }
+ },
+ "outputs": [],
+ "source": [
+ "def save_model(model, tokenizer, save_method, publish=True):\n",
+ " model.save_pretrained_merged(\n",
+ " local_model + save_method,\n",
+ " tokenizer,\n",
+ " save_method=save_method,\n",
+ " )\n",
+ "\n",
+ " if publish:\n",
+ " model.push_to_hub_merged(\n",
+ " hub_model + save_method,\n",
+ " tokenizer,\n",
+ " save_method=save_method,\n",
+ " token=token,\n",
+ " )"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "application/vnd.databricks.v1+cell": {
+ "cellMetadata": {},
+ "inputWidgets": {},
+ "nuid": "d8315f14-d351-42e6-8215-be7e39033e02",
+ "showTitle": false,
+ "title": ""
+ }
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Unsloth: Merging 4bit and LoRA weights to 4bit...\n",
+ "This might take 5 minutes...\n",
+ "Done.\n",
+ "Unsloth: Saving tokenizer... Done.\n",
+ "Unsloth: Saving model... This might take 10 minutes for Llama-7b... Done.\n",
+ "Unsloth: Merging 4bit and LoRA weights to 4bit...\n",
+ "This might take 5 minutes...\n",
+ "Done.\n",
+ "Unsloth: Saving 4bit Bitsandbytes model. Please wait...\n"
+ ]
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "4888e1237402445b809b3b4bbab4ac25",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "README.md:   0%|          | 0.00/575 [00:00<?, ?B/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "281f3486dcdb493cb18ea2cc5bf5c967",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "Upload 2 LFS files:   0%|          | 0/2 [00:00<?, ?it/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "0c35b0292c4341f9877cb9b8f604a243",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "model-00002-of-00002.safetensors:   0%|          | 0.00/727M [00:00<?, ?B/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "793da83611824cb6afb352a765db5f30",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "model-00001-of-00002.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "20638cc38b46448796b9069075b68371",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "README.md:   0%|          | 0.00/581 [00:00<?, ?B/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Saved merged_4bit model to https://huggingface.co/Qwen2-1.5B-Instruct-MAC-merged_4bit_forced\n"
+ ]
+ }
+ ],
+ "source": [
+ "save_model(model, tokenizer, \"merged_4bit_forced\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "application/vnd.databricks.v1+cell": {
+ "cellMetadata": {},
+ "inputWidgets": {},
+ "nuid": "4cf7bb47-4d2d-491e-9272-d53145d134ab",
+ "showTitle": false,
+ "title": ""
+ }
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "CPU times: user 365 ms, sys: 1.58 ms, total: 367 ms\n",
+ "Wall time: 1.08 s\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "0"
+ ]
+ },
+ "execution_count": 25,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "%%time\n",
+ "\n",
+ "# Empty VRAM\n",
+ "del model\n",
+ "del trainer\n",
+ "\n",
+ "# clear memory\n",
+ "import torch\n",
+ "torch.cuda.empty_cache()\n",
+ "\n",
+ "# garbage collect\n",
+ "import gc\n",
+ "gc.collect()\n",
+ "gc.collect()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "application/vnd.databricks.v1+cell": {
+ "cellMetadata": {},
+ "inputWidgets": {},
+ "nuid": "599dbda5-98cb-4b9a-9acd-e3291121a414",
+ "showTitle": false,
+ "title": ""
+ },
+ "id": "Zt9CHJqO6p30"
+ },
+ "source": [
+ "And we're done! If you have any questions on Unsloth, we have a [Discord](https://discord.gg/u54VK8m8tk) channel! If you find any bugs or want to keep updated with the latest LLM stuff, or need help, join projects etc, feel free to join our Discord!\n",
+ "\n",
+ "Some other links:\n",
+ "1. Zephyr DPO 2x faster [free Colab](https://colab.research.google.com/drive/15vttTpzzVXv_tJwEk-hIcQ0S9FcEWvwP?usp=sharing)\n",
+ "2. Llama 7b 2x faster [free Colab](https://colab.research.google.com/drive/1lBzz5KeZJKXjvivbYvmGarix9Ao6Wxe5?usp=sharing)\n",
+ "3. TinyLlama 4x faster full Alpaca 52K in 1 hour [free Colab](https://colab.research.google.com/drive/1AZghoNBQaMDgWJpi4RbffGM1h6raLUj9?usp=sharing)\n",
+ "4. CodeLlama 34b 2x faster [A100 on Colab](https://colab.research.google.com/drive/1y7A0AxE3y8gdj4AVkl2aZX47Xu3P1wJT?usp=sharing)\n",
+ "5. Mistral 7b [free Kaggle version](https://www.kaggle.com/code/danielhanchen/kaggle-mistral-7b-unsloth-notebook)\n",
+ "6. We also did a [blog](https://huggingface.co/blog/unsloth-trl) with 🤗 HuggingFace, and we're in the TRL [docs](https://huggingface.co/docs/trl/main/en/sft_trainer#accelerate-fine-tuning-2x-using-unsloth)!\n",
+ "7. `ChatML` for ShareGPT datasets, [conversational notebook](https://colab.research.google.com/drive/1Aau3lgPzeZKQ-98h69CCu1UJcvIBLmy2?usp=sharing)\n",
+ "8. Text completions like novel writing [notebook](https://colab.research.google.com/drive/1ef-tab5bhkvWmBOObepl1WgJvfvSzn5Q?usp=sharing)\n",
+ "9. [**NEW**] We make Phi-3 Medium / Mini **2x faster**! See our [Phi-3 Medium notebook](https://colab.research.google.com/drive/1hhdhBa1j_hsymiW9m-WzxQtgqTH_NHqi?usp=sharing)\n",
+ "\n",
+ "\n",
+ "<div class=\"align-center\">\n",
+ "  <a href=\"https://github.com/unslothai/unsloth\"><img src=\"https://github.com/unslothai/unsloth/raw/main/images/unsloth%20new%20logo.png\" width=\"115\"></a>\n",
+ "  <a href=\"https://ko-fi.com/unsloth\"><img src=\"https://github.com/unslothai/unsloth/raw/main/images/Kofi button.png\" width=\"145\"></a> Support our work if you can! Thanks!\n",
+ "</div>"
+ ]
+ }
+ ],
+ "metadata": {
+ "accelerator": "GPU",
+ "application/vnd.databricks.v1+notebook": {
+ "dashboards": [],
+ "environmentMetadata": null,
+ "language": "python",
+ "notebookMetadata": {},
+ "notebookName": "06_MAC_+_Qwen2-1.5B_Unsloth_train",
+ "widgets": {}
+ },
+ "colab": {
+ "gpuType": "T4",
+ "provenance": []
+ },
+ "kernelspec": {
+ "display_name": "Python 3",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.11.9"
+ },
+ "widgets": {
+ "application/vnd.jupyter.widget-state+json": {
+ "036fc5746f43416db18c19ad8fd36677": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "06e806c82c7b4cbea31c5358dd9c3434": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "087b76a8b7514269b1f0ab29b062e444": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_a069d2ab23824f29aa320ac256e2cfe9",
+ "placeholder": "",
+ "style": "IPY_MODEL_06e806c82c7b4cbea31c5358dd9c3434",
+ "value": "Map (num_proc=2): 100%"
+ }
+ },
+ "09b76013aa9e45efb6deb23a7a0d0925": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_dea41c5260884aa6879b5e1d1697b14f",
+ "placeholder": "",
+ "style": "IPY_MODEL_89965917796a4f81b899fdc7685f33df",
+ "value": "config.json: 100%"
+ }
+ },
+ "0a92c56bfa134ef583220d7ef0b13e17": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "0c34be936c8145d3ab41282f30a70713": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "0f8b6bfe16894500838793f2491d403f": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "177c78fce95d4b4ab33057c5a048d693": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "1f44c9ce1adf470cbb19784493ed209f": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_0c34be936c8145d3ab41282f30a70713",
+ "placeholder": "",
+ "style": "IPY_MODEL_0a92c56bfa134ef583220d7ef0b13e17",
+ "value": "model.safetensors: 100%"
+ }
+ },
+ "201b59ccd9f845e197029b57e424aefc": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "2157f01726d748f8a9ae4a00664430da": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "21db8a77b00d4a4e82fdfa608657531f": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "26e4202cca81496a90d15a0dd4ca9cf1": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_ba90fdb8822d47dab7ba203bee297f37",
+ "IPY_MODEL_61560ff6a36b44f4a9dfdae5c52791d4",
+ "IPY_MODEL_95fbe66647904c06a20f640630d6dc0e"
+ ],
+ "layout": "IPY_MODEL_57182a263d324a3dbf1471c74290a0d5"
+ }
+ },
+ "27155728b6b84cb199c91c940095d0a8": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_6b91feeed5464877991ac2c207aebe7c",
+ "IPY_MODEL_cca8113c54c0495daedce1327bf9c68b",
+ "IPY_MODEL_2e63a29e2f7247bba5beede9a568c99f"
+ ],
+ "layout": "IPY_MODEL_5c9d781c28944f3eb86e2a6d44efdf18"
+ }
+ },
+ "271ddaa553a042d09b6db7b450643d8f": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "2a58d04b428c46f4b3dbadd3bc6cd529": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "2d18ddf6482c4d97829ac0e5a7b9868f": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_9f679ad3ec7f4fe8ad0510ffb57bc2ab",
+ "IPY_MODEL_f2df530d22c74977b249dd9fb5f4829b",
+ "IPY_MODEL_89b2ef0dbfea47ab8e6f8d659e3351d1"
+ ],
+ "layout": "IPY_MODEL_3056b148aa9f4e6e8aa3b61d26886255"
+ }
+ },
+ "2e5087c76f98437cb5dc729230358cba": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "2e63a29e2f7247bba5beede9a568c99f": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_b993eaec6b224440bf80c0958c6fb536",
+ "placeholder": "",
+ "style": "IPY_MODEL_de868e26e7154f62aa86223a539ad421",
+ "value": " 464/464 [00:00<00:00, 27.1kB/s]"
+ }
+ },
+ "2f6c70dd266c4816bfad3fd3d192929a": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "30307300bc4e4baf96560e30969a82b6": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_e36a3f9eff0e4cf68834d66b0213ae96",
+ "placeholder": "",
+ "style": "IPY_MODEL_a0037bdccf254159becde630bee3d1db",
+ "value": "generation_config.json: 100%"
+ }
+ },
+ "3056b148aa9f4e6e8aa3b61d26886255": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "30cdc32298134cb0be4d41615b9e5774": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "3572201bd4d74a58b7a665f9bdfdcdba": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "35b0e8c26d6640e9bd0ed7b242a423d8": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_2e5087c76f98437cb5dc729230358cba",
+ "max": 51760,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_036fc5746f43416db18c19ad8fd36677",
+ "value": 51760
+ }
+ },
+ "36166c7bcb854b34aca1f41a5d6ea50b": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "370692d819df41828b48c4ad446f977b": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "39b29a75374b45c0a22506010be2b84e": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_30cdc32298134cb0be4d41615b9e5774",
+ "max": 1179,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_47928317548c454bba6358ab132e8dee",
+ "value": 1179
+ }
+ },
+ "3cf2dd993b5e4d3daecf61e4bab5a404": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_087b76a8b7514269b1f0ab29b062e444",
+ "IPY_MODEL_35b0e8c26d6640e9bd0ed7b242a423d8",
+ "IPY_MODEL_54ad89e05fd74576b9b8b5b5a10eaf8d"
+ ],
+ "layout": "IPY_MODEL_a41dc44766444a998bec2d777f249d23"
+ }
+ },
+ "43dec2ede91341f5af60eb522e18e984": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "4463edd481c1467f914c7dcd6c6e6ffc": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "47928317548c454bba6358ab132e8dee": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "49277aeeac16434a865a4d12308b1abc": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "4ae7e449e4ea4c729b5f34607c18ebae": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "4b2061b8a73c43ffb0c2f83daf0d0183": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "4c4c88d4c701450692fa0f6b0c5764b0": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "4c666f4ace3943f8b80ecd20e7503236": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "4ccedf0d93094e63b57a0f8a434fba06": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_4463edd481c1467f914c7dcd6c6e6ffc",
+ "max": 44307561,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_6d3b9a05db0b4dadb638c686faa0c40a",
+ "value": 44307561
+ }
+ },
+ "4dcf6ff672d24983a1877a8431709aa9": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_5807d5fb827d490fb3bc698f801ffff5",
+ "placeholder": "",
+ "style": "IPY_MODEL_c4f2b06a82fd4987b8b659524a7b503b",
+ "value": "Generating train split: 100%"
+ }
+ },
+ "4ea63adfce694725bdba878aef709dd3": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "5234566b1bfc4655b8d582ea5b46ed9f": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "54ad89e05fd74576b9b8b5b5a10eaf8d": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_fdb1941405ed4e4aa06019933892deb3",
+ "placeholder": "",
+ "style": "IPY_MODEL_668d5377ca56426a99753867e6e24862",
+ "value": " 51760/51760 [01:02<00:00, 1131.51 examples/s]"
+ }
+ },
+ "56aee4853b7740e6a977254f5d1fa66d": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "57182a263d324a3dbf1471c74290a0d5": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "5807d5fb827d490fb3bc698f801ffff5": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "5c9d781c28944f3eb86e2a6d44efdf18": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "5f40db8173dd4d76b6ef5ed6d9ec8b6e": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "61560ff6a36b44f4a9dfdae5c52791d4": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_db19fc8d37db4e45a5790a876836d8c4",
+ "max": 11610,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_36166c7bcb854b34aca1f41a5d6ea50b",
+ "value": 11610
+ }
+ },
+ "6578fd7acdb54c4c93528ea431fd0144": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_370692d819df41828b48c4ad446f977b",
+ "placeholder": "",
+ "style": "IPY_MODEL_a0bf9160eb2647409b3200270914b90f",
+ "value": " 50.6k/50.6k [00:00<00:00, 2.71MB/s]"
+ }
+ },
+ "668d5377ca56426a99753867e6e24862": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "697f027529b54ee9956bae78a11e0611": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "69ac12aec0714318bf2c83d4f4e745f5": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "6b2012c3f88547af8884a9ea90e3164b": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_938f45f1b3e24118b815d96ae34ba86a",
+ "placeholder": "",
+ "style": "IPY_MODEL_9367047a800747f79c6b225d92397846",
+ "value": " 44.3M/44.3M [00:01<00:00, 31.0MB/s]"
+ }
+ },
+ "6b91feeed5464877991ac2c207aebe7c": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_4b2061b8a73c43ffb0c2f83daf0d0183",
+ "placeholder": "",
+ "style": "IPY_MODEL_69ac12aec0714318bf2c83d4f4e745f5",
+ "value": "special_tokens_map.json: 100%"
+ }
+ },
+ "6d3b9a05db0b4dadb638c686faa0c40a": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "6dbbedeca9314e66ae50e44ffa31a414": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "6e34619b45934040b6092e6fb01ea7fe": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "71ce208e20d6483abb9ed923510c86d7": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_d69dc491b3ab44d7852b21873ed7bb7f",
+ "placeholder": "",
+ "style": "IPY_MODEL_f401d53bf28e44eb906bce6c05412662",
+ "value": " 51760/51760 [00:01<00:00, 45512.81 examples/s]"
+ }
+ },
+ "7358cdad832342c983e31efb8754ab78": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "73e352a3404f4c7dad0737f57d29e92f": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_988a0e8c1f89446086858da0a891a79c",
+ "IPY_MODEL_4ccedf0d93094e63b57a0f8a434fba06",
+ "IPY_MODEL_6b2012c3f88547af8884a9ea90e3164b"
+ ],
+ "layout": "IPY_MODEL_7e29cb8dd4df4d5b94407cd8fd3f2011"
+ }
+ },
+ "74501720ac7e4dbb911a4a99b3633bc6": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "78e5400bff924a92a4cc61c4ff18b182": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_b9b313fd861948f5aba25b24b1518d30",
+ "placeholder": "",
+ "style": "IPY_MODEL_4c666f4ace3943f8b80ecd20e7503236",
+ "value": " 1.18k/1.18k [00:00<00:00, 31.3kB/s]"
+ }
+ },
+ "7975adbc2ec5489ea7fa0167e620d85c": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_6e34619b45934040b6092e6fb01ea7fe",
+ "max": 51760,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_271ddaa553a042d09b6db7b450643d8f",
+ "value": 51760
+ }
+ },
+ "7e29cb8dd4df4d5b94407cd8fd3f2011": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "810ff6c0e17d4fa09a30fef27eacff90": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "89965917796a4f81b899fdc7685f33df": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "89b2ef0dbfea47ab8e6f8d659e3351d1": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_b8908fa0df3743ecb9d12983a739104f",
+ "placeholder": "",
+ "style": "IPY_MODEL_177c78fce95d4b4ab33057c5a048d693",
+ "value": " 9.09M/9.09M [00:00<00:00, 32.6MB/s]"
+ }
+ },
+ "8b3505352a5a42bf910428c40ce40465": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_49277aeeac16434a865a4d12308b1abc",
+ "placeholder": "",
+ "style": "IPY_MODEL_2157f01726d748f8a9ae4a00664430da",
+ "value": " 5.70G/5.70G [01:02<00:00, 30.1MB/s]"
+ }
+ },
+ "8fc142b628fb40568730234de1cafde2": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_4ae7e449e4ea4c729b5f34607c18ebae",
+ "max": 172,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_3572201bd4d74a58b7a665f9bdfdcdba",
+ "value": 172
+ }
+ },
+ "9367047a800747f79c6b225d92397846": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "938f45f1b3e24118b815d96ae34ba86a": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "95fbe66647904c06a20f640630d6dc0e": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_b0a370dc20654b279b9680692e34418e",
+ "placeholder": "",
+ "style": "IPY_MODEL_cfeb365ddf7548d58b2557f22737fcf5",
+ "value": " 11.6k/11.6k [00:00<00:00, 716kB/s]"
+ }
+ },
+ "988a0e8c1f89446086858da0a891a79c": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_ad2be500fc164c0f86f33e914ef8e6a0",
+ "placeholder": "",
+ "style": "IPY_MODEL_5234566b1bfc4655b8d582ea5b46ed9f",
+ "value": "Downloading data: 100%"
+ }
+ },
+ "98c58f23f4d549518832cb2d18f796e8": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_09b76013aa9e45efb6deb23a7a0d0925",
+ "IPY_MODEL_39b29a75374b45c0a22506010be2b84e",
+ "IPY_MODEL_78e5400bff924a92a4cc61c4ff18b182"
+ ],
+ "layout": "IPY_MODEL_2a58d04b428c46f4b3dbadd3bc6cd529"
+ }
+ },
+ "99fdbb0300c14c139d1937c646f0cfe7": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_7358cdad832342c983e31efb8754ab78",
+ "placeholder": "",
+ "style": "IPY_MODEL_e9adf418296e436fb48bb9f78885598b",
+ "value": " 51760/51760 [00:01<00:00, 38665.95 examples/s]"
+ }
+ },
+ "9f679ad3ec7f4fe8ad0510ffb57bc2ab": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_4ea63adfce694725bdba878aef709dd3",
+ "placeholder": "",
+ "style": "IPY_MODEL_74501720ac7e4dbb911a4a99b3633bc6",
+ "value": "tokenizer.json: 100%"
+ }
+ },
+ "a0037bdccf254159becde630bee3d1db": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "a069d2ab23824f29aa320ac256e2cfe9": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "a0bf9160eb2647409b3200270914b90f": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "a41dc44766444a998bec2d777f249d23": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "a8464a4c711e4e00aafdfc919b60d07e": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_fb995c740590427b882572c81d4e848c",
+ "placeholder": "",
+ "style": "IPY_MODEL_201b59ccd9f845e197029b57e424aefc",
+ "value": " 172/172 [00:00<00:00, 12.0kB/s]"
+ }
+ },
+ "a9f0cc51fc3d4d7b874c32dcf1c5bdf2": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "ad2be500fc164c0f86f33e914ef8e6a0": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "b0240cd9a4554b29ae11f8051984a1c6": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_edaf890370314a218f138015faa0b05d",
+ "placeholder": "",
+ "style": "IPY_MODEL_697f027529b54ee9956bae78a11e0611",
+ "value": "Map: 100%"
+ }
+ },
+ "b0a370dc20654b279b9680692e34418e": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "b518dcee69074b87be73957cd810e7ed": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_d891f8d0b1fc462f8008d02bb2a15692",
+ "placeholder": "",
+ "style": "IPY_MODEL_cced8fd7e998472794f3f3e3018956a5",
+ "value": "tokenizer_config.json: 100%"
+ }
+ },
+ "b8908fa0df3743ecb9d12983a739104f": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "b993eaec6b224440bf80c0958c6fb536": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "b9b313fd861948f5aba25b24b1518d30": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "ba90fdb8822d47dab7ba203bee297f37": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_0f8b6bfe16894500838793f2491d403f",
+ "placeholder": "",
+ "style": "IPY_MODEL_bb19f6c747754682a514373a3a0535ba",
+ "value": "Downloading readme: 100%"
+ }
+ },
+ "bb19f6c747754682a514373a3a0535ba": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "bc883d4cf13e4f8b8a4fe5f410cb6efd": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_e9159e03e61f4f56978ece9c3bca49b2",
+ "max": 51760,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_810ff6c0e17d4fa09a30fef27eacff90",
+ "value": 51760
+ }
+ },
+ "c161d94df0f04feba9542237e0856c22": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "c22f71b1f85843209d7e5321506b9cb9": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_1f44c9ce1adf470cbb19784493ed209f",
+ "IPY_MODEL_f1addc4479d849879e743cf9089e6540",
+ "IPY_MODEL_8b3505352a5a42bf910428c40ce40465"
+ ],
+ "layout": "IPY_MODEL_4c4c88d4c701450692fa0f6b0c5764b0"
+ }
+ },
+ "c4f2b06a82fd4987b8b659524a7b503b": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "cca8113c54c0495daedce1327bf9c68b": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_e02f9b7849c64531835eb77b860d1c93",
+ "max": 464,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_56aee4853b7740e6a977254f5d1fa66d",
+ "value": 464
+ }
+ },
+ "cced8fd7e998472794f3f3e3018956a5": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "cf245afeb1c04f29a24d291608c3d157": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_b518dcee69074b87be73957cd810e7ed",
+ "IPY_MODEL_e29104486d594b2992d7285e0ef77371",
+ "IPY_MODEL_6578fd7acdb54c4c93528ea431fd0144"
+ ],
+ "layout": "IPY_MODEL_d35db8148a354c56aaac56dbae22536f"
+ }
+ },
+ "cfe8cae0e22b495bafa221a63d13b283": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "cfeb365ddf7548d58b2557f22737fcf5": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "d1b47d39450d4019ae85c9b2f943eeaf": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_4dcf6ff672d24983a1877a8431709aa9",
+ "IPY_MODEL_7975adbc2ec5489ea7fa0167e620d85c",
+ "IPY_MODEL_71ce208e20d6483abb9ed923510c86d7"
+ ],
+ "layout": "IPY_MODEL_cfe8cae0e22b495bafa221a63d13b283"
+ }
+ },
+ "d35db8148a354c56aaac56dbae22536f": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "d69dc491b3ab44d7852b21873ed7bb7f": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "d891f8d0b1fc462f8008d02bb2a15692": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "d8e5318cead340c4adbeaccc05d39225": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "daf4cd890b35422683d22fd30bc71e83": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_b0240cd9a4554b29ae11f8051984a1c6",
+ "IPY_MODEL_bc883d4cf13e4f8b8a4fe5f410cb6efd",
+ "IPY_MODEL_99fdbb0300c14c139d1937c646f0cfe7"
+ ],
+ "layout": "IPY_MODEL_c161d94df0f04feba9542237e0856c22"
+ }
+ },
+ "db19fc8d37db4e45a5790a876836d8c4": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "de868e26e7154f62aa86223a539ad421": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "dea41c5260884aa6879b5e1d1697b14f": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "e02f9b7849c64531835eb77b860d1c93": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "e29104486d594b2992d7285e0ef77371": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_a9f0cc51fc3d4d7b874c32dcf1c5bdf2",
+ "max": 50641,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_2f6c70dd266c4816bfad3fd3d192929a",
+ "value": 50641
+ }
+ },
+ "e36a3f9eff0e4cf68834d66b0213ae96": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "e9159e03e61f4f56978ece9c3bca49b2": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "e9adf418296e436fb48bb9f78885598b": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "edaf890370314a218f138015faa0b05d": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "f1addc4479d849879e743cf9089e6540": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_43dec2ede91341f5af60eb522e18e984",
+ "max": 5702746405,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_d8e5318cead340c4adbeaccc05d39225",
+ "value": 5702746405
+ }
+ },
+ "f2df530d22c74977b249dd9fb5f4829b": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_21db8a77b00d4a4e82fdfa608657531f",
+ "max": 9085698,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_6dbbedeca9314e66ae50e44ffa31a414",
+ "value": 9085698
+ }
+ },
+ "f401d53bf28e44eb906bce6c05412662": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "fb995c740590427b882572c81d4e848c": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "fce7a61c25ec4390af43d92b7c473a45": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_30307300bc4e4baf96560e30969a82b6",
+ "IPY_MODEL_8fc142b628fb40568730234de1cafde2",
+ "IPY_MODEL_a8464a4c711e4e00aafdfc919b60d07e"
+ ],
+ "layout": "IPY_MODEL_5f40db8173dd4d76b6ef5ed6d9ec8b6e"
+ }
+ },
+ "fdb1941405ed4e4aa06019933892deb3": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ }
+ }
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
+}
diff --git a/notebooks/03_Qwen2-0.5B_1.5B-4bit.ipynb b/notebooks/03_Qwen2-0.5B_1.5B-4bit.ipynb
new file mode 100644
index 0000000000000000000000000000000000000000..ab51e29ab2d8740a229405cd00b32af7f42dd7aa
--- /dev/null
+++ b/notebooks/03_Qwen2-0.5B_1.5B-4bit.ipynb
@@ -0,0 +1,5098 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {
+ "application/vnd.databricks.v1+cell": {
+ "cellMetadata": {},
+ "inputWidgets": {},
+ "nuid": "0ea8b46b-839b-445b-8043-ccdf4e920ace",
+ "showTitle": false,
+ "title": ""
+ }
+ },
+ "outputs": [],
+ "source": [
+ "%load_ext autoreload\n",
+ "%autoreload 2"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {
+ "application/vnd.databricks.v1+cell": {
+ "cellMetadata": {},
+ "inputWidgets": {},
+ "nuid": "6d394937-6c99-4a7c-9d32-7600a280032f",
+ "showTitle": false,
+ "title": ""
+ }
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "workding dir: /home/inflaton/code/projects/courses/novel-translation\n"
+ ]
+ }
+ ],
+ "source": [
+ "import os\n",
+ "import sys\n",
+ "from pathlib import Path\n",
+ "\n",
+ "workding_dir = str(Path.cwd().parent)\n",
+ "os.chdir(workding_dir)\n",
+ "sys.path.append(workding_dir)\n",
+ "print(\"workding dir:\", workding_dir)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {
+ "application/vnd.databricks.v1+cell": {
+ "cellMetadata": {},
+ "inputWidgets": {},
+ "nuid": "9f67ec60-2f24-411c-84eb-0dd664b44775",
+ "showTitle": false,
+ "title": ""
+ }
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "loading env vars from: /home/inflaton/code/projects/courses/novel-translation/.env\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "True"
+ ]
+ },
+ "execution_count": 3,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "from dotenv import find_dotenv, load_dotenv\n",
+ "\n",
+ "found_dotenv = find_dotenv(\".env\")\n",
+ "\n",
+ "if len(found_dotenv) == 0:\n",
+ " found_dotenv = find_dotenv(\".env.example\")\n",
+ "print(f\"loading env vars from: {found_dotenv}\")\n",
+ "load_dotenv(found_dotenv, override=True)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {
+ "application/vnd.databricks.v1+cell": {
+ "cellMetadata": {},
+ "inputWidgets": {},
+ "nuid": "f1597656-8042-4878-9d3b-9ebfb8dd86dc",
+ "showTitle": false,
+ "title": ""
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "('unsloth/Qwen2-0.5B-Instruct-bnb-4bit',\n",
+ " True,\n",
+ " None,\n",
+ " None,\n",
+ " 2048,\n",
+ " 10,\n",
+ " None,\n",
+ " 'datasets/mac/mac.tsv',\n",
+ " 'results/mac-results.csv')"
+ ]
+ },
+ "execution_count": 4,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "import os\n",
+ "\n",
+ "model_name = os.getenv(\"MODEL_NAME\")\n",
+ "token = os.getenv(\"HF_TOKEN\") or None\n",
+ "load_in_4bit = os.getenv(\"LOAD_IN_4BIT\") == \"true\"\n",
+ "local_model = os.getenv(\"LOCAL_MODEL\")\n",
+ "hub_model = os.getenv(\"HUB_MODEL\")\n",
+ "num_train_epochs = int(os.getenv(\"NUM_TRAIN_EPOCHS\") or 0)\n",
+ "data_path = os.getenv(\"DATA_PATH\")\n",
+ "results_path = os.getenv(\"RESULTS_PATH\")\n",
+ "\n",
+ "max_seq_length = 2048 # Choose any! We auto support RoPE Scaling internally!\n",
+ "dtype = (\n",
+ " None # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+\n",
+ ")\n",
+ "\n",
+ "model_name, load_in_4bit, local_model, hub_model, max_seq_length, num_train_epochs, dtype, data_path, results_path"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Fri Jun 21 08:19:33 2024 \n",
+ "+---------------------------------------------------------------------------------------+\n",
+ "| NVIDIA-SMI 545.23.07 Driver Version: 546.12 CUDA Version: 12.3 |\n",
+ "|-----------------------------------------+----------------------+----------------------+\n",
+ "| GPU Name Persistence-M | Bus-Id Disp.A | Volatile Uncorr. ECC |\n",
+ "| Fan Temp Perf Pwr:Usage/Cap | Memory-Usage | GPU-Util Compute M. |\n",
+ "| | | MIG M. |\n",
+ "|=========================================+======================+======================|\n",
+ "| 0 NVIDIA GeForce RTX 4080 ... On | 00000000:01:00.0 On | N/A |\n",
+ "| N/A 52C P8 5W / 150W | 1156MiB / 12282MiB | 20% Default |\n",
+ "| | | N/A |\n",
+ "+-----------------------------------------+----------------------+----------------------+\n",
+ " \n",
+ "+---------------------------------------------------------------------------------------+\n",
+ "| Processes: |\n",
+ "| GPU GI CI PID Type Process name GPU Memory |\n",
+ "| ID ID Usage |\n",
+ "|=======================================================================================|\n",
+ "| No running processes found |\n",
+ "+---------------------------------------------------------------------------------------+\n"
+ ]
+ }
+ ],
+ "source": [
+ "!nvidia-smi"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Current Directory:\n",
+ "/home/inflaton/code/projects/courses/novel-translation\n",
+ "Tuning unsloth/Qwen2-0.5B-Instruct-bnb-4bit\n",
+ "🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.\n",
+ "[nltk_data] Downloading package wordnet to /home/inflaton/nltk_data...\n",
+ "[nltk_data] Package wordnet is already up-to-date!\n",
+ "[nltk_data] Downloading package punkt to /home/inflaton/nltk_data...\n",
+ "[nltk_data] Package punkt is already up-to-date!\n",
+ "[nltk_data] Downloading package omw-1.4 to /home/inflaton/nltk_data...\n",
+ "[nltk_data] Package omw-1.4 is already up-to-date!\n",
+ "loading /home/inflaton/code/projects/courses/novel-translation/translation_engine.py\n",
+ "loading env vars from: /home/inflaton/code/projects/courses/novel-translation/.env\n",
+ "unsloth/Qwen2-0.5B-Instruct-bnb-4bit True 2048 10 None datasets/mac/mac.tsv results/mac-results.csv True True True\n",
+ "(1) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n",
+ "0.0 GB of memory reserved.\n",
+ "loading model: unsloth/Qwen2-0.5B-Instruct-bnb-4bit\n",
+ "==((====))== Unsloth: Fast Qwen2 patching release 2024.6\n",
+ " \\\\ /| GPU: NVIDIA GeForce RTX 4080 Laptop GPU. Max memory: 11.994 GB. Platform = Linux.\n",
+ "O^O/ \\_/ \\ Pytorch: 2.3.0. CUDA = 8.9. CUDA Toolkit = 12.1.\n",
+ "\\ / Bfloat16 = TRUE. Xformers = 0.0.26.post1. FA = False.\n",
+ " \"-____-\" Free Apache license: http://github.com/unslothai/unsloth\n",
+ "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n",
+ "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n",
+ "(2) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n",
+ "0.633 GB of memory reserved.\n",
+ "loading train/test data files\n",
+ "DatasetDict({\n",
+ " train: Dataset({\n",
+ " features: ['chinese', 'english', 'text', 'prompt'],\n",
+ " num_rows: 4528\n",
+ " })\n",
+ " test: Dataset({\n",
+ " features: ['chinese', 'english', 'text', 'prompt'],\n",
+ " num_rows: 1133\n",
+ " })\n",
+ "})\n",
+ "Evaluating base model: unsloth/Qwen2-0.5B-Instruct-bnb-4bit\n",
+ "100%|███████████████████████████████████████| 1133/1133 [41:53<00:00, 2.22s/it]\n",
+ " chinese ... unsloth/Qwen2-0.5B-Instruct-bnb-4bit\n",
+ "0 老耿端起枪,眯缝起一只三角眼,一搂扳机响了枪,冰雹般的金麻雀劈哩啪啦往下落,铁砂子在柳枝间飞... ... Old Teng raised his gun and looked up at a pai...\n",
+ "\n",
+ "[1 rows x 3 columns]\n",
+ "(3) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n",
+ "3.023 GB of memory reserved.\n",
+ "Unsloth 2024.6 patched 24 layers with 0 QKV layers, 24 O layers and 24 MLP layers.\n",
+ "(4) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n",
+ "3.023 GB of memory reserved.\n",
+ "==((====))== Unsloth - 2x faster free finetuning | Num GPUs = 1\n",
+ " \\\\ /| Num examples = 4,528 | Num Epochs = 10\n",
+ "O^O/ \\_/ \\ Batch size per device = 2 | Gradient Accumulation steps = 4\n",
+ "\\ / Total batch size = 8 | Total steps = 5,660\n",
+ " \"-____-\" Number of trainable parameters = 8,798,208\n",
+ "{'loss': 1.9401, 'grad_norm': 0.9639493823051453, 'learning_rate': 0.00019664014146772768, 'epoch': 0.18}\n",
+ "{'loss': 1.7762, 'grad_norm': 0.807873547077179, 'learning_rate': 0.0001931034482758621, 'epoch': 0.35}\n",
+ "{'loss': 1.7147, 'grad_norm': 0.9143684506416321, 'learning_rate': 0.00018956675508399648, 'epoch': 0.53}\n",
+ "{'loss': 1.7154, 'grad_norm': 0.753039538860321, 'learning_rate': 0.00018603006189213086, 'epoch': 0.71}\n",
+ "{'loss': 1.6864, 'grad_norm': 0.8280349373817444, 'learning_rate': 0.00018249336870026527, 'epoch': 0.88}\n",
+ "{'loss': 1.6079, 'grad_norm': 0.8070634603500366, 'learning_rate': 0.00017895667550839965, 'epoch': 1.06}\n",
+ "{'loss': 1.492, 'grad_norm': 0.906767725944519, 'learning_rate': 0.00017541998231653406, 'epoch': 1.24}\n",
+ "{'loss': 1.5407, 'grad_norm': 0.9825711250305176, 'learning_rate': 0.00017188328912466844, 'epoch': 1.41}\n",
+ "{'loss': 1.4724, 'grad_norm': 1.0809648036956787, 'learning_rate': 0.00016834659593280285, 'epoch': 1.59}\n",
+ "{'loss': 1.4687, 'grad_norm': 0.9906221032142639, 'learning_rate': 0.00016480990274093723, 'epoch': 1.77}\n",
+ "{'loss': 1.5277, 'grad_norm': 1.0263370275497437, 'learning_rate': 0.00016127320954907164, 'epoch': 1.94}\n",
+ "{'loss': 1.3495, 'grad_norm': 1.1578023433685303, 'learning_rate': 0.000157736516357206, 'epoch': 2.12}\n",
+ "{'loss': 1.2153, 'grad_norm': 1.1819669008255005, 'learning_rate': 0.0001541998231653404, 'epoch': 2.3}\n",
+ "{'loss': 1.2577, 'grad_norm': 1.3301641941070557, 'learning_rate': 0.0001506631299734748, 'epoch': 2.47}\n",
+ "{'loss': 1.2556, 'grad_norm': 1.3167439699172974, 'learning_rate': 0.0001471264367816092, 'epoch': 2.65}\n",
+ "{'loss': 1.2661, 'grad_norm': 1.1702375411987305, 'learning_rate': 0.0001435897435897436, 'epoch': 2.83}\n",
+ "{'loss': 1.235, 'grad_norm': 1.1984246969223022, 'learning_rate': 0.000140053050397878, 'epoch': 3.0}\n",
+ "{'loss': 0.9804, 'grad_norm': 1.4932502508163452, 'learning_rate': 0.0001365163572060124, 'epoch': 3.18}\n",
+ "{'loss': 0.9925, 'grad_norm': 1.5164968967437744, 'learning_rate': 0.00013297966401414678, 'epoch': 3.36}\n",
+ "{'loss': 0.9981, 'grad_norm': 1.529332160949707, 'learning_rate': 0.0001294429708222812, 'epoch': 3.53}\n",
+ "{'loss': 0.9808, 'grad_norm': 1.5573309659957886, 'learning_rate': 0.00012590627763041555, 'epoch': 3.71}\n",
+ "{'loss': 1.0228, 'grad_norm': 1.630372166633606, 'learning_rate': 0.00012236958443854996, 'epoch': 3.89}\n",
+ "{'loss': 0.9318, 'grad_norm': 1.5458662509918213, 'learning_rate': 0.00011883289124668435, 'epoch': 4.06}\n",
+ "{'loss': 0.7395, 'grad_norm': 1.7877501249313354, 'learning_rate': 0.00011529619805481875, 'epoch': 4.24}\n",
+ "{'loss': 0.7408, 'grad_norm': 1.831641435623169, 'learning_rate': 0.00011175950486295315, 'epoch': 4.42}\n",
+ "{'loss': 0.7743, 'grad_norm': 2.053892135620117, 'learning_rate': 0.00010822281167108754, 'epoch': 4.59}\n",
+ "{'loss': 0.7734, 'grad_norm': 2.142253875732422, 'learning_rate': 0.00010468611847922194, 'epoch': 4.77}\n",
+ "{'loss': 0.7944, 'grad_norm': 2.088791847229004, 'learning_rate': 0.00010114942528735633, 'epoch': 4.95}\n",
+ "{'loss': 0.6303, 'grad_norm': 1.8649368286132812, 'learning_rate': 9.761273209549072e-05, 'epoch': 5.12}\n",
+ "{'loss': 0.5557, 'grad_norm': 1.5683393478393555, 'learning_rate': 9.407603890362513e-05, 'epoch': 5.3}\n",
+ "{'loss': 0.56, 'grad_norm': 1.6868833303451538, 'learning_rate': 9.053934571175951e-05, 'epoch': 5.48}\n",
+ "{'loss': 0.5884, 'grad_norm': 1.896024465560913, 'learning_rate': 8.70026525198939e-05, 'epoch': 5.65}\n",
+ "{'loss': 0.5797, 'grad_norm': 2.199939489364624, 'learning_rate': 8.34659593280283e-05, 'epoch': 5.83}\n",
+ "{'loss': 0.5782, 'grad_norm': 1.5285065174102783, 'learning_rate': 7.99292661361627e-05, 'epoch': 6.01}\n",
+ "{'loss': 0.3976, 'grad_norm': 2.0019567012786865, 'learning_rate': 7.639257294429708e-05, 'epoch': 6.18}\n",
+ "{'loss': 0.411, 'grad_norm': 1.8241479396820068, 'learning_rate': 7.285587975243147e-05, 'epoch': 6.36}\n",
+ "{'loss': 0.4288, 'grad_norm': 2.22087025642395, 'learning_rate': 6.931918656056587e-05, 'epoch': 6.54}\n",
+ "{'loss': 0.4213, 'grad_norm': 1.6254255771636963, 'learning_rate': 6.578249336870027e-05, 'epoch': 6.71}\n",
+ "{'loss': 0.4276, 'grad_norm': 1.4894341230392456, 'learning_rate': 6.224580017683466e-05, 'epoch': 6.89}\n",
+ "{'loss': 0.3844, 'grad_norm': 1.7346247434616089, 'learning_rate': 5.870910698496905e-05, 'epoch': 7.07}\n",
+ "{'loss': 0.2934, 'grad_norm': 1.558210015296936, 'learning_rate': 5.517241379310345e-05, 'epoch': 7.24}\n",
+ "{'loss': 0.3127, 'grad_norm': 1.2885392904281616, 'learning_rate': 5.163572060123785e-05, 'epoch': 7.42}\n",
+ "{'loss': 0.3137, 'grad_norm': 1.7340631484985352, 'learning_rate': 4.809902740937224e-05, 'epoch': 7.6}\n",
+ "{'loss': 0.3042, 'grad_norm': 1.8956586122512817, 'learning_rate': 4.4562334217506634e-05, 'epoch': 7.77}\n",
+ "{'loss': 0.3177, 'grad_norm': 1.6758400201797485, 'learning_rate': 4.1025641025641023e-05, 'epoch': 7.95}\n",
+ "{'loss': 0.2466, 'grad_norm': 1.4491084814071655, 'learning_rate': 3.7488947833775426e-05, 'epoch': 8.13}\n",
+ "{'loss': 0.226, 'grad_norm': 1.810203194618225, 'learning_rate': 3.3952254641909815e-05, 'epoch': 8.3}\n",
+ "{'loss': 0.2294, 'grad_norm': 1.1841636896133423, 'learning_rate': 3.041556145004421e-05, 'epoch': 8.48}\n",
+ "{'loss': 0.2356, 'grad_norm': 1.1479746103286743, 'learning_rate': 2.6878868258178604e-05, 'epoch': 8.66}\n",
+ "{'loss': 0.2364, 'grad_norm': 1.9742052555084229, 'learning_rate': 2.3342175066313e-05, 'epoch': 8.83}\n",
+ "{'loss': 0.2392, 'grad_norm': 1.2130563259124756, 'learning_rate': 1.9805481874447392e-05, 'epoch': 9.01}\n",
+ "{'loss': 0.181, 'grad_norm': 1.0886257886886597, 'learning_rate': 1.6268788682581788e-05, 'epoch': 9.19}\n",
+ "{'loss': 0.1906, 'grad_norm': 1.0989885330200195, 'learning_rate': 1.273209549071618e-05, 'epoch': 9.36}\n",
+ "{'loss': 0.1877, 'grad_norm': 1.1791963577270508, 'learning_rate': 9.195402298850575e-06, 'epoch': 9.54}\n",
+ "{'loss': 0.1881, 'grad_norm': 1.712857961654663, 'learning_rate': 5.658709106984969e-06, 'epoch': 9.72}\n",
+ "{'loss': 0.1891, 'grad_norm': 0.9620760083198547, 'learning_rate': 2.1220159151193635e-06, 'epoch': 9.89}\n",
+ "{'train_runtime': 5102.7155, 'train_samples_per_second': 8.874, 'train_steps_per_second': 1.109, 'train_loss': 0.7989002002001652, 'epoch': 10.0}\n",
+ "100%|█████████████████████████████████████| 5660/5660 [1:25:02<00:00, 1.11it/s]\n",
+ "(5) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n",
+ "5102.7155 seconds used for training.\n",
+ "85.05 minutes used for training.\n",
+ "Peak reserved memory = 3.023 GB.\n",
+ "Peak reserved memory for training = 0.0 GB.\n",
+ "Peak reserved memory % of max memory = 25.204 %.\n",
+ "Peak reserved memory for training % of max memory = 0.0 %.\n",
+ "Evaluating fine-tuned model: unsloth/Qwen2-0.5B-Instruct-bnb-4bit\n",
+ "100%|███████████████████████████████████████| 1133/1133 [36:52<00:00, 1.95s/it]\n",
+ " chinese ... unsloth/Qwen2-0.5B-Instruct-bnb-4bit(finetuned)\n",
+ "0 老耿端起枪,眯缝起一只三角眼,一搂扳机响了枪,冰雹般的金麻雀劈哩啪啦往下落,铁砂子在柳枝间飞... ... Old Geng raised his rifle, squinted his eyes, ...\n",
+ "\n",
+ "[1 rows x 4 columns]\n",
+ "(6) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n",
+ "3.023 GB of memory reserved.\n",
+ "Unsloth: Merging 4bit and LoRA weights to 4bit...\n",
+ "This might take 5 minutes...\n",
+ "Done.\n",
+ "Unsloth: Saving tokenizer... Done.\n",
+ "Unsloth: Saving model... This might take 10 minutes for Llama-7b... Done.\n",
+ "make: Entering directory '/home/inflaton/code/projects/courses/novel-translation/llama.cpp'\n",
+ "I ccache not found. Consider installing it for faster compilation.\n",
+ "I llama.cpp build info: \n",
+ "I UNAME_S: Linux\n",
+ "I UNAME_P: x86_64\n",
+ "I UNAME_M: x86_64\n",
+ "I CFLAGS: -I. -Icommon -D_XOPEN_SOURCE=600 -D_GNU_SOURCE -DNDEBUG -DGGML_USE_OPENMP -DGGML_USE_LLAMAFILE -std=c11 -fPIC -O3 -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wshadow -Wstrict-prototypes -Wpointer-arith -Wmissing-prototypes -Werror=implicit-int -Werror=implicit-function-declaration -pthread -march=native -mtune=native -fopenmp -Wdouble-promotion \n",
+ "I CXXFLAGS: -std=c++11 -fPIC -O3 -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wmissing-declarations -Wmissing-noreturn -pthread -fopenmp -march=native -mtune=native -Wno-array-bounds -Wno-format-truncation -Wextra-semi -I. -Icommon -D_XOPEN_SOURCE=600 -D_GNU_SOURCE -DNDEBUG -DGGML_USE_OPENMP -DGGML_USE_LLAMAFILE \n",
+ "I NVCCFLAGS: -std=c++11 -O3 \n",
+ "I LDFLAGS: \n",
+ "I CC: cc (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0\n",
+ "I CXX: c++ (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0\n",
+ "\n",
+ "rm -vrf *.o tests/*.o *.so *.a *.dll common/build-info.cpp *.dot *.gcno tests/*.gcno *.gcda tests/*.gcda *.gcov tests/*.gcov lcov-report gcovr-report libllava.a llama-baby-llama llama-batched llama-batched-bench llama-bench llama-benchmark-matmult llama-cli llama-convert-llama2c-to-ggml llama-embedding llama-eval-callback llama-export-lora llama-finetune llama-gbnf-validator llama-gguf llama-gguf-split llama-gritlm llama-imatrix llama-infill llama-llava-cli llama-lookahead llama-lookup llama-lookup-create llama-lookup-merge llama-lookup-stats llama-parallel llama-passkey llama-perplexity llama-q8dot llama-quantize llama-quantize-stats llama-retrieval llama-save-load-state llama-server llama-simple llama-speculative llama-tokenize llama-train-text-from-scratch llama-vdot llama-cvector-generator tests/test-c.o tests/test-autorelease tests/test-backend-ops tests/test-double-float tests/test-grad0 tests/test-grammar-integration tests/test-grammar-parser tests/test-json-schema-to-grammar tests/test-llama-grammar tests/test-model-load-cancel tests/test-opt tests/test-quantize-fns tests/test-quantize-perf tests/test-rope tests/test-sampling tests/test-tokenizer-0 tests/test-tokenizer-1-bpe tests/test-tokenizer-1-spm\n",
+ "rm -vrf ggml-cuda/*.o\n",
+ "rm -vrf ggml-cuda/template-instances/*.o\n",
+ "find examples pocs -type f -name \"*.o\" -delete\n",
+ "make: Leaving directory '/home/inflaton/code/projects/courses/novel-translation/llama.cpp'\n",
+ "Unsloth: Merging 4bit and LoRA weights to 16bit...\n",
+ "Unsloth: Will use up to 30.26 out of 47.05 RAM for saving.\n",
+ "100%|███████████████████████████████████████████| 24/24 [00:00<00:00, 43.09it/s]\n",
+ "Unsloth: Saving tokenizer... Done.\n",
+ "Unsloth: Saving model... This might take 5 minutes for Llama-7b...\n",
+ "Done.\n",
+ "Unsloth: Converting qwen2 model. Can use fast conversion = False.\n",
+ "==((====))== Unsloth: Conversion from QLoRA to GGUF information\n",
+ " \\\\ /| [0] Installing llama.cpp will take 3 minutes.\n",
+ "O^O/ \\_/ \\ [1] Converting HF to GUUF 16bits will take 3 minutes.\n",
+ "\\ / [2] Converting GGUF 16bits to ['q5_k_m'] will take 10 minutes each.\n",
+ " \"-____-\" In total, you will have to wait at least 16 minutes.\n",
+ "\n",
+ "Unsloth: [0] Installing llama.cpp. This will take 3 minutes...\n",
+ "Unsloth: [1] Converting model at models/Qwen2-0.5B-Instruct-bnb-4bit-MAC-q5_k_m into bf16 GGUF format.\n",
+ "The output location will be ./models/Qwen2-0.5B-Instruct-bnb-4bit-MAC-q5_k_m/unsloth.BF16.gguf\n",
+ "This will take 3 minutes...\n",
+ "INFO:hf-to-gguf:Loading model: Qwen2-0.5B-Instruct-bnb-4bit-MAC-q5_k_m\n",
+ "INFO:gguf.gguf_writer:gguf: This GGUF file is for Little Endian only\n",
+ "INFO:hf-to-gguf:Set model parameters\n",
+ "INFO:hf-to-gguf:gguf: context length = 32768\n",
+ "INFO:hf-to-gguf:gguf: embedding length = 896\n",
+ "INFO:hf-to-gguf:gguf: feed forward length = 4864\n",
+ "INFO:hf-to-gguf:gguf: head count = 14\n",
+ "INFO:hf-to-gguf:gguf: key-value head count = 2\n",
+ "INFO:hf-to-gguf:gguf: rope theta = 1000000.0\n",
+ "INFO:hf-to-gguf:gguf: rms norm epsilon = 1e-06\n",
+ "INFO:hf-to-gguf:gguf: file type = 32\n",
+ "INFO:hf-to-gguf:Set model tokenizer\n",
+ "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n",
+ "INFO:gguf.vocab:Adding 151387 merge(s).\n",
+ "INFO:gguf.vocab:Setting special token type eos to 151645\n",
+ "INFO:gguf.vocab:Setting special token type pad to 151643\n",
+ "INFO:gguf.vocab:Setting special token type bos to 151643\n",
+ "INFO:gguf.vocab:Setting chat_template to {% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\n",
+ "You are a helpful assistant.<|im_end|>\n",
+ "' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n",
+ "' + message['content'] + '<|im_end|>' + '\n",
+ "'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n",
+ "' }}{% endif %}\n",
+ "INFO:hf-to-gguf:Exporting model to 'models/Qwen2-0.5B-Instruct-bnb-4bit-MAC-q5_k_m/unsloth.BF16.gguf'\n",
+ "INFO:hf-to-gguf:gguf: loading model part 'model.safetensors'\n",
+ "INFO:hf-to-gguf:token_embd.weight, torch.bfloat16 --> BF16, shape = {896, 151936}\n",
+ "INFO:hf-to-gguf:blk.0.attn_norm.weight, torch.bfloat16 --> F32, shape = {896}\n",
+ "INFO:hf-to-gguf:blk.0.ffn_down.weight, torch.bfloat16 --> BF16, shape = {4864, 896}\n",
+ "INFO:hf-to-gguf:blk.0.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {896, 4864}\n",
+ "INFO:hf-to-gguf:blk.0.ffn_up.weight, torch.bfloat16 --> BF16, shape = {896, 4864}\n",
+ "INFO:hf-to-gguf:blk.0.ffn_norm.weight, torch.bfloat16 --> F32, shape = {896}\n",
+ "INFO:hf-to-gguf:blk.0.attn_k.bias, torch.bfloat16 --> F32, shape = {128}\n",
+ "INFO:hf-to-gguf:blk.0.attn_k.weight, torch.bfloat16 --> BF16, shape = {896, 128}\n",
+ "INFO:hf-to-gguf:blk.0.attn_output.weight, torch.bfloat16 --> BF16, shape = {896, 896}\n",
+ "INFO:hf-to-gguf:blk.0.attn_q.bias, torch.bfloat16 --> F32, shape = {896}\n",
+ "INFO:hf-to-gguf:blk.0.attn_q.weight, torch.bfloat16 --> BF16, shape = {896, 896}\n",
+ "INFO:hf-to-gguf:blk.0.attn_v.bias, torch.bfloat16 --> F32, shape = {128}\n",
+ "INFO:hf-to-gguf:blk.0.attn_v.weight, torch.bfloat16 --> BF16, shape = {896, 128}\n",
+ "INFO:hf-to-gguf:blk.1.attn_norm.weight, torch.bfloat16 --> F32, shape = {896}\n",
+ "INFO:hf-to-gguf:blk.1.ffn_down.weight, torch.bfloat16 --> BF16, shape = {4864, 896}\n",
+ "INFO:hf-to-gguf:blk.1.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {896, 4864}\n",
+ "INFO:hf-to-gguf:blk.1.ffn_up.weight, torch.bfloat16 --> BF16, shape = {896, 4864}\n",
+ "INFO:hf-to-gguf:blk.1.ffn_norm.weight, torch.bfloat16 --> F32, shape = {896}\n",
+ "INFO:hf-to-gguf:blk.1.attn_k.bias, torch.bfloat16 --> F32, shape = {128}\n",
+ "INFO:hf-to-gguf:blk.1.attn_k.weight, torch.bfloat16 --> BF16, shape = {896, 128}\n",
+ "INFO:hf-to-gguf:blk.1.attn_output.weight, torch.bfloat16 --> BF16, shape = {896, 896}\n",
+ "INFO:hf-to-gguf:blk.1.attn_q.bias, torch.bfloat16 --> F32, shape = {896}\n",
+ "INFO:hf-to-gguf:blk.1.attn_q.weight, torch.bfloat16 --> BF16, shape = {896, 896}\n",
+ "INFO:hf-to-gguf:blk.1.attn_v.bias, torch.bfloat16 --> F32, shape = {128}\n",
+ "INFO:hf-to-gguf:blk.1.attn_v.weight, torch.bfloat16 --> BF16, shape = {896, 128}\n",
+ "INFO:hf-to-gguf:blk.10.attn_norm.weight, torch.bfloat16 --> F32, shape = {896}\n",
+ "INFO:hf-to-gguf:blk.10.ffn_down.weight, torch.bfloat16 --> BF16, shape = {4864, 896}\n",
+ "INFO:hf-to-gguf:blk.10.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {896, 4864}\n",
+ "INFO:hf-to-gguf:blk.10.ffn_up.weight, torch.bfloat16 --> BF16, shape = {896, 4864}\n",
+ "INFO:hf-to-gguf:blk.10.ffn_norm.weight, torch.bfloat16 --> F32, shape = {896}\n",
+ "INFO:hf-to-gguf:blk.10.attn_k.bias, torch.bfloat16 --> F32, shape = {128}\n",
+ "INFO:hf-to-gguf:blk.10.attn_k.weight, torch.bfloat16 --> BF16, shape = {896, 128}\n",
+ "INFO:hf-to-gguf:blk.10.attn_output.weight, torch.bfloat16 --> BF16, shape = {896, 896}\n",
+ "INFO:hf-to-gguf:blk.10.attn_q.bias, torch.bfloat16 --> F32, shape = {896}\n",
+ "INFO:hf-to-gguf:blk.10.attn_q.weight, torch.bfloat16 --> BF16, shape = {896, 896}\n",
+ "INFO:hf-to-gguf:blk.10.attn_v.bias, torch.bfloat16 --> F32, shape = {128}\n",
+ "INFO:hf-to-gguf:blk.10.attn_v.weight, torch.bfloat16 --> BF16, shape = {896, 128}\n",
+ "INFO:hf-to-gguf:blk.11.attn_norm.weight, torch.bfloat16 --> F32, shape = {896}\n",
+ "INFO:hf-to-gguf:blk.11.ffn_down.weight, torch.bfloat16 --> BF16, shape = {4864, 896}\n",
+ "INFO:hf-to-gguf:blk.11.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {896, 4864}\n",
+ "INFO:hf-to-gguf:blk.11.ffn_up.weight, torch.bfloat16 --> BF16, shape = {896, 4864}\n",
+ "INFO:hf-to-gguf:blk.11.ffn_norm.weight, torch.bfloat16 --> F32, shape = {896}\n",
+ "INFO:hf-to-gguf:blk.11.attn_k.bias, torch.bfloat16 --> F32, shape = {128}\n",
+ "INFO:hf-to-gguf:blk.11.attn_k.weight, torch.bfloat16 --> BF16, shape = {896, 128}\n",
+ "INFO:hf-to-gguf:blk.11.attn_output.weight, torch.bfloat16 --> BF16, shape = {896, 896}\n",
+ "INFO:hf-to-gguf:blk.11.attn_q.bias, torch.bfloat16 --> F32, shape = {896}\n",
+ "INFO:hf-to-gguf:blk.11.attn_q.weight, torch.bfloat16 --> BF16, shape = {896, 896}\n",
+ "INFO:hf-to-gguf:blk.11.attn_v.bias, torch.bfloat16 --> F32, shape = {128}\n",
+ "INFO:hf-to-gguf:blk.11.attn_v.weight, torch.bfloat16 --> BF16, shape = {896, 128}\n",
+ "INFO:hf-to-gguf:blk.12.attn_norm.weight, torch.bfloat16 --> F32, shape = {896}\n",
+ "INFO:hf-to-gguf:blk.12.ffn_down.weight, torch.bfloat16 --> BF16, shape = {4864, 896}\n",
+ "INFO:hf-to-gguf:blk.12.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {896, 4864}\n",
+ "INFO:hf-to-gguf:blk.12.ffn_up.weight, torch.bfloat16 --> BF16, shape = {896, 4864}\n",
+ "INFO:hf-to-gguf:blk.12.ffn_norm.weight, torch.bfloat16 --> F32, shape = {896}\n",
+ "INFO:hf-to-gguf:blk.12.attn_k.bias, torch.bfloat16 --> F32, shape = {128}\n",
+ "INFO:hf-to-gguf:blk.12.attn_k.weight, torch.bfloat16 --> BF16, shape = {896, 128}\n",
+ "INFO:hf-to-gguf:blk.12.attn_output.weight, torch.bfloat16 --> BF16, shape = {896, 896}\n",
+ "INFO:hf-to-gguf:blk.12.attn_q.bias, torch.bfloat16 --> F32, shape = {896}\n",
+ "INFO:hf-to-gguf:blk.12.attn_q.weight, torch.bfloat16 --> BF16, shape = {896, 896}\n",
+ "INFO:hf-to-gguf:blk.12.attn_v.bias, torch.bfloat16 --> F32, shape = {128}\n",
+ "INFO:hf-to-gguf:blk.12.attn_v.weight, torch.bfloat16 --> BF16, shape = {896, 128}\n",
+ "INFO:hf-to-gguf:blk.13.attn_norm.weight, torch.bfloat16 --> F32, shape = {896}\n",
+ "INFO:hf-to-gguf:blk.13.ffn_down.weight, torch.bfloat16 --> BF16, shape = {4864, 896}\n",
+ "INFO:hf-to-gguf:blk.13.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {896, 4864}\n",
+ "INFO:hf-to-gguf:blk.13.ffn_up.weight, torch.bfloat16 --> BF16, shape = {896, 4864}\n",
+ "INFO:hf-to-gguf:blk.13.ffn_norm.weight, torch.bfloat16 --> F32, shape = {896}\n",
+ "INFO:hf-to-gguf:blk.13.attn_k.bias, torch.bfloat16 --> F32, shape = {128}\n",
+ "INFO:hf-to-gguf:blk.13.attn_k.weight, torch.bfloat16 --> BF16, shape = {896, 128}\n",
+ "INFO:hf-to-gguf:blk.13.attn_output.weight, torch.bfloat16 --> BF16, shape = {896, 896}\n",
+ "INFO:hf-to-gguf:blk.13.attn_q.bias, torch.bfloat16 --> F32, shape = {896}\n",
+ "INFO:hf-to-gguf:blk.13.attn_q.weight, torch.bfloat16 --> BF16, shape = {896, 896}\n",
+ "INFO:hf-to-gguf:blk.13.attn_v.bias, torch.bfloat16 --> F32, shape = {128}\n",
+ "INFO:hf-to-gguf:blk.13.attn_v.weight, torch.bfloat16 --> BF16, shape = {896, 128}\n",
+ "INFO:hf-to-gguf:blk.14.attn_norm.weight, torch.bfloat16 --> F32, shape = {896}\n",
+ "INFO:hf-to-gguf:blk.14.ffn_down.weight, torch.bfloat16 --> BF16, shape = {4864, 896}\n",
+ "INFO:hf-to-gguf:blk.14.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {896, 4864}\n",
+ "INFO:hf-to-gguf:blk.14.ffn_up.weight, torch.bfloat16 --> BF16, shape = {896, 4864}\n",
+ "INFO:hf-to-gguf:blk.14.ffn_norm.weight, torch.bfloat16 --> F32, shape = {896}\n",
+ "INFO:hf-to-gguf:blk.14.attn_k.bias, torch.bfloat16 --> F32, shape = {128}\n",
+ "INFO:hf-to-gguf:blk.14.attn_k.weight, torch.bfloat16 --> BF16, shape = {896, 128}\n",
+ "INFO:hf-to-gguf:blk.14.attn_output.weight, torch.bfloat16 --> BF16, shape = {896, 896}\n",
+ "INFO:hf-to-gguf:blk.14.attn_q.bias, torch.bfloat16 --> F32, shape = {896}\n",
+ "INFO:hf-to-gguf:blk.14.attn_q.weight, torch.bfloat16 --> BF16, shape = {896, 896}\n",
+ "INFO:hf-to-gguf:blk.14.attn_v.bias, torch.bfloat16 --> F32, shape = {128}\n",
+ "INFO:hf-to-gguf:blk.14.attn_v.weight, torch.bfloat16 --> BF16, shape = {896, 128}\n",
+ "INFO:hf-to-gguf:blk.15.attn_norm.weight, torch.bfloat16 --> F32, shape = {896}\n",
+ "INFO:hf-to-gguf:blk.15.ffn_down.weight, torch.bfloat16 --> BF16, shape = {4864, 896}\n",
+ "INFO:hf-to-gguf:blk.15.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {896, 4864}\n",
+ "INFO:hf-to-gguf:blk.15.ffn_up.weight, torch.bfloat16 --> BF16, shape = {896, 4864}\n",
+ "INFO:hf-to-gguf:blk.15.ffn_norm.weight, torch.bfloat16 --> F32, shape = {896}\n",
+ "INFO:hf-to-gguf:blk.15.attn_k.bias, torch.bfloat16 --> F32, shape = {128}\n",
+ "INFO:hf-to-gguf:blk.15.attn_k.weight, torch.bfloat16 --> BF16, shape = {896, 128}\n",
+ "INFO:hf-to-gguf:blk.15.attn_output.weight, torch.bfloat16 --> BF16, shape = {896, 896}\n",
+ "INFO:hf-to-gguf:blk.15.attn_q.bias, torch.bfloat16 --> F32, shape = {896}\n",
+ "INFO:hf-to-gguf:blk.15.attn_q.weight, torch.bfloat16 --> BF16, shape = {896, 896}\n",
+ "INFO:hf-to-gguf:blk.15.attn_v.bias, torch.bfloat16 --> F32, shape = {128}\n",
+ "INFO:hf-to-gguf:blk.15.attn_v.weight, torch.bfloat16 --> BF16, shape = {896, 128}\n",
+ "INFO:hf-to-gguf:blk.16.attn_norm.weight, torch.bfloat16 --> F32, shape = {896}\n",
+ "INFO:hf-to-gguf:blk.16.ffn_down.weight, torch.bfloat16 --> BF16, shape = {4864, 896}\n",
+ "INFO:hf-to-gguf:blk.16.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {896, 4864}\n",
+ "INFO:hf-to-gguf:blk.16.ffn_up.weight, torch.bfloat16 --> BF16, shape = {896, 4864}\n",
+ "INFO:hf-to-gguf:blk.16.ffn_norm.weight, torch.bfloat16 --> F32, shape = {896}\n",
+ "INFO:hf-to-gguf:blk.16.attn_k.bias, torch.bfloat16 --> F32, shape = {128}\n",
+ "INFO:hf-to-gguf:blk.16.attn_k.weight, torch.bfloat16 --> BF16, shape = {896, 128}\n",
+ "INFO:hf-to-gguf:blk.16.attn_output.weight, torch.bfloat16 --> BF16, shape = {896, 896}\n",
+ "INFO:hf-to-gguf:blk.16.attn_q.bias, torch.bfloat16 --> F32, shape = {896}\n",
+ "INFO:hf-to-gguf:blk.16.attn_q.weight, torch.bfloat16 --> BF16, shape = {896, 896}\n",
+ "INFO:hf-to-gguf:blk.16.attn_v.bias, torch.bfloat16 --> F32, shape = {128}\n",
+ "INFO:hf-to-gguf:blk.16.attn_v.weight, torch.bfloat16 --> BF16, shape = {896, 128}\n",
+ "INFO:hf-to-gguf:blk.17.attn_norm.weight, torch.bfloat16 --> F32, shape = {896}\n",
+ "INFO:hf-to-gguf:blk.17.ffn_down.weight, torch.bfloat16 --> BF16, shape = {4864, 896}\n",
+ "INFO:hf-to-gguf:blk.17.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {896, 4864}\n",
+ "INFO:hf-to-gguf:blk.17.ffn_up.weight, torch.bfloat16 --> BF16, shape = {896, 4864}\n",
+ "INFO:hf-to-gguf:blk.17.ffn_norm.weight, torch.bfloat16 --> F32, shape = {896}\n",
+ "INFO:hf-to-gguf:blk.17.attn_k.bias, torch.bfloat16 --> F32, shape = {128}\n",
+ "INFO:hf-to-gguf:blk.17.attn_k.weight, torch.bfloat16 --> BF16, shape = {896, 128}\n",
+ "INFO:hf-to-gguf:blk.17.attn_output.weight, torch.bfloat16 --> BF16, shape = {896, 896}\n",
+ "INFO:hf-to-gguf:blk.17.attn_q.bias, torch.bfloat16 --> F32, shape = {896}\n",
+ "INFO:hf-to-gguf:blk.17.attn_q.weight, torch.bfloat16 --> BF16, shape = {896, 896}\n",
+ "INFO:hf-to-gguf:blk.17.attn_v.bias, torch.bfloat16 --> F32, shape = {128}\n",
+ "INFO:hf-to-gguf:blk.17.attn_v.weight, torch.bfloat16 --> BF16, shape = {896, 128}\n",
+ "INFO:hf-to-gguf:blk.18.attn_norm.weight, torch.bfloat16 --> F32, shape = {896}\n",
+ "INFO:hf-to-gguf:blk.18.ffn_down.weight, torch.bfloat16 --> BF16, shape = {4864, 896}\n",
+ "INFO:hf-to-gguf:blk.18.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {896, 4864}\n",
+ "INFO:hf-to-gguf:blk.18.ffn_up.weight, torch.bfloat16 --> BF16, shape = {896, 4864}\n",
+ "INFO:hf-to-gguf:blk.18.ffn_norm.weight, torch.bfloat16 --> F32, shape = {896}\n",
+ "INFO:hf-to-gguf:blk.18.attn_k.bias, torch.bfloat16 --> F32, shape = {128}\n",
+ "INFO:hf-to-gguf:blk.18.attn_k.weight, torch.bfloat16 --> BF16, shape = {896, 128}\n",
+ "INFO:hf-to-gguf:blk.18.attn_output.weight, torch.bfloat16 --> BF16, shape = {896, 896}\n",
+ "INFO:hf-to-gguf:blk.18.attn_q.bias, torch.bfloat16 --> F32, shape = {896}\n",
+ "INFO:hf-to-gguf:blk.18.attn_q.weight, torch.bfloat16 --> BF16, shape = {896, 896}\n",
+ "INFO:hf-to-gguf:blk.18.attn_v.bias, torch.bfloat16 --> F32, shape = {128}\n",
+ "INFO:hf-to-gguf:blk.18.attn_v.weight, torch.bfloat16 --> BF16, shape = {896, 128}\n",
+ "INFO:hf-to-gguf:blk.19.attn_norm.weight, torch.bfloat16 --> F32, shape = {896}\n",
+ "INFO:hf-to-gguf:blk.19.ffn_down.weight, torch.bfloat16 --> BF16, shape = {4864, 896}\n",
+ "INFO:hf-to-gguf:blk.19.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {896, 4864}\n",
+ "INFO:hf-to-gguf:blk.19.ffn_up.weight, torch.bfloat16 --> BF16, shape = {896, 4864}\n",
+ "INFO:hf-to-gguf:blk.19.ffn_norm.weight, torch.bfloat16 --> F32, shape = {896}\n",
+ "INFO:hf-to-gguf:blk.19.attn_k.bias, torch.bfloat16 --> F32, shape = {128}\n",
+ "INFO:hf-to-gguf:blk.19.attn_k.weight, torch.bfloat16 --> BF16, shape = {896, 128}\n",
+ "INFO:hf-to-gguf:blk.19.attn_output.weight, torch.bfloat16 --> BF16, shape = {896, 896}\n",
+ "INFO:hf-to-gguf:blk.19.attn_q.bias, torch.bfloat16 --> F32, shape = {896}\n",
+ "INFO:hf-to-gguf:blk.19.attn_q.weight, torch.bfloat16 --> BF16, shape = {896, 896}\n",
+ "INFO:hf-to-gguf:blk.19.attn_v.bias, torch.bfloat16 --> F32, shape = {128}\n",
+ "INFO:hf-to-gguf:blk.19.attn_v.weight, torch.bfloat16 --> BF16, shape = {896, 128}\n",
+ "INFO:hf-to-gguf:blk.2.attn_norm.weight, torch.bfloat16 --> F32, shape = {896}\n",
+ "INFO:hf-to-gguf:blk.2.ffn_down.weight, torch.bfloat16 --> BF16, shape = {4864, 896}\n",
+ "INFO:hf-to-gguf:blk.2.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {896, 4864}\n",
+ "INFO:hf-to-gguf:blk.2.ffn_up.weight, torch.bfloat16 --> BF16, shape = {896, 4864}\n",
+ "INFO:hf-to-gguf:blk.2.ffn_norm.weight, torch.bfloat16 --> F32, shape = {896}\n",
+ "INFO:hf-to-gguf:blk.2.attn_k.bias, torch.bfloat16 --> F32, shape = {128}\n",
+ "INFO:hf-to-gguf:blk.2.attn_k.weight, torch.bfloat16 --> BF16, shape = {896, 128}\n",
+ "INFO:hf-to-gguf:blk.2.attn_output.weight, torch.bfloat16 --> BF16, shape = {896, 896}\n",
+ "INFO:hf-to-gguf:blk.2.attn_q.bias, torch.bfloat16 --> F32, shape = {896}\n",
+ "INFO:hf-to-gguf:blk.2.attn_q.weight, torch.bfloat16 --> BF16, shape = {896, 896}\n",
+ "INFO:hf-to-gguf:blk.2.attn_v.bias, torch.bfloat16 --> F32, shape = {128}\n",
+ "INFO:hf-to-gguf:blk.2.attn_v.weight, torch.bfloat16 --> BF16, shape = {896, 128}\n",
+ "INFO:hf-to-gguf:blk.20.attn_norm.weight, torch.bfloat16 --> F32, shape = {896}\n",
+ "INFO:hf-to-gguf:blk.20.ffn_down.weight, torch.bfloat16 --> BF16, shape = {4864, 896}\n",
+ "INFO:hf-to-gguf:blk.20.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {896, 4864}\n",
+ "INFO:hf-to-gguf:blk.20.ffn_up.weight, torch.bfloat16 --> BF16, shape = {896, 4864}\n",
+ "INFO:hf-to-gguf:blk.20.ffn_norm.weight, torch.bfloat16 --> F32, shape = {896}\n",
+ "INFO:hf-to-gguf:blk.20.attn_k.bias, torch.bfloat16 --> F32, shape = {128}\n",
+ "INFO:hf-to-gguf:blk.20.attn_k.weight, torch.bfloat16 --> BF16, shape = {896, 128}\n",
+ "INFO:hf-to-gguf:blk.20.attn_output.weight, torch.bfloat16 --> BF16, shape = {896, 896}\n",
+ "INFO:hf-to-gguf:blk.20.attn_q.bias, torch.bfloat16 --> F32, shape = {896}\n",
+ "INFO:hf-to-gguf:blk.20.attn_q.weight, torch.bfloat16 --> BF16, shape = {896, 896}\n",
+ "INFO:hf-to-gguf:blk.20.attn_v.bias, torch.bfloat16 --> F32, shape = {128}\n",
+ "INFO:hf-to-gguf:blk.20.attn_v.weight, torch.bfloat16 --> BF16, shape = {896, 128}\n",
+ "INFO:hf-to-gguf:blk.21.attn_norm.weight, torch.bfloat16 --> F32, shape = {896}\n",
+ "INFO:hf-to-gguf:blk.21.ffn_down.weight, torch.bfloat16 --> BF16, shape = {4864, 896}\n",
+ "INFO:hf-to-gguf:blk.21.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {896, 4864}\n",
+ "INFO:hf-to-gguf:blk.21.ffn_up.weight, torch.bfloat16 --> BF16, shape = {896, 4864}\n",
+ "INFO:hf-to-gguf:blk.21.ffn_norm.weight, torch.bfloat16 --> F32, shape = {896}\n",
+ "INFO:hf-to-gguf:blk.21.attn_k.bias, torch.bfloat16 --> F32, shape = {128}\n",
+ "INFO:hf-to-gguf:blk.21.attn_k.weight, torch.bfloat16 --> BF16, shape = {896, 128}\n",
+ "INFO:hf-to-gguf:blk.21.attn_output.weight, torch.bfloat16 --> BF16, shape = {896, 896}\n",
+ "INFO:hf-to-gguf:blk.21.attn_q.bias, torch.bfloat16 --> F32, shape = {896}\n",
+ "INFO:hf-to-gguf:blk.21.attn_q.weight, torch.bfloat16 --> BF16, shape = {896, 896}\n",
+ "INFO:hf-to-gguf:blk.21.attn_v.bias, torch.bfloat16 --> F32, shape = {128}\n",
+ "INFO:hf-to-gguf:blk.21.attn_v.weight, torch.bfloat16 --> BF16, shape = {896, 128}\n",
+ "INFO:hf-to-gguf:blk.22.attn_norm.weight, torch.bfloat16 --> F32, shape = {896}\n",
+ "INFO:hf-to-gguf:blk.22.ffn_down.weight, torch.bfloat16 --> BF16, shape = {4864, 896}\n",
+ "INFO:hf-to-gguf:blk.22.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {896, 4864}\n",
+ "INFO:hf-to-gguf:blk.22.ffn_up.weight, torch.bfloat16 --> BF16, shape = {896, 4864}\n",
+ "INFO:hf-to-gguf:blk.22.ffn_norm.weight, torch.bfloat16 --> F32, shape = {896}\n",
+ "INFO:hf-to-gguf:blk.22.attn_k.bias, torch.bfloat16 --> F32, shape = {128}\n",
+ "INFO:hf-to-gguf:blk.22.attn_k.weight, torch.bfloat16 --> BF16, shape = {896, 128}\n",
+ "INFO:hf-to-gguf:blk.22.attn_output.weight, torch.bfloat16 --> BF16, shape = {896, 896}\n",
+ "INFO:hf-to-gguf:blk.22.attn_q.bias, torch.bfloat16 --> F32, shape = {896}\n",
+ "INFO:hf-to-gguf:blk.22.attn_q.weight, torch.bfloat16 --> BF16, shape = {896, 896}\n",
+ "INFO:hf-to-gguf:blk.22.attn_v.bias, torch.bfloat16 --> F32, shape = {128}\n",
+ "INFO:hf-to-gguf:blk.22.attn_v.weight, torch.bfloat16 --> BF16, shape = {896, 128}\n",
+ "INFO:hf-to-gguf:blk.23.attn_norm.weight, torch.bfloat16 --> F32, shape = {896}\n",
+ "INFO:hf-to-gguf:blk.23.ffn_down.weight, torch.bfloat16 --> BF16, shape = {4864, 896}\n",
+ "INFO:hf-to-gguf:blk.23.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {896, 4864}\n",
+ "INFO:hf-to-gguf:blk.23.ffn_up.weight, torch.bfloat16 --> BF16, shape = {896, 4864}\n",
+ "INFO:hf-to-gguf:blk.23.ffn_norm.weight, torch.bfloat16 --> F32, shape = {896}\n",
+ "INFO:hf-to-gguf:blk.23.attn_k.bias, torch.bfloat16 --> F32, shape = {128}\n",
+ "INFO:hf-to-gguf:blk.23.attn_k.weight, torch.bfloat16 --> BF16, shape = {896, 128}\n",
+ "INFO:hf-to-gguf:blk.23.attn_output.weight, torch.bfloat16 --> BF16, shape = {896, 896}\n",
+ "INFO:hf-to-gguf:blk.23.attn_q.bias, torch.bfloat16 --> F32, shape = {896}\n",
+ "INFO:hf-to-gguf:blk.23.attn_q.weight, torch.bfloat16 --> BF16, shape = {896, 896}\n",
+ "INFO:hf-to-gguf:blk.23.attn_v.bias, torch.bfloat16 --> F32, shape = {128}\n",
+ "INFO:hf-to-gguf:blk.23.attn_v.weight, torch.bfloat16 --> BF16, shape = {896, 128}\n",
+ "INFO:hf-to-gguf:blk.3.attn_norm.weight, torch.bfloat16 --> F32, shape = {896}\n",
+ "INFO:hf-to-gguf:blk.3.ffn_down.weight, torch.bfloat16 --> BF16, shape = {4864, 896}\n",
+ "INFO:hf-to-gguf:blk.3.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {896, 4864}\n",
+ "INFO:hf-to-gguf:blk.3.ffn_up.weight, torch.bfloat16 --> BF16, shape = {896, 4864}\n",
+ "INFO:hf-to-gguf:blk.3.ffn_norm.weight, torch.bfloat16 --> F32, shape = {896}\n",
+ "INFO:hf-to-gguf:blk.3.attn_k.bias, torch.bfloat16 --> F32, shape = {128}\n",
+ "INFO:hf-to-gguf:blk.3.attn_k.weight, torch.bfloat16 --> BF16, shape = {896, 128}\n",
+ "INFO:hf-to-gguf:blk.3.attn_output.weight, torch.bfloat16 --> BF16, shape = {896, 896}\n",
+ "INFO:hf-to-gguf:blk.3.attn_q.bias, torch.bfloat16 --> F32, shape = {896}\n",
+ "INFO:hf-to-gguf:blk.3.attn_q.weight, torch.bfloat16 --> BF16, shape = {896, 896}\n",
+ "INFO:hf-to-gguf:blk.3.attn_v.bias, torch.bfloat16 --> F32, shape = {128}\n",
+ "INFO:hf-to-gguf:blk.3.attn_v.weight, torch.bfloat16 --> BF16, shape = {896, 128}\n",
+ "INFO:hf-to-gguf:blk.4.attn_norm.weight, torch.bfloat16 --> F32, shape = {896}\n",
+ "INFO:hf-to-gguf:blk.4.ffn_down.weight, torch.bfloat16 --> BF16, shape = {4864, 896}\n",
+ "INFO:hf-to-gguf:blk.4.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {896, 4864}\n",
+ "INFO:hf-to-gguf:blk.4.ffn_up.weight, torch.bfloat16 --> BF16, shape = {896, 4864}\n",
+ "INFO:hf-to-gguf:blk.4.ffn_norm.weight, torch.bfloat16 --> F32, shape = {896}\n",
+ "INFO:hf-to-gguf:blk.4.attn_k.bias, torch.bfloat16 --> F32, shape = {128}\n",
+ "INFO:hf-to-gguf:blk.4.attn_k.weight, torch.bfloat16 --> BF16, shape = {896, 128}\n",
+ "INFO:hf-to-gguf:blk.4.attn_output.weight, torch.bfloat16 --> BF16, shape = {896, 896}\n",
+ "INFO:hf-to-gguf:blk.4.attn_q.bias, torch.bfloat16 --> F32, shape = {896}\n",
+ "INFO:hf-to-gguf:blk.4.attn_q.weight, torch.bfloat16 --> BF16, shape = {896, 896}\n",
+ "INFO:hf-to-gguf:blk.4.attn_v.bias, torch.bfloat16 --> F32, shape = {128}\n",
+ "INFO:hf-to-gguf:blk.4.attn_v.weight, torch.bfloat16 --> BF16, shape = {896, 128}\n",
+ "INFO:hf-to-gguf:blk.5.attn_norm.weight, torch.bfloat16 --> F32, shape = {896}\n",
+ "INFO:hf-to-gguf:blk.5.ffn_down.weight, torch.bfloat16 --> BF16, shape = {4864, 896}\n",
+ "INFO:hf-to-gguf:blk.5.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {896, 4864}\n",
+ "INFO:hf-to-gguf:blk.5.ffn_up.weight, torch.bfloat16 --> BF16, shape = {896, 4864}\n",
+ "INFO:hf-to-gguf:blk.5.ffn_norm.weight, torch.bfloat16 --> F32, shape = {896}\n",
+ "INFO:hf-to-gguf:blk.5.attn_k.bias, torch.bfloat16 --> F32, shape = {128}\n",
+ "INFO:hf-to-gguf:blk.5.attn_k.weight, torch.bfloat16 --> BF16, shape = {896, 128}\n",
+ "INFO:hf-to-gguf:blk.5.attn_output.weight, torch.bfloat16 --> BF16, shape = {896, 896}\n",
+ "INFO:hf-to-gguf:blk.5.attn_q.bias, torch.bfloat16 --> F32, shape = {896}\n",
+ "INFO:hf-to-gguf:blk.5.attn_q.weight, torch.bfloat16 --> BF16, shape = {896, 896}\n",
+ "INFO:hf-to-gguf:blk.5.attn_v.bias, torch.bfloat16 --> F32, shape = {128}\n",
+ "INFO:hf-to-gguf:blk.5.attn_v.weight, torch.bfloat16 --> BF16, shape = {896, 128}\n",
+ "INFO:hf-to-gguf:blk.6.attn_norm.weight, torch.bfloat16 --> F32, shape = {896}\n",
+ "INFO:hf-to-gguf:blk.6.ffn_down.weight, torch.bfloat16 --> BF16, shape = {4864, 896}\n",
+ "INFO:hf-to-gguf:blk.6.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {896, 4864}\n",
+ "INFO:hf-to-gguf:blk.6.ffn_up.weight, torch.bfloat16 --> BF16, shape = {896, 4864}\n",
+ "INFO:hf-to-gguf:blk.6.ffn_norm.weight, torch.bfloat16 --> F32, shape = {896}\n",
+ "INFO:hf-to-gguf:blk.6.attn_k.bias, torch.bfloat16 --> F32, shape = {128}\n",
+ "INFO:hf-to-gguf:blk.6.attn_k.weight, torch.bfloat16 --> BF16, shape = {896, 128}\n",
+ "INFO:hf-to-gguf:blk.6.attn_output.weight, torch.bfloat16 --> BF16, shape = {896, 896}\n",
+ "INFO:hf-to-gguf:blk.6.attn_q.bias, torch.bfloat16 --> F32, shape = {896}\n",
+ "INFO:hf-to-gguf:blk.6.attn_q.weight, torch.bfloat16 --> BF16, shape = {896, 896}\n",
+ "INFO:hf-to-gguf:blk.6.attn_v.bias, torch.bfloat16 --> F32, shape = {128}\n",
+ "INFO:hf-to-gguf:blk.6.attn_v.weight, torch.bfloat16 --> BF16, shape = {896, 128}\n",
+ "INFO:hf-to-gguf:blk.7.attn_norm.weight, torch.bfloat16 --> F32, shape = {896}\n",
+ "INFO:hf-to-gguf:blk.7.ffn_down.weight, torch.bfloat16 --> BF16, shape = {4864, 896}\n",
+ "INFO:hf-to-gguf:blk.7.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {896, 4864}\n",
+ "INFO:hf-to-gguf:blk.7.ffn_up.weight, torch.bfloat16 --> BF16, shape = {896, 4864}\n",
+ "INFO:hf-to-gguf:blk.7.ffn_norm.weight, torch.bfloat16 --> F32, shape = {896}\n",
+ "INFO:hf-to-gguf:blk.7.attn_k.bias, torch.bfloat16 --> F32, shape = {128}\n",
+ "INFO:hf-to-gguf:blk.7.attn_k.weight, torch.bfloat16 --> BF16, shape = {896, 128}\n",
+ "INFO:hf-to-gguf:blk.7.attn_output.weight, torch.bfloat16 --> BF16, shape = {896, 896}\n",
+ "INFO:hf-to-gguf:blk.7.attn_q.bias, torch.bfloat16 --> F32, shape = {896}\n",
+ "INFO:hf-to-gguf:blk.7.attn_q.weight, torch.bfloat16 --> BF16, shape = {896, 896}\n",
+ "INFO:hf-to-gguf:blk.7.attn_v.bias, torch.bfloat16 --> F32, shape = {128}\n",
+ "INFO:hf-to-gguf:blk.7.attn_v.weight, torch.bfloat16 --> BF16, shape = {896, 128}\n",
+ "INFO:hf-to-gguf:blk.8.attn_norm.weight, torch.bfloat16 --> F32, shape = {896}\n",
+ "INFO:hf-to-gguf:blk.8.ffn_down.weight, torch.bfloat16 --> BF16, shape = {4864, 896}\n",
+ "INFO:hf-to-gguf:blk.8.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {896, 4864}\n",
+ "INFO:hf-to-gguf:blk.8.ffn_up.weight, torch.bfloat16 --> BF16, shape = {896, 4864}\n",
+ "INFO:hf-to-gguf:blk.8.ffn_norm.weight, torch.bfloat16 --> F32, shape = {896}\n",
+ "INFO:hf-to-gguf:blk.8.attn_k.bias, torch.bfloat16 --> F32, shape = {128}\n",
+ "INFO:hf-to-gguf:blk.8.attn_k.weight, torch.bfloat16 --> BF16, shape = {896, 128}\n",
+ "INFO:hf-to-gguf:blk.8.attn_output.weight, torch.bfloat16 --> BF16, shape = {896, 896}\n",
+ "INFO:hf-to-gguf:blk.8.attn_q.bias, torch.bfloat16 --> F32, shape = {896}\n",
+ "INFO:hf-to-gguf:blk.8.attn_q.weight, torch.bfloat16 --> BF16, shape = {896, 896}\n",
+ "INFO:hf-to-gguf:blk.8.attn_v.bias, torch.bfloat16 --> F32, shape = {128}\n",
+ "INFO:hf-to-gguf:blk.8.attn_v.weight, torch.bfloat16 --> BF16, shape = {896, 128}\n",
+ "INFO:hf-to-gguf:blk.9.attn_norm.weight, torch.bfloat16 --> F32, shape = {896}\n",
+ "INFO:hf-to-gguf:blk.9.ffn_down.weight, torch.bfloat16 --> BF16, shape = {4864, 896}\n",
+ "INFO:hf-to-gguf:blk.9.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {896, 4864}\n",
+ "INFO:hf-to-gguf:blk.9.ffn_up.weight, torch.bfloat16 --> BF16, shape = {896, 4864}\n",
+ "INFO:hf-to-gguf:blk.9.ffn_norm.weight, torch.bfloat16 --> F32, shape = {896}\n",
+ "INFO:hf-to-gguf:blk.9.attn_k.bias, torch.bfloat16 --> F32, shape = {128}\n",
+ "INFO:hf-to-gguf:blk.9.attn_k.weight, torch.bfloat16 --> BF16, shape = {896, 128}\n",
+ "INFO:hf-to-gguf:blk.9.attn_output.weight, torch.bfloat16 --> BF16, shape = {896, 896}\n",
+ "INFO:hf-to-gguf:blk.9.attn_q.bias, torch.bfloat16 --> F32, shape = {896}\n",
+ "INFO:hf-to-gguf:blk.9.attn_q.weight, torch.bfloat16 --> BF16, shape = {896, 896}\n",
+ "INFO:hf-to-gguf:blk.9.attn_v.bias, torch.bfloat16 --> F32, shape = {128}\n",
+ "INFO:hf-to-gguf:blk.9.attn_v.weight, torch.bfloat16 --> BF16, shape = {896, 128}\n",
+ "INFO:hf-to-gguf:output_norm.weight, torch.bfloat16 --> F32, shape = {896}\n",
+ "Writing: 0%| | 0.00/988M [00:00, ?byte/s]Traceback (most recent call last):\n",
+ " File \"/home/inflaton/code/projects/courses/novel-translation/llama.cpp/convert-hf-to-gguf.py\", line 2887, in \n",
+ " main()\n",
+ " File \"/home/inflaton/code/projects/courses/novel-translation/llama.cpp/convert-hf-to-gguf.py\", line 2881, in main\n",
+ " model_instance.write()\n",
+ " File \"/home/inflaton/code/projects/courses/novel-translation/llama.cpp/convert-hf-to-gguf.py\", line 331, in write\n",
+ " self.gguf_writer.write_tensors_to_file(progress=True)\n",
+ " File \"/home/inflaton/code/projects/courses/novel-translation/llama.cpp/gguf-py/gguf/gguf_writer.py\", line 312, in write_tensors_to_file\n",
+ " ti.tensor.tofile(self.fout)\n",
+ " File \"/home/inflaton/code/projects/courses/novel-translation/llama.cpp/gguf-py/gguf/lazy.py\", line 233, in tofile\n",
+ " eager = LazyNumpyTensor.to_eager(self)\n",
+ " File \"/home/inflaton/code/projects/courses/novel-translation/llama.cpp/gguf-py/gguf/lazy.py\", line 193, in to_eager\n",
+ " return cls._recurse_apply(t, simple_to_eager)\n",
+ " File \"/home/inflaton/code/projects/courses/novel-translation/llama.cpp/gguf-py/gguf/lazy.py\", line 109, in _recurse_apply\n",
+ " return fn(o)\n",
+ " File \"/home/inflaton/code/projects/courses/novel-translation/llama.cpp/gguf-py/gguf/lazy.py\", line 185, in simple_to_eager\n",
+ " lt._data = lt._func(lt._args)\n",
+ " File \"/home/inflaton/code/projects/courses/novel-translation/llama.cpp/gguf-py/gguf/lazy.py\", line 158, in \n",
+ " return cls(meta=cls.eager_to_meta(res), lazy=shared_lazy, args=args, func=lambda a: fn(*a, **kwargs))\n",
+ " File \"/home/inflaton/code/projects/courses/novel-translation/llama.cpp/gguf-py/gguf/quants.py\", line 52, in __quantize_bf16_array\n",
+ " return __apply_over_grouped_rows(__compute_fp32_to_bf16, arr=n, otype=np.int16, oshape=n.shape)\n",
+ " File \"/home/inflaton/code/projects/courses/novel-translation/llama.cpp/gguf-py/gguf/quants.py\", line 47, in __apply_over_grouped_rows\n",
+ " np.concatenate([func(group).ravel() for group in np.array_split(rows, n_groups)], axis=0, out=out)\n",
+ " File \"/home/inflaton/code/projects/courses/novel-translation/llama.cpp/gguf-py/gguf/quants.py\", line 47, in \n",
+ " np.concatenate([func(group).ravel() for group in np.array_split(rows, n_groups)], axis=0, out=out)\n",
+ " File \"/home/inflaton/code/projects/courses/novel-translation/llama.cpp/gguf-py/gguf/quants.py\", line 30, in __compute_fp32_to_bf16\n",
+ " n = np.where((n & 0x7fffffff) > 0x7f800000, (n & 0xffff0000) | (64 << 16), n)\n",
+ "OverflowError: Python integer 4294901760 out of bounds for int32\n",
+ "Writing: 0%| | 0.00/988M [00:00, ?byte/s]\n",
+ "Unsloth: Conversion completed! Output location: ./models/Qwen2-0.5B-Instruct-bnb-4bit-MAC-q5_k_m/unsloth.BF16.gguf\n",
+ "Unsloth: [2] Converting GGUF 16bit into q5_k_m. This will take 20 minutes...\n",
+ "main: build = 3192 (b1ef562b)\n",
+ "main: built with cc (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0 for x86_64-linux-gnu\n",
+ "main: quantizing './models/Qwen2-0.5B-Instruct-bnb-4bit-MAC-q5_k_m/unsloth.BF16.gguf' to './models/Qwen2-0.5B-Instruct-bnb-4bit-MAC-q5_k_m/unsloth.Q5_K_M.gguf' as Q5_K_M using 48 threads\n",
+ "llama_model_quantize: failed to quantize: tensor 'token_embd.weight' data is not within the file bounds, model is corrupted or incomplete\n",
+ "main: failed to quantize model from './models/Qwen2-0.5B-Instruct-bnb-4bit-MAC-q5_k_m/unsloth.BF16.gguf'\n",
+ "Traceback (most recent call last):\n",
+ " File \"/home/inflaton/code/projects/courses/novel-translation/tune.py\", line 136, in \n",
+ " save_model(model, tokenizer)\n",
+ " File \"/home/inflaton/code/projects/courses/novel-translation/translation_engine.py\", line 219, in save_model\n",
+ " model.save_pretrained_gguf(\n",
+ " File \"/home/inflaton/miniconda3/envs/unsloth_env/lib/python3.10/site-packages/unsloth/save.py\", line 1527, in unsloth_save_pretrained_gguf\n",
+ " all_file_locations = save_to_gguf(model_type, model_dtype, is_sentencepiece_model,\n",
+ " File \"/home/inflaton/miniconda3/envs/unsloth_env/lib/python3.10/site-packages/unsloth/save.py\", line 1113, in save_to_gguf\n",
+ " raise RuntimeError(\n",
+ "RuntimeError: Unsloth: Quantization failed! You might have to compile llama.cpp yourself, then run this again.\n",
+ "You do not need to close this Python program. Run the following commands in a new terminal:\n",
+ "You must run this in the same folder as you're saving your model.\n",
+ "git clone --recursive https://github.com/ggerganov/llama.cpp\n",
+ "cd llama.cpp && make clean && make all -j\n",
+ "Once that's done, redo the quantization.\n",
+ "Tuning unsloth/Qwen2-1.5B-Instruct-bnb-4bit\n",
+ "🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.\n",
+ "[nltk_data] Downloading package wordnet to /home/inflaton/nltk_data...\n",
+ "[nltk_data] Package wordnet is already up-to-date!\n",
+ "[nltk_data] Downloading package punkt to /home/inflaton/nltk_data...\n",
+ "[nltk_data] Package punkt is already up-to-date!\n",
+ "[nltk_data] Downloading package omw-1.4 to /home/inflaton/nltk_data...\n",
+ "[nltk_data] Package omw-1.4 is already up-to-date!\n",
+ "loading /home/inflaton/code/projects/courses/novel-translation/translation_engine.py\n",
+ "loading env vars from: /home/inflaton/code/projects/courses/novel-translation/.env\n",
+ "unsloth/Qwen2-1.5B-Instruct-bnb-4bit True 2048 10 None datasets/mac/mac.tsv results/mac-results.csv True True True\n",
+ "(1) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n",
+ "0.0 GB of memory reserved.\n",
+ "loading model: unsloth/Qwen2-1.5B-Instruct-bnb-4bit\n",
+ "==((====))== Unsloth: Fast Qwen2 patching release 2024.6\n",
+ " \\\\ /| GPU: NVIDIA GeForce RTX 4080 Laptop GPU. Max memory: 11.994 GB. Platform = Linux.\n",
+ "O^O/ \\_/ \\ Pytorch: 2.3.0. CUDA = 8.9. CUDA Toolkit = 12.1.\n",
+ "\\ / Bfloat16 = TRUE. Xformers = 0.0.26.post1. FA = False.\n",
+ " \"-____-\" Free Apache license: http://github.com/unslothai/unsloth\n",
+ "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n",
+ "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n",
+ "(2) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n",
+ "1.516 GB of memory reserved.\n",
+ "loading train/test data files\n",
+ "DatasetDict({\n",
+ " train: Dataset({\n",
+ " features: ['chinese', 'english', 'text', 'prompt'],\n",
+ " num_rows: 4528\n",
+ " })\n",
+ " test: Dataset({\n",
+ " features: ['chinese', 'english', 'text', 'prompt'],\n",
+ " num_rows: 1133\n",
+ " })\n",
+ "})\n",
+ "Evaluating base model: unsloth/Qwen2-1.5B-Instruct-bnb-4bit\n",
+ "100%|███████████████████████████████████████| 1133/1133 [59:36<00:00, 3.16s/it]\n",
+ " chinese ... unsloth/Qwen2-1.5B-Instruct-bnb-4bit\n",
+ "0 老耿端起枪,眯缝起一只三角眼,一搂扳机响了枪,冰雹般的金麻雀劈哩啪啦往下落,铁砂子在柳枝间飞... ... Old耿拿起枪,眯着眼睛一搂扳机就响了枪,金麻雀噼里啪啦的往下掉,铁砂子在柳枝间飞溅,发出“...\n",
+ "\n",
+ "[1 rows x 5 columns]\n",
+ "(3) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n",
+ "3.945 GB of memory reserved.\n",
+ "Unsloth 2024.6 patched 28 layers with 0 QKV layers, 28 O layers and 28 MLP layers.\n",
+ "(4) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n",
+ "3.945 GB of memory reserved.\n",
+ "==((====))== Unsloth - 2x faster free finetuning | Num GPUs = 1\n",
+ " \\\\ /| Num examples = 4,528 | Num Epochs = 10\n",
+ "O^O/ \\_/ \\ Batch size per device = 2 | Gradient Accumulation steps = 4\n",
+ "\\ / Total batch size = 8 | Total steps = 5,660\n",
+ " \"-____-\" Number of trainable parameters = 18,464,768\n",
+ "{'loss': 1.7416, 'grad_norm': 0.6486354470252991, 'learning_rate': 0.00019664014146772768, 'epoch': 0.18}\n",
+ "{'loss': 1.568, 'grad_norm': 0.6254323124885559, 'learning_rate': 0.0001931034482758621, 'epoch': 0.35}\n",
+ "{'loss': 1.5159, 'grad_norm': 0.6513530015945435, 'learning_rate': 0.00018956675508399648, 'epoch': 0.53}\n",
+ "{'loss': 1.5169, 'grad_norm': 0.5732458233833313, 'learning_rate': 0.00018603006189213086, 'epoch': 0.71}\n",
+ "{'loss': 1.4958, 'grad_norm': 0.5724458694458008, 'learning_rate': 0.00018249336870026527, 'epoch': 0.88}\n",
+ " 9%|███▎ | 500/5660 [12:05<2:13:34, 1.55s/it]/home/inflaton/miniconda3/envs/unsloth_env/lib/python3.10/site-packages/peft/utils/other.py:611: UserWarning: Unable to fetch remote file due to the following error (MaxRetryError('HTTPSConnectionPool(host=\\'huggingface.co\\', port=443): Max retries exceeded with url: /unsloth/Qwen2-1.5B-Instruct-bnb-4bit/resolve/main/config.json (Caused by NameResolutionError(\": Failed to resolve \\'huggingface.co\\' ([Errno -3] Temporary failure in name resolution)\"))'), '(Request ID: 73fef4ae-41d2-4b61-b3af-92f4996c5ae6)') - silently ignoring the lookup for the file config.json in unsloth/Qwen2-1.5B-Instruct-bnb-4bit.\n",
+ " warnings.warn(\n",
+ "/home/inflaton/miniconda3/envs/unsloth_env/lib/python3.10/site-packages/peft/utils/save_and_load.py:195: UserWarning: Could not find a config file in unsloth/Qwen2-1.5B-Instruct-bnb-4bit - will assume that the vocabulary was not modified.\n",
+ " warnings.warn(\n",
+ "{'loss': 1.4181, 'grad_norm': 0.6020762324333191, 'learning_rate': 0.00017895667550839965, 'epoch': 1.06}\n",
+ "{'loss': 1.2977, 'grad_norm': 0.7245773077011108, 'learning_rate': 0.00017541998231653406, 'epoch': 1.24}\n",
+ "{'loss': 1.3392, 'grad_norm': 0.6871509552001953, 'learning_rate': 0.00017188328912466844, 'epoch': 1.41}\n",
+ "{'loss': 1.2784, 'grad_norm': 0.89788419008255, 'learning_rate': 0.00016834659593280285, 'epoch': 1.59}\n",
+ "{'loss': 1.2796, 'grad_norm': 0.8698562383651733, 'learning_rate': 0.00016480990274093723, 'epoch': 1.77}\n",
+ " 18%|██████▌ | 1000/5660 [25:03<1:52:33, 1.45s/it]/home/inflaton/miniconda3/envs/unsloth_env/lib/python3.10/site-packages/peft/utils/other.py:611: UserWarning: Unable to fetch remote file due to the following error (MaxRetryError('HTTPSConnectionPool(host=\\'huggingface.co\\', port=443): Max retries exceeded with url: /unsloth/Qwen2-1.5B-Instruct-bnb-4bit/resolve/main/config.json (Caused by NameResolutionError(\": Failed to resolve \\'huggingface.co\\' ([Errno -3] Temporary failure in name resolution)\"))'), '(Request ID: aec2499a-0591-44e8-bbc9-1568ebca28ce)') - silently ignoring the lookup for the file config.json in unsloth/Qwen2-1.5B-Instruct-bnb-4bit.\n",
+ " warnings.warn(\n",
+ "/home/inflaton/miniconda3/envs/unsloth_env/lib/python3.10/site-packages/peft/utils/save_and_load.py:195: UserWarning: Could not find a config file in unsloth/Qwen2-1.5B-Instruct-bnb-4bit - will assume that the vocabulary was not modified.\n",
+ " warnings.warn(\n",
+ "{'loss': 1.3317, 'grad_norm': 0.8551518321037292, 'learning_rate': 0.00016127320954907164, 'epoch': 1.94}\n",
+ "{'loss': 1.1279, 'grad_norm': 0.9991661310195923, 'learning_rate': 0.000157736516357206, 'epoch': 2.12}\n",
+ "{'loss': 0.9962, 'grad_norm': 1.0851796865463257, 'learning_rate': 0.0001541998231653404, 'epoch': 2.3}\n",
+ "{'loss': 1.03, 'grad_norm': 1.223488450050354, 'learning_rate': 0.0001506631299734748, 'epoch': 2.47}\n",
+ "{'loss': 1.0346, 'grad_norm': 1.1075948476791382, 'learning_rate': 0.0001471264367816092, 'epoch': 2.65}\n",
+ "{'loss': 1.0356, 'grad_norm': 1.0564122200012207, 'learning_rate': 0.0001435897435897436, 'epoch': 2.83}\n",
+ "{'loss': 1.0085, 'grad_norm': 0.8879927396774292, 'learning_rate': 0.000140053050397878, 'epoch': 3.0}\n",
+ "{'loss': 0.6982, 'grad_norm': 1.2948524951934814, 'learning_rate': 0.0001365163572060124, 'epoch': 3.18}\n",
+ "{'loss': 0.7112, 'grad_norm': 1.411033034324646, 'learning_rate': 0.00013297966401414678, 'epoch': 3.36}\n",
+ "{'loss': 0.7143, 'grad_norm': 1.2779042720794678, 'learning_rate': 0.0001294429708222812, 'epoch': 3.53}\n",
+ "{'loss': 0.7048, 'grad_norm': 1.4070408344268799, 'learning_rate': 0.00012590627763041555, 'epoch': 3.71}\n",
+ "{'loss': 0.7431, 'grad_norm': 1.2386316061019897, 'learning_rate': 0.00012236958443854996, 'epoch': 3.89}\n",
+ "{'loss': 0.6442, 'grad_norm': 1.4571985006332397, 'learning_rate': 0.00011883289124668435, 'epoch': 4.06}\n",
+ "{'loss': 0.4587, 'grad_norm': 1.3495371341705322, 'learning_rate': 0.00011529619805481875, 'epoch': 4.24}\n",
+ "{'loss': 0.4463, 'grad_norm': 1.4228668212890625, 'learning_rate': 0.00011175950486295315, 'epoch': 4.42}\n",
+ "{'loss': 0.4788, 'grad_norm': 1.4749239683151245, 'learning_rate': 0.00010822281167108754, 'epoch': 4.59}\n",
+ "{'loss': 0.4788, 'grad_norm': 1.9813690185546875, 'learning_rate': 0.00010468611847922194, 'epoch': 4.77}\n",
+ "{'loss': 0.4853, 'grad_norm': 1.4942362308502197, 'learning_rate': 0.00010114942528735633, 'epoch': 4.95}\n",
+ "{'loss': 0.358, 'grad_norm': 1.398263692855835, 'learning_rate': 9.761273209549072e-05, 'epoch': 5.12}\n",
+ "{'loss': 0.2917, 'grad_norm': 1.148834228515625, 'learning_rate': 9.407603890362513e-05, 'epoch': 5.3}\n",
+ "{'loss': 0.2974, 'grad_norm': 1.8417447805404663, 'learning_rate': 9.053934571175951e-05, 'epoch': 5.48}\n",
+ "{'loss': 0.316, 'grad_norm': 1.3501039743423462, 'learning_rate': 8.70026525198939e-05, 'epoch': 5.65}\n",
+ "{'loss': 0.3081, 'grad_norm': 1.3924249410629272, 'learning_rate': 8.34659593280283e-05, 'epoch': 5.83}\n",
+ "{'loss': 0.3056, 'grad_norm': 0.7402265071868896, 'learning_rate': 7.99292661361627e-05, 'epoch': 6.01}\n",
+ "{'loss': 0.1918, 'grad_norm': 1.6596429347991943, 'learning_rate': 7.639257294429708e-05, 'epoch': 6.18}\n",
+ "{'loss': 0.2038, 'grad_norm': 1.4057574272155762, 'learning_rate': 7.285587975243147e-05, 'epoch': 6.36}\n",
+ "{'loss': 0.2068, 'grad_norm': 1.0855220556259155, 'learning_rate': 6.931918656056587e-05, 'epoch': 6.54}\n",
+ "{'loss': 0.2067, 'grad_norm': 1.2501165866851807, 'learning_rate': 6.578249336870027e-05, 'epoch': 6.71}\n",
+ "{'loss': 0.2132, 'grad_norm': 1.1509685516357422, 'learning_rate': 6.224580017683466e-05, 'epoch': 6.89}\n",
+ "{'loss': 0.1937, 'grad_norm': 0.7672592401504517, 'learning_rate': 5.870910698496905e-05, 'epoch': 7.07}\n",
+ "{'loss': 0.1486, 'grad_norm': 0.8496847152709961, 'learning_rate': 5.517241379310345e-05, 'epoch': 7.24}\n",
+ "{'loss': 0.156, 'grad_norm': 0.8369797468185425, 'learning_rate': 5.163572060123785e-05, 'epoch': 7.42}\n",
+ "{'loss': 0.1561, 'grad_norm': 0.8741617202758789, 'learning_rate': 4.809902740937224e-05, 'epoch': 7.6}\n",
+ "{'loss': 0.1574, 'grad_norm': 0.69870924949646, 'learning_rate': 4.4562334217506634e-05, 'epoch': 7.77}\n",
+ "{'loss': 0.1585, 'grad_norm': 0.7949297428131104, 'learning_rate': 4.1025641025641023e-05, 'epoch': 7.95}\n",
+ "{'loss': 0.1358, 'grad_norm': 0.47111940383911133, 'learning_rate': 3.7488947833775426e-05, 'epoch': 8.13}\n",
+ "{'loss': 0.1299, 'grad_norm': 0.6585063934326172, 'learning_rate': 3.3952254641909815e-05, 'epoch': 8.3}\n",
+ "{'loss': 0.1309, 'grad_norm': 0.4326067566871643, 'learning_rate': 3.041556145004421e-05, 'epoch': 8.48}\n",
+ "{'loss': 0.1325, 'grad_norm': 0.3893156349658966, 'learning_rate': 2.6878868258178604e-05, 'epoch': 8.66}\n",
+ "{'loss': 0.1334, 'grad_norm': 0.968989908695221, 'learning_rate': 2.3342175066313e-05, 'epoch': 8.83}\n",
+ "{'loss': 0.1335, 'grad_norm': 0.48122939467430115, 'learning_rate': 1.9805481874447392e-05, 'epoch': 9.01}\n",
+ "{'loss': 0.1172, 'grad_norm': 0.4522119462490082, 'learning_rate': 1.6268788682581788e-05, 'epoch': 9.19}\n",
+ "{'loss': 0.1181, 'grad_norm': 0.5501106977462769, 'learning_rate': 1.273209549071618e-05, 'epoch': 9.36}\n",
+ "{'loss': 0.1194, 'grad_norm': 0.46891143918037415, 'learning_rate': 9.195402298850575e-06, 'epoch': 9.54}\n",
+ "{'loss': 0.12, 'grad_norm': 0.5252432823181152, 'learning_rate': 5.658709106984969e-06, 'epoch': 9.72}\n",
+ "{'loss': 0.1228, 'grad_norm': 0.4517185688018799, 'learning_rate': 2.1220159151193635e-06, 'epoch': 9.89}\n",
+ "{'train_runtime': 8395.4379, 'train_samples_per_second': 5.393, 'train_steps_per_second': 0.674, 'train_loss': 0.5991904156789342, 'epoch': 10.0}\n",
+ "100%|█████████████████████████████████████| 5660/5660 [2:19:55<00:00, 1.48s/it]\n",
+ "(5) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n",
+ "8395.4379 seconds used for training.\n",
+ "139.92 minutes used for training.\n",
+ "Peak reserved memory = 3.945 GB.\n",
+ "Peak reserved memory for training = 0.0 GB.\n",
+ "Peak reserved memory % of max memory = 32.891 %.\n",
+ "Peak reserved memory for training % of max memory = 0.0 %.\n",
+ "Evaluating fine-tuned model: unsloth/Qwen2-1.5B-Instruct-bnb-4bit\n",
+ "100%|███████████████████████████████████████| 1133/1133 [50:44<00:00, 2.69s/it]\n",
+ " chinese ... unsloth/Qwen2-1.5B-Instruct-bnb-4bit(finetuned)\n",
+ "0 老耿端起枪,眯缝起一只三角眼,一搂扳机响了枪,冰雹般的金麻雀劈哩啪啦往下落,铁砂子在柳枝间飞... ... Old Geng raised his pistol, squinted, and fire...\n",
+ "\n",
+ "[1 rows x 6 columns]\n",
+ "(6) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n",
+ "3.945 GB of memory reserved.\n",
+ "Unsloth: Merging 4bit and LoRA weights to 4bit...\n",
+ "This might take 5 minutes...\n",
+ "Done.\n",
+ "Unsloth: Saving tokenizer... Done.\n",
+ "Unsloth: Saving model... This might take 10 minutes for Llama-7b... Done.\n",
+ "Unsloth: Merging 4bit and LoRA weights to 16bit...\n",
+ "Unsloth: Will use up to 29.87 out of 47.05 RAM for saving.\n",
+ "100%|███████████████████████████████████████████| 28/28 [00:00<00:00, 42.85it/s]\n",
+ "Unsloth: Saving tokenizer... Done.\n",
+ "Unsloth: Saving model... This might take 5 minutes for Llama-7b...\n",
+ "Done.\n",
+ "Unsloth: Converting qwen2 model. Can use fast conversion = False.\n",
+ "==((====))== Unsloth: Conversion from QLoRA to GGUF information\n",
+ " \\\\ /| [0] Installing llama.cpp will take 3 minutes.\n",
+ "O^O/ \\_/ \\ [1] Converting HF to GUUF 16bits will take 3 minutes.\n",
+ "\\ / [2] Converting GGUF 16bits to ['q5_k_m'] will take 10 minutes each.\n",
+ " \"-____-\" In total, you will have to wait at least 16 minutes.\n",
+ "\n",
+ "Unsloth: [0] Installing llama.cpp. This will take 3 minutes...\n",
+ "Unsloth: [1] Converting model at models/Qwen2-1.5B-Instruct-bnb-4bit-MAC-q5_k_m into bf16 GGUF format.\n",
+ "The output location will be ./models/Qwen2-1.5B-Instruct-bnb-4bit-MAC-q5_k_m/unsloth.BF16.gguf\n",
+ "This will take 3 minutes...\n",
+ "INFO:hf-to-gguf:Loading model: Qwen2-1.5B-Instruct-bnb-4bit-MAC-q5_k_m\n",
+ "INFO:gguf.gguf_writer:gguf: This GGUF file is for Little Endian only\n",
+ "INFO:hf-to-gguf:Set model parameters\n",
+ "INFO:hf-to-gguf:gguf: context length = 32768\n",
+ "INFO:hf-to-gguf:gguf: embedding length = 1536\n",
+ "INFO:hf-to-gguf:gguf: feed forward length = 8960\n",
+ "INFO:hf-to-gguf:gguf: head count = 12\n",
+ "INFO:hf-to-gguf:gguf: key-value head count = 2\n",
+ "INFO:hf-to-gguf:gguf: rope theta = 1000000.0\n",
+ "INFO:hf-to-gguf:gguf: rms norm epsilon = 1e-06\n",
+ "INFO:hf-to-gguf:gguf: file type = 32\n",
+ "INFO:hf-to-gguf:Set model tokenizer\n",
+ "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n",
+ "INFO:gguf.vocab:Adding 151387 merge(s).\n",
+ "INFO:gguf.vocab:Setting special token type eos to 151645\n",
+ "INFO:gguf.vocab:Setting special token type pad to 151643\n",
+ "INFO:gguf.vocab:Setting special token type bos to 151643\n",
+ "INFO:gguf.vocab:Setting chat_template to {% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\n",
+ "You are a helpful assistant.<|im_end|>\n",
+ "' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n",
+ "' + message['content'] + '<|im_end|>' + '\n",
+ "'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n",
+ "' }}{% endif %}\n",
+ "INFO:hf-to-gguf:Exporting model to 'models/Qwen2-1.5B-Instruct-bnb-4bit-MAC-q5_k_m/unsloth.BF16.gguf'\n",
+ "INFO:hf-to-gguf:gguf: loading model part 'model.safetensors'\n",
+ "INFO:hf-to-gguf:token_embd.weight, torch.bfloat16 --> BF16, shape = {1536, 151936}\n",
+ "INFO:hf-to-gguf:blk.0.attn_norm.weight, torch.bfloat16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.0.ffn_down.weight, torch.bfloat16 --> BF16, shape = {8960, 1536}\n",
+ "INFO:hf-to-gguf:blk.0.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {1536, 8960}\n",
+ "INFO:hf-to-gguf:blk.0.ffn_up.weight, torch.bfloat16 --> BF16, shape = {1536, 8960}\n",
+ "INFO:hf-to-gguf:blk.0.ffn_norm.weight, torch.bfloat16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.0.attn_k.bias, torch.bfloat16 --> F32, shape = {256}\n",
+ "INFO:hf-to-gguf:blk.0.attn_k.weight, torch.bfloat16 --> BF16, shape = {1536, 256}\n",
+ "INFO:hf-to-gguf:blk.0.attn_output.weight, torch.bfloat16 --> BF16, shape = {1536, 1536}\n",
+ "INFO:hf-to-gguf:blk.0.attn_q.bias, torch.bfloat16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.0.attn_q.weight, torch.bfloat16 --> BF16, shape = {1536, 1536}\n",
+ "INFO:hf-to-gguf:blk.0.attn_v.bias, torch.bfloat16 --> F32, shape = {256}\n",
+ "INFO:hf-to-gguf:blk.0.attn_v.weight, torch.bfloat16 --> BF16, shape = {1536, 256}\n",
+ "INFO:hf-to-gguf:blk.1.attn_norm.weight, torch.bfloat16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.1.ffn_down.weight, torch.bfloat16 --> BF16, shape = {8960, 1536}\n",
+ "INFO:hf-to-gguf:blk.1.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {1536, 8960}\n",
+ "INFO:hf-to-gguf:blk.1.ffn_up.weight, torch.bfloat16 --> BF16, shape = {1536, 8960}\n",
+ "INFO:hf-to-gguf:blk.1.ffn_norm.weight, torch.bfloat16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.1.attn_k.bias, torch.bfloat16 --> F32, shape = {256}\n",
+ "INFO:hf-to-gguf:blk.1.attn_k.weight, torch.bfloat16 --> BF16, shape = {1536, 256}\n",
+ "INFO:hf-to-gguf:blk.1.attn_output.weight, torch.bfloat16 --> BF16, shape = {1536, 1536}\n",
+ "INFO:hf-to-gguf:blk.1.attn_q.bias, torch.bfloat16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.1.attn_q.weight, torch.bfloat16 --> BF16, shape = {1536, 1536}\n",
+ "INFO:hf-to-gguf:blk.1.attn_v.bias, torch.bfloat16 --> F32, shape = {256}\n",
+ "INFO:hf-to-gguf:blk.1.attn_v.weight, torch.bfloat16 --> BF16, shape = {1536, 256}\n",
+ "INFO:hf-to-gguf:blk.10.attn_norm.weight, torch.bfloat16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.10.ffn_down.weight, torch.bfloat16 --> BF16, shape = {8960, 1536}\n",
+ "INFO:hf-to-gguf:blk.10.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {1536, 8960}\n",
+ "INFO:hf-to-gguf:blk.10.ffn_up.weight, torch.bfloat16 --> BF16, shape = {1536, 8960}\n",
+ "INFO:hf-to-gguf:blk.10.ffn_norm.weight, torch.bfloat16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.10.attn_k.bias, torch.bfloat16 --> F32, shape = {256}\n",
+ "INFO:hf-to-gguf:blk.10.attn_k.weight, torch.bfloat16 --> BF16, shape = {1536, 256}\n",
+ "INFO:hf-to-gguf:blk.10.attn_output.weight, torch.bfloat16 --> BF16, shape = {1536, 1536}\n",
+ "INFO:hf-to-gguf:blk.10.attn_q.bias, torch.bfloat16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.10.attn_q.weight, torch.bfloat16 --> BF16, shape = {1536, 1536}\n",
+ "INFO:hf-to-gguf:blk.10.attn_v.bias, torch.bfloat16 --> F32, shape = {256}\n",
+ "INFO:hf-to-gguf:blk.10.attn_v.weight, torch.bfloat16 --> BF16, shape = {1536, 256}\n",
+ "INFO:hf-to-gguf:blk.11.attn_norm.weight, torch.bfloat16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.11.ffn_down.weight, torch.bfloat16 --> BF16, shape = {8960, 1536}\n",
+ "INFO:hf-to-gguf:blk.11.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {1536, 8960}\n",
+ "INFO:hf-to-gguf:blk.11.ffn_up.weight, torch.bfloat16 --> BF16, shape = {1536, 8960}\n",
+ "INFO:hf-to-gguf:blk.11.ffn_norm.weight, torch.bfloat16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.11.attn_k.bias, torch.bfloat16 --> F32, shape = {256}\n",
+ "INFO:hf-to-gguf:blk.11.attn_k.weight, torch.bfloat16 --> BF16, shape = {1536, 256}\n",
+ "INFO:hf-to-gguf:blk.11.attn_output.weight, torch.bfloat16 --> BF16, shape = {1536, 1536}\n",
+ "INFO:hf-to-gguf:blk.11.attn_q.bias, torch.bfloat16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.11.attn_q.weight, torch.bfloat16 --> BF16, shape = {1536, 1536}\n",
+ "INFO:hf-to-gguf:blk.11.attn_v.bias, torch.bfloat16 --> F32, shape = {256}\n",
+ "INFO:hf-to-gguf:blk.11.attn_v.weight, torch.bfloat16 --> BF16, shape = {1536, 256}\n",
+ "INFO:hf-to-gguf:blk.12.attn_norm.weight, torch.bfloat16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.12.ffn_down.weight, torch.bfloat16 --> BF16, shape = {8960, 1536}\n",
+ "INFO:hf-to-gguf:blk.12.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {1536, 8960}\n",
+ "INFO:hf-to-gguf:blk.12.ffn_up.weight, torch.bfloat16 --> BF16, shape = {1536, 8960}\n",
+ "INFO:hf-to-gguf:blk.12.ffn_norm.weight, torch.bfloat16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.12.attn_k.bias, torch.bfloat16 --> F32, shape = {256}\n",
+ "INFO:hf-to-gguf:blk.12.attn_k.weight, torch.bfloat16 --> BF16, shape = {1536, 256}\n",
+ "INFO:hf-to-gguf:blk.12.attn_output.weight, torch.bfloat16 --> BF16, shape = {1536, 1536}\n",
+ "INFO:hf-to-gguf:blk.12.attn_q.bias, torch.bfloat16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.12.attn_q.weight, torch.bfloat16 --> BF16, shape = {1536, 1536}\n",
+ "INFO:hf-to-gguf:blk.12.attn_v.bias, torch.bfloat16 --> F32, shape = {256}\n",
+ "INFO:hf-to-gguf:blk.12.attn_v.weight, torch.bfloat16 --> BF16, shape = {1536, 256}\n",
+ "INFO:hf-to-gguf:blk.13.attn_norm.weight, torch.bfloat16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.13.ffn_down.weight, torch.bfloat16 --> BF16, shape = {8960, 1536}\n",
+ "INFO:hf-to-gguf:blk.13.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {1536, 8960}\n",
+ "INFO:hf-to-gguf:blk.13.ffn_up.weight, torch.bfloat16 --> BF16, shape = {1536, 8960}\n",
+ "INFO:hf-to-gguf:blk.13.ffn_norm.weight, torch.bfloat16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.13.attn_k.bias, torch.bfloat16 --> F32, shape = {256}\n",
+ "INFO:hf-to-gguf:blk.13.attn_k.weight, torch.bfloat16 --> BF16, shape = {1536, 256}\n",
+ "INFO:hf-to-gguf:blk.13.attn_output.weight, torch.bfloat16 --> BF16, shape = {1536, 1536}\n",
+ "INFO:hf-to-gguf:blk.13.attn_q.bias, torch.bfloat16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.13.attn_q.weight, torch.bfloat16 --> BF16, shape = {1536, 1536}\n",
+ "INFO:hf-to-gguf:blk.13.attn_v.bias, torch.bfloat16 --> F32, shape = {256}\n",
+ "INFO:hf-to-gguf:blk.13.attn_v.weight, torch.bfloat16 --> BF16, shape = {1536, 256}\n",
+ "INFO:hf-to-gguf:blk.14.attn_norm.weight, torch.bfloat16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.14.ffn_down.weight, torch.bfloat16 --> BF16, shape = {8960, 1536}\n",
+ "INFO:hf-to-gguf:blk.14.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {1536, 8960}\n",
+ "INFO:hf-to-gguf:blk.14.ffn_up.weight, torch.bfloat16 --> BF16, shape = {1536, 8960}\n",
+ "INFO:hf-to-gguf:blk.14.ffn_norm.weight, torch.bfloat16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.14.attn_k.bias, torch.bfloat16 --> F32, shape = {256}\n",
+ "INFO:hf-to-gguf:blk.14.attn_k.weight, torch.bfloat16 --> BF16, shape = {1536, 256}\n",
+ "INFO:hf-to-gguf:blk.14.attn_output.weight, torch.bfloat16 --> BF16, shape = {1536, 1536}\n",
+ "INFO:hf-to-gguf:blk.14.attn_q.bias, torch.bfloat16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.14.attn_q.weight, torch.bfloat16 --> BF16, shape = {1536, 1536}\n",
+ "INFO:hf-to-gguf:blk.14.attn_v.bias, torch.bfloat16 --> F32, shape = {256}\n",
+ "INFO:hf-to-gguf:blk.14.attn_v.weight, torch.bfloat16 --> BF16, shape = {1536, 256}\n",
+ "INFO:hf-to-gguf:blk.15.attn_norm.weight, torch.bfloat16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.15.ffn_down.weight, torch.bfloat16 --> BF16, shape = {8960, 1536}\n",
+ "INFO:hf-to-gguf:blk.15.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {1536, 8960}\n",
+ "INFO:hf-to-gguf:blk.15.ffn_up.weight, torch.bfloat16 --> BF16, shape = {1536, 8960}\n",
+ "INFO:hf-to-gguf:blk.15.ffn_norm.weight, torch.bfloat16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.15.attn_k.bias, torch.bfloat16 --> F32, shape = {256}\n",
+ "INFO:hf-to-gguf:blk.15.attn_k.weight, torch.bfloat16 --> BF16, shape = {1536, 256}\n",
+ "INFO:hf-to-gguf:blk.15.attn_output.weight, torch.bfloat16 --> BF16, shape = {1536, 1536}\n",
+ "INFO:hf-to-gguf:blk.15.attn_q.bias, torch.bfloat16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.15.attn_q.weight, torch.bfloat16 --> BF16, shape = {1536, 1536}\n",
+ "INFO:hf-to-gguf:blk.15.attn_v.bias, torch.bfloat16 --> F32, shape = {256}\n",
+ "INFO:hf-to-gguf:blk.15.attn_v.weight, torch.bfloat16 --> BF16, shape = {1536, 256}\n",
+ "INFO:hf-to-gguf:blk.16.attn_norm.weight, torch.bfloat16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.16.ffn_down.weight, torch.bfloat16 --> BF16, shape = {8960, 1536}\n",
+ "INFO:hf-to-gguf:blk.16.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {1536, 8960}\n",
+ "INFO:hf-to-gguf:blk.16.ffn_up.weight, torch.bfloat16 --> BF16, shape = {1536, 8960}\n",
+ "INFO:hf-to-gguf:blk.16.ffn_norm.weight, torch.bfloat16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.16.attn_k.bias, torch.bfloat16 --> F32, shape = {256}\n",
+ "INFO:hf-to-gguf:blk.16.attn_k.weight, torch.bfloat16 --> BF16, shape = {1536, 256}\n",
+ "INFO:hf-to-gguf:blk.16.attn_output.weight, torch.bfloat16 --> BF16, shape = {1536, 1536}\n",
+ "INFO:hf-to-gguf:blk.16.attn_q.bias, torch.bfloat16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.16.attn_q.weight, torch.bfloat16 --> BF16, shape = {1536, 1536}\n",
+ "INFO:hf-to-gguf:blk.16.attn_v.bias, torch.bfloat16 --> F32, shape = {256}\n",
+ "INFO:hf-to-gguf:blk.16.attn_v.weight, torch.bfloat16 --> BF16, shape = {1536, 256}\n",
+ "INFO:hf-to-gguf:blk.17.attn_norm.weight, torch.bfloat16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.17.ffn_down.weight, torch.bfloat16 --> BF16, shape = {8960, 1536}\n",
+ "INFO:hf-to-gguf:blk.17.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {1536, 8960}\n",
+ "INFO:hf-to-gguf:blk.17.ffn_up.weight, torch.bfloat16 --> BF16, shape = {1536, 8960}\n",
+ "INFO:hf-to-gguf:blk.17.ffn_norm.weight, torch.bfloat16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.17.attn_k.bias, torch.bfloat16 --> F32, shape = {256}\n",
+ "INFO:hf-to-gguf:blk.17.attn_k.weight, torch.bfloat16 --> BF16, shape = {1536, 256}\n",
+ "INFO:hf-to-gguf:blk.17.attn_output.weight, torch.bfloat16 --> BF16, shape = {1536, 1536}\n",
+ "INFO:hf-to-gguf:blk.17.attn_q.bias, torch.bfloat16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.17.attn_q.weight, torch.bfloat16 --> BF16, shape = {1536, 1536}\n",
+ "INFO:hf-to-gguf:blk.17.attn_v.bias, torch.bfloat16 --> F32, shape = {256}\n",
+ "INFO:hf-to-gguf:blk.17.attn_v.weight, torch.bfloat16 --> BF16, shape = {1536, 256}\n",
+ "INFO:hf-to-gguf:blk.18.attn_norm.weight, torch.bfloat16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.18.ffn_down.weight, torch.bfloat16 --> BF16, shape = {8960, 1536}\n",
+ "INFO:hf-to-gguf:blk.18.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {1536, 8960}\n",
+ "INFO:hf-to-gguf:blk.18.ffn_up.weight, torch.bfloat16 --> BF16, shape = {1536, 8960}\n",
+ "INFO:hf-to-gguf:blk.18.ffn_norm.weight, torch.bfloat16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.18.attn_k.bias, torch.bfloat16 --> F32, shape = {256}\n",
+ "INFO:hf-to-gguf:blk.18.attn_k.weight, torch.bfloat16 --> BF16, shape = {1536, 256}\n",
+ "INFO:hf-to-gguf:blk.18.attn_output.weight, torch.bfloat16 --> BF16, shape = {1536, 1536}\n",
+ "INFO:hf-to-gguf:blk.18.attn_q.bias, torch.bfloat16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.18.attn_q.weight, torch.bfloat16 --> BF16, shape = {1536, 1536}\n",
+ "INFO:hf-to-gguf:blk.18.attn_v.bias, torch.bfloat16 --> F32, shape = {256}\n",
+ "INFO:hf-to-gguf:blk.18.attn_v.weight, torch.bfloat16 --> BF16, shape = {1536, 256}\n",
+ "INFO:hf-to-gguf:blk.19.attn_norm.weight, torch.bfloat16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.19.ffn_down.weight, torch.bfloat16 --> BF16, shape = {8960, 1536}\n",
+ "INFO:hf-to-gguf:blk.19.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {1536, 8960}\n",
+ "INFO:hf-to-gguf:blk.19.ffn_up.weight, torch.bfloat16 --> BF16, shape = {1536, 8960}\n",
+ "INFO:hf-to-gguf:blk.19.ffn_norm.weight, torch.bfloat16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.19.attn_k.bias, torch.bfloat16 --> F32, shape = {256}\n",
+ "INFO:hf-to-gguf:blk.19.attn_k.weight, torch.bfloat16 --> BF16, shape = {1536, 256}\n",
+ "INFO:hf-to-gguf:blk.19.attn_output.weight, torch.bfloat16 --> BF16, shape = {1536, 1536}\n",
+ "INFO:hf-to-gguf:blk.19.attn_q.bias, torch.bfloat16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.19.attn_q.weight, torch.bfloat16 --> BF16, shape = {1536, 1536}\n",
+ "INFO:hf-to-gguf:blk.19.attn_v.bias, torch.bfloat16 --> F32, shape = {256}\n",
+ "INFO:hf-to-gguf:blk.19.attn_v.weight, torch.bfloat16 --> BF16, shape = {1536, 256}\n",
+ "INFO:hf-to-gguf:blk.2.attn_norm.weight, torch.bfloat16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.2.ffn_down.weight, torch.bfloat16 --> BF16, shape = {8960, 1536}\n",
+ "INFO:hf-to-gguf:blk.2.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {1536, 8960}\n",
+ "INFO:hf-to-gguf:blk.2.ffn_up.weight, torch.bfloat16 --> BF16, shape = {1536, 8960}\n",
+ "INFO:hf-to-gguf:blk.2.ffn_norm.weight, torch.bfloat16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.2.attn_k.bias, torch.bfloat16 --> F32, shape = {256}\n",
+ "INFO:hf-to-gguf:blk.2.attn_k.weight, torch.bfloat16 --> BF16, shape = {1536, 256}\n",
+ "INFO:hf-to-gguf:blk.2.attn_output.weight, torch.bfloat16 --> BF16, shape = {1536, 1536}\n",
+ "INFO:hf-to-gguf:blk.2.attn_q.bias, torch.bfloat16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.2.attn_q.weight, torch.bfloat16 --> BF16, shape = {1536, 1536}\n",
+ "INFO:hf-to-gguf:blk.2.attn_v.bias, torch.bfloat16 --> F32, shape = {256}\n",
+ "INFO:hf-to-gguf:blk.2.attn_v.weight, torch.bfloat16 --> BF16, shape = {1536, 256}\n",
+ "INFO:hf-to-gguf:blk.20.attn_norm.weight, torch.bfloat16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.20.ffn_down.weight, torch.bfloat16 --> BF16, shape = {8960, 1536}\n",
+ "INFO:hf-to-gguf:blk.20.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {1536, 8960}\n",
+ "INFO:hf-to-gguf:blk.20.ffn_up.weight, torch.bfloat16 --> BF16, shape = {1536, 8960}\n",
+ "INFO:hf-to-gguf:blk.20.ffn_norm.weight, torch.bfloat16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.20.attn_k.bias, torch.bfloat16 --> F32, shape = {256}\n",
+ "INFO:hf-to-gguf:blk.20.attn_k.weight, torch.bfloat16 --> BF16, shape = {1536, 256}\n",
+ "INFO:hf-to-gguf:blk.20.attn_output.weight, torch.bfloat16 --> BF16, shape = {1536, 1536}\n",
+ "INFO:hf-to-gguf:blk.20.attn_q.bias, torch.bfloat16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.20.attn_q.weight, torch.bfloat16 --> BF16, shape = {1536, 1536}\n",
+ "INFO:hf-to-gguf:blk.20.attn_v.bias, torch.bfloat16 --> F32, shape = {256}\n",
+ "INFO:hf-to-gguf:blk.20.attn_v.weight, torch.bfloat16 --> BF16, shape = {1536, 256}\n",
+ "INFO:hf-to-gguf:blk.21.attn_norm.weight, torch.bfloat16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.21.ffn_down.weight, torch.bfloat16 --> BF16, shape = {8960, 1536}\n",
+ "INFO:hf-to-gguf:blk.21.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {1536, 8960}\n",
+ "INFO:hf-to-gguf:blk.21.ffn_up.weight, torch.bfloat16 --> BF16, shape = {1536, 8960}\n",
+ "INFO:hf-to-gguf:blk.21.ffn_norm.weight, torch.bfloat16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.21.attn_k.bias, torch.bfloat16 --> F32, shape = {256}\n",
+ "INFO:hf-to-gguf:blk.21.attn_k.weight, torch.bfloat16 --> BF16, shape = {1536, 256}\n",
+ "INFO:hf-to-gguf:blk.21.attn_output.weight, torch.bfloat16 --> BF16, shape = {1536, 1536}\n",
+ "INFO:hf-to-gguf:blk.21.attn_q.bias, torch.bfloat16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.21.attn_q.weight, torch.bfloat16 --> BF16, shape = {1536, 1536}\n",
+ "INFO:hf-to-gguf:blk.21.attn_v.bias, torch.bfloat16 --> F32, shape = {256}\n",
+ "INFO:hf-to-gguf:blk.21.attn_v.weight, torch.bfloat16 --> BF16, shape = {1536, 256}\n",
+ "INFO:hf-to-gguf:blk.22.attn_norm.weight, torch.bfloat16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.22.ffn_down.weight, torch.bfloat16 --> BF16, shape = {8960, 1536}\n",
+ "INFO:hf-to-gguf:blk.22.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {1536, 8960}\n",
+ "INFO:hf-to-gguf:blk.22.ffn_up.weight, torch.bfloat16 --> BF16, shape = {1536, 8960}\n",
+ "INFO:hf-to-gguf:blk.22.ffn_norm.weight, torch.bfloat16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.22.attn_k.bias, torch.bfloat16 --> F32, shape = {256}\n",
+ "INFO:hf-to-gguf:blk.22.attn_k.weight, torch.bfloat16 --> BF16, shape = {1536, 256}\n",
+ "INFO:hf-to-gguf:blk.22.attn_output.weight, torch.bfloat16 --> BF16, shape = {1536, 1536}\n",
+ "INFO:hf-to-gguf:blk.22.attn_q.bias, torch.bfloat16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.22.attn_q.weight, torch.bfloat16 --> BF16, shape = {1536, 1536}\n",
+ "INFO:hf-to-gguf:blk.22.attn_v.bias, torch.bfloat16 --> F32, shape = {256}\n",
+ "INFO:hf-to-gguf:blk.22.attn_v.weight, torch.bfloat16 --> BF16, shape = {1536, 256}\n",
+ "INFO:hf-to-gguf:blk.23.attn_norm.weight, torch.bfloat16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.23.ffn_down.weight, torch.bfloat16 --> BF16, shape = {8960, 1536}\n",
+ "INFO:hf-to-gguf:blk.23.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {1536, 8960}\n",
+ "INFO:hf-to-gguf:blk.23.ffn_up.weight, torch.bfloat16 --> BF16, shape = {1536, 8960}\n",
+ "INFO:hf-to-gguf:blk.23.ffn_norm.weight, torch.bfloat16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.23.attn_k.bias, torch.bfloat16 --> F32, shape = {256}\n",
+ "INFO:hf-to-gguf:blk.23.attn_k.weight, torch.bfloat16 --> BF16, shape = {1536, 256}\n",
+ "INFO:hf-to-gguf:blk.23.attn_output.weight, torch.bfloat16 --> BF16, shape = {1536, 1536}\n",
+ "INFO:hf-to-gguf:blk.23.attn_q.bias, torch.bfloat16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.23.attn_q.weight, torch.bfloat16 --> BF16, shape = {1536, 1536}\n",
+ "INFO:hf-to-gguf:blk.23.attn_v.bias, torch.bfloat16 --> F32, shape = {256}\n",
+ "INFO:hf-to-gguf:blk.23.attn_v.weight, torch.bfloat16 --> BF16, shape = {1536, 256}\n",
+ "INFO:hf-to-gguf:blk.24.attn_norm.weight, torch.bfloat16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.24.ffn_down.weight, torch.bfloat16 --> BF16, shape = {8960, 1536}\n",
+ "INFO:hf-to-gguf:blk.24.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {1536, 8960}\n",
+ "INFO:hf-to-gguf:blk.24.ffn_up.weight, torch.bfloat16 --> BF16, shape = {1536, 8960}\n",
+ "INFO:hf-to-gguf:blk.24.ffn_norm.weight, torch.bfloat16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.24.attn_k.bias, torch.bfloat16 --> F32, shape = {256}\n",
+ "INFO:hf-to-gguf:blk.24.attn_k.weight, torch.bfloat16 --> BF16, shape = {1536, 256}\n",
+ "INFO:hf-to-gguf:blk.24.attn_output.weight, torch.bfloat16 --> BF16, shape = {1536, 1536}\n",
+ "INFO:hf-to-gguf:blk.24.attn_q.bias, torch.bfloat16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.24.attn_q.weight, torch.bfloat16 --> BF16, shape = {1536, 1536}\n",
+ "INFO:hf-to-gguf:blk.24.attn_v.bias, torch.bfloat16 --> F32, shape = {256}\n",
+ "INFO:hf-to-gguf:blk.24.attn_v.weight, torch.bfloat16 --> BF16, shape = {1536, 256}\n",
+ "INFO:hf-to-gguf:blk.25.attn_norm.weight, torch.bfloat16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.25.ffn_down.weight, torch.bfloat16 --> BF16, shape = {8960, 1536}\n",
+ "INFO:hf-to-gguf:blk.25.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {1536, 8960}\n",
+ "INFO:hf-to-gguf:blk.25.ffn_up.weight, torch.bfloat16 --> BF16, shape = {1536, 8960}\n",
+ "INFO:hf-to-gguf:blk.25.ffn_norm.weight, torch.bfloat16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.25.attn_k.bias, torch.bfloat16 --> F32, shape = {256}\n",
+ "INFO:hf-to-gguf:blk.25.attn_k.weight, torch.bfloat16 --> BF16, shape = {1536, 256}\n",
+ "INFO:hf-to-gguf:blk.25.attn_output.weight, torch.bfloat16 --> BF16, shape = {1536, 1536}\n",
+ "INFO:hf-to-gguf:blk.25.attn_q.bias, torch.bfloat16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.25.attn_q.weight, torch.bfloat16 --> BF16, shape = {1536, 1536}\n",
+ "INFO:hf-to-gguf:blk.25.attn_v.bias, torch.bfloat16 --> F32, shape = {256}\n",
+ "INFO:hf-to-gguf:blk.25.attn_v.weight, torch.bfloat16 --> BF16, shape = {1536, 256}\n",
+ "INFO:hf-to-gguf:blk.26.attn_norm.weight, torch.bfloat16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.26.ffn_down.weight, torch.bfloat16 --> BF16, shape = {8960, 1536}\n",
+ "INFO:hf-to-gguf:blk.26.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {1536, 8960}\n",
+ "INFO:hf-to-gguf:blk.26.ffn_up.weight, torch.bfloat16 --> BF16, shape = {1536, 8960}\n",
+ "INFO:hf-to-gguf:blk.26.ffn_norm.weight, torch.bfloat16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.26.attn_k.bias, torch.bfloat16 --> F32, shape = {256}\n",
+ "INFO:hf-to-gguf:blk.26.attn_k.weight, torch.bfloat16 --> BF16, shape = {1536, 256}\n",
+ "INFO:hf-to-gguf:blk.26.attn_output.weight, torch.bfloat16 --> BF16, shape = {1536, 1536}\n",
+ "INFO:hf-to-gguf:blk.26.attn_q.bias, torch.bfloat16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.26.attn_q.weight, torch.bfloat16 --> BF16, shape = {1536, 1536}\n",
+ "INFO:hf-to-gguf:blk.26.attn_v.bias, torch.bfloat16 --> F32, shape = {256}\n",
+ "INFO:hf-to-gguf:blk.26.attn_v.weight, torch.bfloat16 --> BF16, shape = {1536, 256}\n",
+ "INFO:hf-to-gguf:blk.27.attn_norm.weight, torch.bfloat16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.27.ffn_down.weight, torch.bfloat16 --> BF16, shape = {8960, 1536}\n",
+ "INFO:hf-to-gguf:blk.27.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {1536, 8960}\n",
+ "INFO:hf-to-gguf:blk.27.ffn_up.weight, torch.bfloat16 --> BF16, shape = {1536, 8960}\n",
+ "INFO:hf-to-gguf:blk.27.ffn_norm.weight, torch.bfloat16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.27.attn_k.bias, torch.bfloat16 --> F32, shape = {256}\n",
+ "INFO:hf-to-gguf:blk.27.attn_k.weight, torch.bfloat16 --> BF16, shape = {1536, 256}\n",
+ "INFO:hf-to-gguf:blk.27.attn_output.weight, torch.bfloat16 --> BF16, shape = {1536, 1536}\n",
+ "INFO:hf-to-gguf:blk.27.attn_q.bias, torch.bfloat16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.27.attn_q.weight, torch.bfloat16 --> BF16, shape = {1536, 1536}\n",
+ "INFO:hf-to-gguf:blk.27.attn_v.bias, torch.bfloat16 --> F32, shape = {256}\n",
+ "INFO:hf-to-gguf:blk.27.attn_v.weight, torch.bfloat16 --> BF16, shape = {1536, 256}\n",
+ "INFO:hf-to-gguf:blk.3.attn_norm.weight, torch.bfloat16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.3.ffn_down.weight, torch.bfloat16 --> BF16, shape = {8960, 1536}\n",
+ "INFO:hf-to-gguf:blk.3.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {1536, 8960}\n",
+ "INFO:hf-to-gguf:blk.3.ffn_up.weight, torch.bfloat16 --> BF16, shape = {1536, 8960}\n",
+ "INFO:hf-to-gguf:blk.3.ffn_norm.weight, torch.bfloat16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.3.attn_k.bias, torch.bfloat16 --> F32, shape = {256}\n",
+ "INFO:hf-to-gguf:blk.3.attn_k.weight, torch.bfloat16 --> BF16, shape = {1536, 256}\n",
+ "INFO:hf-to-gguf:blk.3.attn_output.weight, torch.bfloat16 --> BF16, shape = {1536, 1536}\n",
+ "INFO:hf-to-gguf:blk.3.attn_q.bias, torch.bfloat16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.3.attn_q.weight, torch.bfloat16 --> BF16, shape = {1536, 1536}\n",
+ "INFO:hf-to-gguf:blk.3.attn_v.bias, torch.bfloat16 --> F32, shape = {256}\n",
+ "INFO:hf-to-gguf:blk.3.attn_v.weight, torch.bfloat16 --> BF16, shape = {1536, 256}\n",
+ "INFO:hf-to-gguf:blk.4.attn_norm.weight, torch.bfloat16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.4.ffn_down.weight, torch.bfloat16 --> BF16, shape = {8960, 1536}\n",
+ "INFO:hf-to-gguf:blk.4.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {1536, 8960}\n",
+ "INFO:hf-to-gguf:blk.4.ffn_up.weight, torch.bfloat16 --> BF16, shape = {1536, 8960}\n",
+ "INFO:hf-to-gguf:blk.4.ffn_norm.weight, torch.bfloat16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.4.attn_k.bias, torch.bfloat16 --> F32, shape = {256}\n",
+ "INFO:hf-to-gguf:blk.4.attn_k.weight, torch.bfloat16 --> BF16, shape = {1536, 256}\n",
+ "INFO:hf-to-gguf:blk.4.attn_output.weight, torch.bfloat16 --> BF16, shape = {1536, 1536}\n",
+ "INFO:hf-to-gguf:blk.4.attn_q.bias, torch.bfloat16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.4.attn_q.weight, torch.bfloat16 --> BF16, shape = {1536, 1536}\n",
+ "INFO:hf-to-gguf:blk.4.attn_v.bias, torch.bfloat16 --> F32, shape = {256}\n",
+ "INFO:hf-to-gguf:blk.4.attn_v.weight, torch.bfloat16 --> BF16, shape = {1536, 256}\n",
+ "INFO:hf-to-gguf:blk.5.attn_norm.weight, torch.bfloat16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.5.ffn_down.weight, torch.bfloat16 --> BF16, shape = {8960, 1536}\n",
+ "INFO:hf-to-gguf:blk.5.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {1536, 8960}\n",
+ "INFO:hf-to-gguf:blk.5.ffn_up.weight, torch.bfloat16 --> BF16, shape = {1536, 8960}\n",
+ "INFO:hf-to-gguf:blk.5.ffn_norm.weight, torch.bfloat16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.5.attn_k.bias, torch.bfloat16 --> F32, shape = {256}\n",
+ "INFO:hf-to-gguf:blk.5.attn_k.weight, torch.bfloat16 --> BF16, shape = {1536, 256}\n",
+ "INFO:hf-to-gguf:blk.5.attn_output.weight, torch.bfloat16 --> BF16, shape = {1536, 1536}\n",
+ "INFO:hf-to-gguf:blk.5.attn_q.bias, torch.bfloat16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.5.attn_q.weight, torch.bfloat16 --> BF16, shape = {1536, 1536}\n",
+ "INFO:hf-to-gguf:blk.5.attn_v.bias, torch.bfloat16 --> F32, shape = {256}\n",
+ "INFO:hf-to-gguf:blk.5.attn_v.weight, torch.bfloat16 --> BF16, shape = {1536, 256}\n",
+ "INFO:hf-to-gguf:blk.6.attn_norm.weight, torch.bfloat16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.6.ffn_down.weight, torch.bfloat16 --> BF16, shape = {8960, 1536}\n",
+ "INFO:hf-to-gguf:blk.6.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {1536, 8960}\n",
+ "INFO:hf-to-gguf:blk.6.ffn_up.weight, torch.bfloat16 --> BF16, shape = {1536, 8960}\n",
+ "INFO:hf-to-gguf:blk.6.ffn_norm.weight, torch.bfloat16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.6.attn_k.bias, torch.bfloat16 --> F32, shape = {256}\n",
+ "INFO:hf-to-gguf:blk.6.attn_k.weight, torch.bfloat16 --> BF16, shape = {1536, 256}\n",
+ "INFO:hf-to-gguf:blk.6.attn_output.weight, torch.bfloat16 --> BF16, shape = {1536, 1536}\n",
+ "INFO:hf-to-gguf:blk.6.attn_q.bias, torch.bfloat16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.6.attn_q.weight, torch.bfloat16 --> BF16, shape = {1536, 1536}\n",
+ "INFO:hf-to-gguf:blk.6.attn_v.bias, torch.bfloat16 --> F32, shape = {256}\n",
+ "INFO:hf-to-gguf:blk.6.attn_v.weight, torch.bfloat16 --> BF16, shape = {1536, 256}\n",
+ "INFO:hf-to-gguf:blk.7.attn_norm.weight, torch.bfloat16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.7.ffn_down.weight, torch.bfloat16 --> BF16, shape = {8960, 1536}\n",
+ "INFO:hf-to-gguf:blk.7.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {1536, 8960}\n",
+ "INFO:hf-to-gguf:blk.7.ffn_up.weight, torch.bfloat16 --> BF16, shape = {1536, 8960}\n",
+ "INFO:hf-to-gguf:blk.7.ffn_norm.weight, torch.bfloat16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.7.attn_k.bias, torch.bfloat16 --> F32, shape = {256}\n",
+ "INFO:hf-to-gguf:blk.7.attn_k.weight, torch.bfloat16 --> BF16, shape = {1536, 256}\n",
+ "INFO:hf-to-gguf:blk.7.attn_output.weight, torch.bfloat16 --> BF16, shape = {1536, 1536}\n",
+ "INFO:hf-to-gguf:blk.7.attn_q.bias, torch.bfloat16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.7.attn_q.weight, torch.bfloat16 --> BF16, shape = {1536, 1536}\n",
+ "INFO:hf-to-gguf:blk.7.attn_v.bias, torch.bfloat16 --> F32, shape = {256}\n",
+ "INFO:hf-to-gguf:blk.7.attn_v.weight, torch.bfloat16 --> BF16, shape = {1536, 256}\n",
+ "INFO:hf-to-gguf:blk.8.attn_norm.weight, torch.bfloat16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.8.ffn_down.weight, torch.bfloat16 --> BF16, shape = {8960, 1536}\n",
+ "INFO:hf-to-gguf:blk.8.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {1536, 8960}\n",
+ "INFO:hf-to-gguf:blk.8.ffn_up.weight, torch.bfloat16 --> BF16, shape = {1536, 8960}\n",
+ "INFO:hf-to-gguf:blk.8.ffn_norm.weight, torch.bfloat16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.8.attn_k.bias, torch.bfloat16 --> F32, shape = {256}\n",
+ "INFO:hf-to-gguf:blk.8.attn_k.weight, torch.bfloat16 --> BF16, shape = {1536, 256}\n",
+ "INFO:hf-to-gguf:blk.8.attn_output.weight, torch.bfloat16 --> BF16, shape = {1536, 1536}\n",
+ "INFO:hf-to-gguf:blk.8.attn_q.bias, torch.bfloat16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.8.attn_q.weight, torch.bfloat16 --> BF16, shape = {1536, 1536}\n",
+ "INFO:hf-to-gguf:blk.8.attn_v.bias, torch.bfloat16 --> F32, shape = {256}\n",
+ "INFO:hf-to-gguf:blk.8.attn_v.weight, torch.bfloat16 --> BF16, shape = {1536, 256}\n",
+ "INFO:hf-to-gguf:blk.9.attn_norm.weight, torch.bfloat16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.9.ffn_down.weight, torch.bfloat16 --> BF16, shape = {8960, 1536}\n",
+ "INFO:hf-to-gguf:blk.9.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {1536, 8960}\n",
+ "INFO:hf-to-gguf:blk.9.ffn_up.weight, torch.bfloat16 --> BF16, shape = {1536, 8960}\n",
+ "INFO:hf-to-gguf:blk.9.ffn_norm.weight, torch.bfloat16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.9.attn_k.bias, torch.bfloat16 --> F32, shape = {256}\n",
+ "INFO:hf-to-gguf:blk.9.attn_k.weight, torch.bfloat16 --> BF16, shape = {1536, 256}\n",
+ "INFO:hf-to-gguf:blk.9.attn_output.weight, torch.bfloat16 --> BF16, shape = {1536, 1536}\n",
+ "INFO:hf-to-gguf:blk.9.attn_q.bias, torch.bfloat16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.9.attn_q.weight, torch.bfloat16 --> BF16, shape = {1536, 1536}\n",
+ "INFO:hf-to-gguf:blk.9.attn_v.bias, torch.bfloat16 --> F32, shape = {256}\n",
+ "INFO:hf-to-gguf:blk.9.attn_v.weight, torch.bfloat16 --> BF16, shape = {1536, 256}\n",
+ "INFO:hf-to-gguf:output_norm.weight, torch.bfloat16 --> F32, shape = {1536}\n",
+ "Writing: 0%| | 0.00/3.09G [00:00, ?byte/s]Traceback (most recent call last):\n",
+ " File \"/home/inflaton/code/projects/courses/novel-translation/llama.cpp/convert-hf-to-gguf.py\", line 2887, in \n",
+ " main()\n",
+ " File \"/home/inflaton/code/projects/courses/novel-translation/llama.cpp/convert-hf-to-gguf.py\", line 2881, in main\n",
+ " model_instance.write()\n",
+ " File \"/home/inflaton/code/projects/courses/novel-translation/llama.cpp/convert-hf-to-gguf.py\", line 331, in write\n",
+ " self.gguf_writer.write_tensors_to_file(progress=True)\n",
+ " File \"/home/inflaton/code/projects/courses/novel-translation/llama.cpp/gguf-py/gguf/gguf_writer.py\", line 312, in write_tensors_to_file\n",
+ " ti.tensor.tofile(self.fout)\n",
+ " File \"/home/inflaton/code/projects/courses/novel-translation/llama.cpp/gguf-py/gguf/lazy.py\", line 233, in tofile\n",
+ " eager = LazyNumpyTensor.to_eager(self)\n",
+ " File \"/home/inflaton/code/projects/courses/novel-translation/llama.cpp/gguf-py/gguf/lazy.py\", line 193, in to_eager\n",
+ " return cls._recurse_apply(t, simple_to_eager)\n",
+ " File \"/home/inflaton/code/projects/courses/novel-translation/llama.cpp/gguf-py/gguf/lazy.py\", line 109, in _recurse_apply\n",
+ " return fn(o)\n",
+ " File \"/home/inflaton/code/projects/courses/novel-translation/llama.cpp/gguf-py/gguf/lazy.py\", line 185, in simple_to_eager\n",
+ " lt._data = lt._func(lt._args)\n",
+ " File \"/home/inflaton/code/projects/courses/novel-translation/llama.cpp/gguf-py/gguf/lazy.py\", line 158, in \n",
+ " return cls(meta=cls.eager_to_meta(res), lazy=shared_lazy, args=args, func=lambda a: fn(*a, **kwargs))\n",
+ " File \"/home/inflaton/code/projects/courses/novel-translation/llama.cpp/gguf-py/gguf/quants.py\", line 52, in __quantize_bf16_array\n",
+ " return __apply_over_grouped_rows(__compute_fp32_to_bf16, arr=n, otype=np.int16, oshape=n.shape)\n",
+ " File \"/home/inflaton/code/projects/courses/novel-translation/llama.cpp/gguf-py/gguf/quants.py\", line 47, in __apply_over_grouped_rows\n",
+ " np.concatenate([func(group).ravel() for group in np.array_split(rows, n_groups)], axis=0, out=out)\n",
+ " File \"/home/inflaton/code/projects/courses/novel-translation/llama.cpp/gguf-py/gguf/quants.py\", line 47, in \n",
+ " np.concatenate([func(group).ravel() for group in np.array_split(rows, n_groups)], axis=0, out=out)\n",
+ " File \"/home/inflaton/code/projects/courses/novel-translation/llama.cpp/gguf-py/gguf/quants.py\", line 30, in __compute_fp32_to_bf16\n",
+ " n = np.where((n & 0x7fffffff) > 0x7f800000, (n & 0xffff0000) | (64 << 16), n)\n",
+ "OverflowError: Python integer 4294901760 out of bounds for int32\n",
+ "Writing: 0%| | 0.00/3.09G [00:00, ?byte/s]\n",
+ "Unsloth: Conversion completed! Output location: ./models/Qwen2-1.5B-Instruct-bnb-4bit-MAC-q5_k_m/unsloth.BF16.gguf\n",
+ "Unsloth: [2] Converting GGUF 16bit into q5_k_m. This will take 20 minutes...\n",
+ "main: build = 3192 (b1ef562b)\n",
+ "main: built with cc (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0 for x86_64-linux-gnu\n",
+ "main: quantizing './models/Qwen2-1.5B-Instruct-bnb-4bit-MAC-q5_k_m/unsloth.BF16.gguf' to './models/Qwen2-1.5B-Instruct-bnb-4bit-MAC-q5_k_m/unsloth.Q5_K_M.gguf' as Q5_K_M using 48 threads\n",
+ "llama_model_quantize: failed to quantize: tensor 'token_embd.weight' data is not within the file bounds, model is corrupted or incomplete\n",
+ "main: failed to quantize model from './models/Qwen2-1.5B-Instruct-bnb-4bit-MAC-q5_k_m/unsloth.BF16.gguf'\n",
+ "Traceback (most recent call last):\n",
+ " File \"/home/inflaton/code/projects/courses/novel-translation/tune.py\", line 136, in \n",
+ " save_model(model, tokenizer)\n",
+ " File \"/home/inflaton/code/projects/courses/novel-translation/translation_engine.py\", line 219, in save_model\n",
+ " File \"/home/inflaton/miniconda3/envs/unsloth_env/lib/python3.10/site-packages/unsloth/save.py\", line 1527, in unsloth_save_pretrained_gguf\n",
+ " all_file_locations = save_to_gguf(model_type, model_dtype, is_sentencepiece_model,\n",
+ " File \"/home/inflaton/miniconda3/envs/unsloth_env/lib/python3.10/site-packages/unsloth/save.py\", line 1113, in save_to_gguf\n",
+ " raise RuntimeError(\n",
+ "RuntimeError: Unsloth: Quantization failed! You might have to compile llama.cpp yourself, then run this again.\n",
+ "You do not need to close this Python program. Run the following commands in a new terminal:\n",
+ "You must run this in the same folder as you're saving your model.\n",
+ "git clone --recursive https://github.com/ggerganov/llama.cpp\n",
+ "cd llama.cpp && make clean && make all -j\n",
+ "Once that's done, redo the quantization.\n"
+ ]
+ }
+ ],
+ "source": [
+ "!./tune-small.sh"
+ ]
+ }
+ ],
+ "metadata": {
+ "accelerator": "GPU",
+ "application/vnd.databricks.v1+notebook": {
+ "dashboards": [],
+ "environmentMetadata": null,
+ "language": "python",
+ "notebookMetadata": {
+ "pythonIndentUnit": 4
+ },
+ "notebookName": "07_MAC_+_Qwen2-7B-Instructi_Unsloth_train",
+ "widgets": {}
+ },
+ "colab": {
+ "gpuType": "T4",
+ "provenance": []
+ },
+ "kernelspec": {
+ "display_name": "Python 3",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.10.14"
+ },
+ "widgets": {
+ "application/vnd.jupyter.widget-state+json": {
+ "036fc5746f43416db18c19ad8fd36677": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "06e806c82c7b4cbea31c5358dd9c3434": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "087b76a8b7514269b1f0ab29b062e444": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_a069d2ab23824f29aa320ac256e2cfe9",
+ "placeholder": "",
+ "style": "IPY_MODEL_06e806c82c7b4cbea31c5358dd9c3434",
+ "value": "Map (num_proc=2): 100%"
+ }
+ },
+ "09b76013aa9e45efb6deb23a7a0d0925": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_dea41c5260884aa6879b5e1d1697b14f",
+ "placeholder": "",
+ "style": "IPY_MODEL_89965917796a4f81b899fdc7685f33df",
+ "value": "config.json: 100%"
+ }
+ },
+ "0a92c56bfa134ef583220d7ef0b13e17": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "0c34be936c8145d3ab41282f30a70713": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "0f8b6bfe16894500838793f2491d403f": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "177c78fce95d4b4ab33057c5a048d693": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "1f44c9ce1adf470cbb19784493ed209f": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_0c34be936c8145d3ab41282f30a70713",
+ "placeholder": "",
+ "style": "IPY_MODEL_0a92c56bfa134ef583220d7ef0b13e17",
+ "value": "model.safetensors: 100%"
+ }
+ },
+ "201b59ccd9f845e197029b57e424aefc": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "2157f01726d748f8a9ae4a00664430da": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "21db8a77b00d4a4e82fdfa608657531f": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "26e4202cca81496a90d15a0dd4ca9cf1": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_ba90fdb8822d47dab7ba203bee297f37",
+ "IPY_MODEL_61560ff6a36b44f4a9dfdae5c52791d4",
+ "IPY_MODEL_95fbe66647904c06a20f640630d6dc0e"
+ ],
+ "layout": "IPY_MODEL_57182a263d324a3dbf1471c74290a0d5"
+ }
+ },
+ "27155728b6b84cb199c91c940095d0a8": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_6b91feeed5464877991ac2c207aebe7c",
+ "IPY_MODEL_cca8113c54c0495daedce1327bf9c68b",
+ "IPY_MODEL_2e63a29e2f7247bba5beede9a568c99f"
+ ],
+ "layout": "IPY_MODEL_5c9d781c28944f3eb86e2a6d44efdf18"
+ }
+ },
+ "271ddaa553a042d09b6db7b450643d8f": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "2a58d04b428c46f4b3dbadd3bc6cd529": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "2d18ddf6482c4d97829ac0e5a7b9868f": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_9f679ad3ec7f4fe8ad0510ffb57bc2ab",
+ "IPY_MODEL_f2df530d22c74977b249dd9fb5f4829b",
+ "IPY_MODEL_89b2ef0dbfea47ab8e6f8d659e3351d1"
+ ],
+ "layout": "IPY_MODEL_3056b148aa9f4e6e8aa3b61d26886255"
+ }
+ },
+ "2e5087c76f98437cb5dc729230358cba": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "2e63a29e2f7247bba5beede9a568c99f": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_b993eaec6b224440bf80c0958c6fb536",
+ "placeholder": "",
+ "style": "IPY_MODEL_de868e26e7154f62aa86223a539ad421",
+ "value": " 464/464 [00:00<00:00, 27.1kB/s]"
+ }
+ },
+ "2f6c70dd266c4816bfad3fd3d192929a": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "30307300bc4e4baf96560e30969a82b6": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_e36a3f9eff0e4cf68834d66b0213ae96",
+ "placeholder": "",
+ "style": "IPY_MODEL_a0037bdccf254159becde630bee3d1db",
+ "value": "generation_config.json: 100%"
+ }
+ },
+ "3056b148aa9f4e6e8aa3b61d26886255": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "30cdc32298134cb0be4d41615b9e5774": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "3572201bd4d74a58b7a665f9bdfdcdba": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "35b0e8c26d6640e9bd0ed7b242a423d8": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_2e5087c76f98437cb5dc729230358cba",
+ "max": 51760,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_036fc5746f43416db18c19ad8fd36677",
+ "value": 51760
+ }
+ },
+ "36166c7bcb854b34aca1f41a5d6ea50b": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "370692d819df41828b48c4ad446f977b": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "39b29a75374b45c0a22506010be2b84e": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_30cdc32298134cb0be4d41615b9e5774",
+ "max": 1179,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_47928317548c454bba6358ab132e8dee",
+ "value": 1179
+ }
+ },
+ "3cf2dd993b5e4d3daecf61e4bab5a404": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_087b76a8b7514269b1f0ab29b062e444",
+ "IPY_MODEL_35b0e8c26d6640e9bd0ed7b242a423d8",
+ "IPY_MODEL_54ad89e05fd74576b9b8b5b5a10eaf8d"
+ ],
+ "layout": "IPY_MODEL_a41dc44766444a998bec2d777f249d23"
+ }
+ },
+ "43dec2ede91341f5af60eb522e18e984": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "4463edd481c1467f914c7dcd6c6e6ffc": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "47928317548c454bba6358ab132e8dee": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "49277aeeac16434a865a4d12308b1abc": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "4ae7e449e4ea4c729b5f34607c18ebae": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "4b2061b8a73c43ffb0c2f83daf0d0183": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "4c4c88d4c701450692fa0f6b0c5764b0": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "4c666f4ace3943f8b80ecd20e7503236": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "4ccedf0d93094e63b57a0f8a434fba06": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_4463edd481c1467f914c7dcd6c6e6ffc",
+ "max": 44307561,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_6d3b9a05db0b4dadb638c686faa0c40a",
+ "value": 44307561
+ }
+ },
+ "4dcf6ff672d24983a1877a8431709aa9": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_5807d5fb827d490fb3bc698f801ffff5",
+ "placeholder": "",
+ "style": "IPY_MODEL_c4f2b06a82fd4987b8b659524a7b503b",
+ "value": "Generating train split: 100%"
+ }
+ },
+ "4ea63adfce694725bdba878aef709dd3": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "5234566b1bfc4655b8d582ea5b46ed9f": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "54ad89e05fd74576b9b8b5b5a10eaf8d": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_fdb1941405ed4e4aa06019933892deb3",
+ "placeholder": "",
+ "style": "IPY_MODEL_668d5377ca56426a99753867e6e24862",
+ "value": " 51760/51760 [01:02<00:00, 1131.51 examples/s]"
+ }
+ },
+ "56aee4853b7740e6a977254f5d1fa66d": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "57182a263d324a3dbf1471c74290a0d5": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "5807d5fb827d490fb3bc698f801ffff5": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "5c9d781c28944f3eb86e2a6d44efdf18": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "5f40db8173dd4d76b6ef5ed6d9ec8b6e": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "61560ff6a36b44f4a9dfdae5c52791d4": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_db19fc8d37db4e45a5790a876836d8c4",
+ "max": 11610,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_36166c7bcb854b34aca1f41a5d6ea50b",
+ "value": 11610
+ }
+ },
+ "6578fd7acdb54c4c93528ea431fd0144": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_370692d819df41828b48c4ad446f977b",
+ "placeholder": "",
+ "style": "IPY_MODEL_a0bf9160eb2647409b3200270914b90f",
+ "value": " 50.6k/50.6k [00:00<00:00, 2.71MB/s]"
+ }
+ },
+ "668d5377ca56426a99753867e6e24862": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "697f027529b54ee9956bae78a11e0611": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "69ac12aec0714318bf2c83d4f4e745f5": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "6b2012c3f88547af8884a9ea90e3164b": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_938f45f1b3e24118b815d96ae34ba86a",
+ "placeholder": "",
+ "style": "IPY_MODEL_9367047a800747f79c6b225d92397846",
+ "value": " 44.3M/44.3M [00:01<00:00, 31.0MB/s]"
+ }
+ },
+ "6b91feeed5464877991ac2c207aebe7c": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_4b2061b8a73c43ffb0c2f83daf0d0183",
+ "placeholder": "",
+ "style": "IPY_MODEL_69ac12aec0714318bf2c83d4f4e745f5",
+ "value": "special_tokens_map.json: 100%"
+ }
+ },
+ "6d3b9a05db0b4dadb638c686faa0c40a": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "6dbbedeca9314e66ae50e44ffa31a414": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "6e34619b45934040b6092e6fb01ea7fe": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "71ce208e20d6483abb9ed923510c86d7": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_d69dc491b3ab44d7852b21873ed7bb7f",
+ "placeholder": "",
+ "style": "IPY_MODEL_f401d53bf28e44eb906bce6c05412662",
+ "value": " 51760/51760 [00:01<00:00, 45512.81 examples/s]"
+ }
+ },
+ "7358cdad832342c983e31efb8754ab78": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "73e352a3404f4c7dad0737f57d29e92f": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_988a0e8c1f89446086858da0a891a79c",
+ "IPY_MODEL_4ccedf0d93094e63b57a0f8a434fba06",
+ "IPY_MODEL_6b2012c3f88547af8884a9ea90e3164b"
+ ],
+ "layout": "IPY_MODEL_7e29cb8dd4df4d5b94407cd8fd3f2011"
+ }
+ },
+ "74501720ac7e4dbb911a4a99b3633bc6": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "78e5400bff924a92a4cc61c4ff18b182": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_b9b313fd861948f5aba25b24b1518d30",
+ "placeholder": "",
+ "style": "IPY_MODEL_4c666f4ace3943f8b80ecd20e7503236",
+ "value": " 1.18k/1.18k [00:00<00:00, 31.3kB/s]"
+ }
+ },
+ "7975adbc2ec5489ea7fa0167e620d85c": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_6e34619b45934040b6092e6fb01ea7fe",
+ "max": 51760,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_271ddaa553a042d09b6db7b450643d8f",
+ "value": 51760
+ }
+ },
+ "7e29cb8dd4df4d5b94407cd8fd3f2011": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "810ff6c0e17d4fa09a30fef27eacff90": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "89965917796a4f81b899fdc7685f33df": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "89b2ef0dbfea47ab8e6f8d659e3351d1": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_b8908fa0df3743ecb9d12983a739104f",
+ "placeholder": "",
+ "style": "IPY_MODEL_177c78fce95d4b4ab33057c5a048d693",
+ "value": " 9.09M/9.09M [00:00<00:00, 32.6MB/s]"
+ }
+ },
+ "8b3505352a5a42bf910428c40ce40465": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_49277aeeac16434a865a4d12308b1abc",
+ "placeholder": "",
+ "style": "IPY_MODEL_2157f01726d748f8a9ae4a00664430da",
+ "value": " 5.70G/5.70G [01:02<00:00, 30.1MB/s]"
+ }
+ },
+ "8fc142b628fb40568730234de1cafde2": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_4ae7e449e4ea4c729b5f34607c18ebae",
+ "max": 172,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_3572201bd4d74a58b7a665f9bdfdcdba",
+ "value": 172
+ }
+ },
+ "9367047a800747f79c6b225d92397846": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "938f45f1b3e24118b815d96ae34ba86a": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "95fbe66647904c06a20f640630d6dc0e": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_b0a370dc20654b279b9680692e34418e",
+ "placeholder": "",
+ "style": "IPY_MODEL_cfeb365ddf7548d58b2557f22737fcf5",
+ "value": " 11.6k/11.6k [00:00<00:00, 716kB/s]"
+ }
+ },
+ "988a0e8c1f89446086858da0a891a79c": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_ad2be500fc164c0f86f33e914ef8e6a0",
+ "placeholder": "",
+ "style": "IPY_MODEL_5234566b1bfc4655b8d582ea5b46ed9f",
+ "value": "Downloading data: 100%"
+ }
+ },
+ "98c58f23f4d549518832cb2d18f796e8": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_09b76013aa9e45efb6deb23a7a0d0925",
+ "IPY_MODEL_39b29a75374b45c0a22506010be2b84e",
+ "IPY_MODEL_78e5400bff924a92a4cc61c4ff18b182"
+ ],
+ "layout": "IPY_MODEL_2a58d04b428c46f4b3dbadd3bc6cd529"
+ }
+ },
+ "99fdbb0300c14c139d1937c646f0cfe7": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_7358cdad832342c983e31efb8754ab78",
+ "placeholder": "",
+ "style": "IPY_MODEL_e9adf418296e436fb48bb9f78885598b",
+ "value": " 51760/51760 [00:01<00:00, 38665.95 examples/s]"
+ }
+ },
+ "9f679ad3ec7f4fe8ad0510ffb57bc2ab": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_4ea63adfce694725bdba878aef709dd3",
+ "placeholder": "",
+ "style": "IPY_MODEL_74501720ac7e4dbb911a4a99b3633bc6",
+ "value": "tokenizer.json: 100%"
+ }
+ },
+ "a0037bdccf254159becde630bee3d1db": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "a069d2ab23824f29aa320ac256e2cfe9": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "a0bf9160eb2647409b3200270914b90f": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "a41dc44766444a998bec2d777f249d23": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "a8464a4c711e4e00aafdfc919b60d07e": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_fb995c740590427b882572c81d4e848c",
+ "placeholder": "",
+ "style": "IPY_MODEL_201b59ccd9f845e197029b57e424aefc",
+ "value": " 172/172 [00:00<00:00, 12.0kB/s]"
+ }
+ },
+ "a9f0cc51fc3d4d7b874c32dcf1c5bdf2": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "ad2be500fc164c0f86f33e914ef8e6a0": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "b0240cd9a4554b29ae11f8051984a1c6": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_edaf890370314a218f138015faa0b05d",
+ "placeholder": "",
+ "style": "IPY_MODEL_697f027529b54ee9956bae78a11e0611",
+ "value": "Map: 100%"
+ }
+ },
+ "b0a370dc20654b279b9680692e34418e": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "b518dcee69074b87be73957cd810e7ed": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_d891f8d0b1fc462f8008d02bb2a15692",
+ "placeholder": "",
+ "style": "IPY_MODEL_cced8fd7e998472794f3f3e3018956a5",
+ "value": "tokenizer_config.json: 100%"
+ }
+ },
+ "b8908fa0df3743ecb9d12983a739104f": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "b993eaec6b224440bf80c0958c6fb536": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "b9b313fd861948f5aba25b24b1518d30": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "ba90fdb8822d47dab7ba203bee297f37": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_0f8b6bfe16894500838793f2491d403f",
+ "placeholder": "",
+ "style": "IPY_MODEL_bb19f6c747754682a514373a3a0535ba",
+ "value": "Downloading readme: 100%"
+ }
+ },
+ "bb19f6c747754682a514373a3a0535ba": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "bc883d4cf13e4f8b8a4fe5f410cb6efd": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_e9159e03e61f4f56978ece9c3bca49b2",
+ "max": 51760,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_810ff6c0e17d4fa09a30fef27eacff90",
+ "value": 51760
+ }
+ },
+ "c161d94df0f04feba9542237e0856c22": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "c22f71b1f85843209d7e5321506b9cb9": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_1f44c9ce1adf470cbb19784493ed209f",
+ "IPY_MODEL_f1addc4479d849879e743cf9089e6540",
+ "IPY_MODEL_8b3505352a5a42bf910428c40ce40465"
+ ],
+ "layout": "IPY_MODEL_4c4c88d4c701450692fa0f6b0c5764b0"
+ }
+ },
+ "c4f2b06a82fd4987b8b659524a7b503b": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "cca8113c54c0495daedce1327bf9c68b": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_e02f9b7849c64531835eb77b860d1c93",
+ "max": 464,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_56aee4853b7740e6a977254f5d1fa66d",
+ "value": 464
+ }
+ },
+ "cced8fd7e998472794f3f3e3018956a5": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "cf245afeb1c04f29a24d291608c3d157": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_b518dcee69074b87be73957cd810e7ed",
+ "IPY_MODEL_e29104486d594b2992d7285e0ef77371",
+ "IPY_MODEL_6578fd7acdb54c4c93528ea431fd0144"
+ ],
+ "layout": "IPY_MODEL_d35db8148a354c56aaac56dbae22536f"
+ }
+ },
+ "cfe8cae0e22b495bafa221a63d13b283": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "cfeb365ddf7548d58b2557f22737fcf5": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "d1b47d39450d4019ae85c9b2f943eeaf": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_4dcf6ff672d24983a1877a8431709aa9",
+ "IPY_MODEL_7975adbc2ec5489ea7fa0167e620d85c",
+ "IPY_MODEL_71ce208e20d6483abb9ed923510c86d7"
+ ],
+ "layout": "IPY_MODEL_cfe8cae0e22b495bafa221a63d13b283"
+ }
+ },
+ "d35db8148a354c56aaac56dbae22536f": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "d69dc491b3ab44d7852b21873ed7bb7f": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "d891f8d0b1fc462f8008d02bb2a15692": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "d8e5318cead340c4adbeaccc05d39225": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "daf4cd890b35422683d22fd30bc71e83": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_b0240cd9a4554b29ae11f8051984a1c6",
+ "IPY_MODEL_bc883d4cf13e4f8b8a4fe5f410cb6efd",
+ "IPY_MODEL_99fdbb0300c14c139d1937c646f0cfe7"
+ ],
+ "layout": "IPY_MODEL_c161d94df0f04feba9542237e0856c22"
+ }
+ },
+ "db19fc8d37db4e45a5790a876836d8c4": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "de868e26e7154f62aa86223a539ad421": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "dea41c5260884aa6879b5e1d1697b14f": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "e02f9b7849c64531835eb77b860d1c93": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "e29104486d594b2992d7285e0ef77371": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_a9f0cc51fc3d4d7b874c32dcf1c5bdf2",
+ "max": 50641,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_2f6c70dd266c4816bfad3fd3d192929a",
+ "value": 50641
+ }
+ },
+ "e36a3f9eff0e4cf68834d66b0213ae96": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "e9159e03e61f4f56978ece9c3bca49b2": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "e9adf418296e436fb48bb9f78885598b": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "edaf890370314a218f138015faa0b05d": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "f1addc4479d849879e743cf9089e6540": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_43dec2ede91341f5af60eb522e18e984",
+ "max": 5702746405,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_d8e5318cead340c4adbeaccc05d39225",
+ "value": 5702746405
+ }
+ },
+ "f2df530d22c74977b249dd9fb5f4829b": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_21db8a77b00d4a4e82fdfa608657531f",
+ "max": 9085698,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_6dbbedeca9314e66ae50e44ffa31a414",
+ "value": 9085698
+ }
+ },
+ "f401d53bf28e44eb906bce6c05412662": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "fb995c740590427b882572c81d4e848c": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "fce7a61c25ec4390af43d92b7c473a45": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_30307300bc4e4baf96560e30969a82b6",
+ "IPY_MODEL_8fc142b628fb40568730234de1cafde2",
+ "IPY_MODEL_a8464a4c711e4e00aafdfc919b60d07e"
+ ],
+ "layout": "IPY_MODEL_5f40db8173dd4d76b6ef5ed6d9ec8b6e"
+ }
+ },
+ "fdb1941405ed4e4aa06019933892deb3": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ }
+ }
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
+}
\ No newline at end of file
diff --git a/notebooks/04_tune-small-no-flash-attn.ipynb b/notebooks/04_tune-small-no-flash-attn.ipynb
new file mode 100644
index 0000000000000000000000000000000000000000..f56083454905fe1ba6f9bd3e0538faf3c84faad0
--- /dev/null
+++ b/notebooks/04_tune-small-no-flash-attn.ipynb
@@ -0,0 +1,4607 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {
+ "application/vnd.databricks.v1+cell": {
+ "cellMetadata": {},
+ "inputWidgets": {},
+ "nuid": "0ea8b46b-839b-445b-8043-ccdf4e920ace",
+ "showTitle": false,
+ "title": ""
+ }
+ },
+ "outputs": [],
+ "source": [
+ "%load_ext autoreload\n",
+ "%autoreload 2"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {
+ "application/vnd.databricks.v1+cell": {
+ "cellMetadata": {},
+ "inputWidgets": {},
+ "nuid": "6d394937-6c99-4a7c-9d32-7600a280032f",
+ "showTitle": false,
+ "title": ""
+ }
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "workding dir: /home/inflaton/code/projects/courses/novel-translation\n"
+ ]
+ }
+ ],
+ "source": [
+ "import os\n",
+ "import sys\n",
+ "from pathlib import Path\n",
+ "\n",
+ "workding_dir = str(Path.cwd().parent)\n",
+ "os.chdir(workding_dir)\n",
+ "sys.path.append(workding_dir)\n",
+ "print(\"workding dir:\", workding_dir)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {
+ "application/vnd.databricks.v1+cell": {
+ "cellMetadata": {},
+ "inputWidgets": {},
+ "nuid": "9f67ec60-2f24-411c-84eb-0dd664b44775",
+ "showTitle": false,
+ "title": ""
+ }
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "loading env vars from: /home/inflaton/code/projects/courses/novel-translation/.env\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "True"
+ ]
+ },
+ "execution_count": 3,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "from dotenv import find_dotenv, load_dotenv\n",
+ "\n",
+ "found_dotenv = find_dotenv(\".env\")\n",
+ "\n",
+ "if len(found_dotenv) == 0:\n",
+ " found_dotenv = find_dotenv(\".env.example\")\n",
+ "print(f\"loading env vars from: {found_dotenv}\")\n",
+ "load_dotenv(found_dotenv, override=True)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {
+ "application/vnd.databricks.v1+cell": {
+ "cellMetadata": {},
+ "inputWidgets": {},
+ "nuid": "f1597656-8042-4878-9d3b-9ebfb8dd86dc",
+ "showTitle": false,
+ "title": ""
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "('unsloth/Qwen2-0.5B-Instruct-bnb-4bit',\n",
+ " True,\n",
+ " None,\n",
+ " None,\n",
+ " 2048,\n",
+ " 10,\n",
+ " None,\n",
+ " 'datasets/mac/mac.tsv',\n",
+ " 'results/mac-results_v3.csv')"
+ ]
+ },
+ "execution_count": 4,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "import os\n",
+ "\n",
+ "model_name = os.getenv(\"MODEL_NAME\")\n",
+ "token = os.getenv(\"HF_TOKEN\") or None\n",
+ "load_in_4bit = os.getenv(\"LOAD_IN_4BIT\") == \"true\"\n",
+ "local_model = os.getenv(\"LOCAL_MODEL\")\n",
+ "hub_model = os.getenv(\"HUB_MODEL\")\n",
+ "num_train_epochs = int(os.getenv(\"NUM_TRAIN_EPOCHS\") or 0)\n",
+ "data_path = os.getenv(\"DATA_PATH\")\n",
+ "results_path = os.getenv(\"RESULTS_PATH\")\n",
+ "\n",
+ "max_seq_length = 2048 # Choose any! We auto support RoPE Scaling internally!\n",
+ "dtype = (\n",
+ " None # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+\n",
+ ")\n",
+ "\n",
+ "model_name, load_in_4bit, local_model, hub_model, max_seq_length, num_train_epochs, dtype, data_path, results_path"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Fri Jun 21 18:14:08 2024 \n",
+ "+---------------------------------------------------------------------------------------+\n",
+ "| NVIDIA-SMI 545.23.07 Driver Version: 546.12 CUDA Version: 12.3 |\n",
+ "|-----------------------------------------+----------------------+----------------------+\n",
+ "| GPU Name Persistence-M | Bus-Id Disp.A | Volatile Uncorr. ECC |\n",
+ "| Fan Temp Perf Pwr:Usage/Cap | Memory-Usage | GPU-Util Compute M. |\n",
+ "| | | MIG M. |\n",
+ "|=========================================+======================+======================|\n",
+ "| 0 NVIDIA GeForce RTX 4080 ... On | 00000000:01:00.0 On | N/A |\n",
+ "| N/A 54C P5 7W / 150W | 234MiB / 12282MiB | 42% Default |\n",
+ "| | | N/A |\n",
+ "+-----------------------------------------+----------------------+----------------------+\n",
+ " \n",
+ "+---------------------------------------------------------------------------------------+\n",
+ "| Processes: |\n",
+ "| GPU GI CI PID Type Process name GPU Memory |\n",
+ "| ID ID Usage |\n",
+ "|=======================================================================================|\n",
+ "| No running processes found |\n",
+ "+---------------------------------------------------------------------------------------+\n"
+ ]
+ }
+ ],
+ "source": [
+ "!nvidia-smi"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\u001b[33mWARNING: Package(s) not found: flash-attn\u001b[0m\u001b[33m\n",
+ "\u001b[0m"
+ ]
+ }
+ ],
+ "source": [
+ "!pip show flash-attn"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Current Directory:\n",
+ "/home/inflaton/code/projects/courses/novel-translation\n",
+ "Tuning unsloth/Qwen2-0.5B-Instruct-bnb-4bit\n",
+ "🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.\n",
+ "[nltk_data] Downloading package wordnet to /home/inflaton/nltk_data...\n",
+ "[nltk_data] Package wordnet is already up-to-date!\n",
+ "[nltk_data] Downloading package punkt to /home/inflaton/nltk_data...\n",
+ "[nltk_data] Package punkt is already up-to-date!\n",
+ "[nltk_data] Downloading package omw-1.4 to /home/inflaton/nltk_data...\n",
+ "[nltk_data] Package omw-1.4 is already up-to-date!\n",
+ "loading /home/inflaton/code/projects/courses/novel-translation/translation_engine_v3.py\n",
+ "loading env vars from: /home/inflaton/code/projects/courses/novel-translation/.env\n",
+ "unsloth/Qwen2-0.5B-Instruct-bnb-4bit True 2048 10 None datasets/mac/mac.tsv results/mac-results_v3.csv True True True\n",
+ "(1) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n",
+ "0.0 GB of memory reserved.\n",
+ "loading model: unsloth/Qwen2-0.5B-Instruct-bnb-4bit\n",
+ "==((====))== Unsloth: Fast Qwen2 patching release 2024.6\n",
+ " \\\\ /| GPU: NVIDIA GeForce RTX 4080 Laptop GPU. Max memory: 11.994 GB. Platform = Linux.\n",
+ "O^O/ \\_/ \\ Pytorch: 2.3.0. CUDA = 8.9. CUDA Toolkit = 12.1.\n",
+ "\\ / Bfloat16 = TRUE. Xformers = 0.0.26.post1. FA = False.\n",
+ " \"-____-\" Free Apache license: http://github.com/unslothai/unsloth\n",
+ "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n",
+ "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n",
+ "(2) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n",
+ "0.633 GB of memory reserved.\n",
+ "loading train/test data files\n",
+ "DatasetDict({\n",
+ " train: Dataset({\n",
+ " features: ['chinese', 'english', 'text', 'prompt'],\n",
+ " num_rows: 4528\n",
+ " })\n",
+ " test: Dataset({\n",
+ " features: ['chinese', 'english', 'text', 'prompt'],\n",
+ " num_rows: 1133\n",
+ " })\n",
+ "})\n",
+ "Evaluating base model: unsloth/Qwen2-0.5B-Instruct-bnb-4bit\n",
+ "100%|█████████████████████████████████████| 1133/1133 [1:21:58<00:00, 4.34s/it]\n",
+ " chinese ... unsloth/Qwen2-0.5B-Instruct-bnb-4bit\n",
+ "0 老耿端起枪,眯缝起一只三角眼,一搂扳机响了枪,冰雹般的金麻雀劈哩啪啦往下落,铁砂子在柳枝间飞... ... The gun is lifted by Old Teng, with his eyes c...\n",
+ "\n",
+ "[1 rows x 7 columns]\n",
+ "(3) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n",
+ "3.043 GB of memory reserved.\n",
+ "Unsloth 2024.6 patched 24 layers with 0 QKV layers, 24 O layers and 24 MLP layers.\n",
+ "(4) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n",
+ "3.043 GB of memory reserved.\n",
+ "==((====))== Unsloth - 2x faster free finetuning | Num GPUs = 1\n",
+ " \\\\ /| Num examples = 4,528 | Num Epochs = 10\n",
+ "O^O/ \\_/ \\ Batch size per device = 2 | Gradient Accumulation steps = 4\n",
+ "\\ / Total batch size = 8 | Total steps = 5,660\n",
+ " \"-____-\" Number of trainable parameters = 8,798,208\n",
+ "{'loss': 1.9401, 'grad_norm': 0.9639493823051453, 'learning_rate': 0.00019664014146772768, 'epoch': 0.18}\n",
+ "{'loss': 1.7761, 'grad_norm': 0.8035507798194885, 'learning_rate': 0.0001931034482758621, 'epoch': 0.35}\n",
+ "{'loss': 1.7147, 'grad_norm': 0.9184380769729614, 'learning_rate': 0.00018956675508399648, 'epoch': 0.53}\n",
+ "{'loss': 1.7157, 'grad_norm': 0.7518640160560608, 'learning_rate': 0.00018603006189213086, 'epoch': 0.71}\n",
+ "{'loss': 1.6863, 'grad_norm': 0.8273285031318665, 'learning_rate': 0.00018249336870026527, 'epoch': 0.88}\n",
+ "{'loss': 1.608, 'grad_norm': 0.8058149814605713, 'learning_rate': 0.00017895667550839965, 'epoch': 1.06}\n",
+ "{'loss': 1.4919, 'grad_norm': 0.913749098777771, 'learning_rate': 0.00017541998231653406, 'epoch': 1.24}\n",
+ "{'loss': 1.5404, 'grad_norm': 0.9821408987045288, 'learning_rate': 0.00017188328912466844, 'epoch': 1.41}\n",
+ "{'loss': 1.4722, 'grad_norm': 1.0769108533859253, 'learning_rate': 0.00016834659593280285, 'epoch': 1.59}\n",
+ "{'loss': 1.4689, 'grad_norm': 0.990247368812561, 'learning_rate': 0.00016480990274093723, 'epoch': 1.77}\n",
+ "{'loss': 1.5277, 'grad_norm': 1.0138424634933472, 'learning_rate': 0.00016127320954907164, 'epoch': 1.94}\n",
+ "{'loss': 1.3496, 'grad_norm': 1.163719892501831, 'learning_rate': 0.000157736516357206, 'epoch': 2.12}\n",
+ "{'loss': 1.2155, 'grad_norm': 1.1911096572875977, 'learning_rate': 0.0001541998231653404, 'epoch': 2.3}\n",
+ "{'loss': 1.2574, 'grad_norm': 1.316228985786438, 'learning_rate': 0.0001506631299734748, 'epoch': 2.47}\n",
+ "{'loss': 1.2555, 'grad_norm': 1.343137502670288, 'learning_rate': 0.0001471264367816092, 'epoch': 2.65}\n",
+ "{'loss': 1.2662, 'grad_norm': 1.177164077758789, 'learning_rate': 0.0001435897435897436, 'epoch': 2.83}\n",
+ "{'loss': 1.2342, 'grad_norm': 1.1601923704147339, 'learning_rate': 0.000140053050397878, 'epoch': 3.0}\n",
+ "{'loss': 0.9801, 'grad_norm': 1.4796860218048096, 'learning_rate': 0.0001365163572060124, 'epoch': 3.18}\n",
+ "{'loss': 0.9924, 'grad_norm': 1.5130630731582642, 'learning_rate': 0.00013297966401414678, 'epoch': 3.36}\n",
+ "{'loss': 0.9984, 'grad_norm': 1.5380527973175049, 'learning_rate': 0.0001294429708222812, 'epoch': 3.53}\n",
+ "{'loss': 0.9815, 'grad_norm': 1.570404291152954, 'learning_rate': 0.00012590627763041555, 'epoch': 3.71}\n",
+ "{'loss': 1.0242, 'grad_norm': 1.5608525276184082, 'learning_rate': 0.00012236958443854996, 'epoch': 3.89}\n",
+ "{'loss': 0.9313, 'grad_norm': 1.5546534061431885, 'learning_rate': 0.00011883289124668435, 'epoch': 4.06}\n",
+ "{'loss': 0.74, 'grad_norm': 1.7187141180038452, 'learning_rate': 0.00011529619805481875, 'epoch': 4.24}\n",
+ "{'loss': 0.7402, 'grad_norm': 1.848803162574768, 'learning_rate': 0.00011175950486295315, 'epoch': 4.42}\n",
+ "{'loss': 0.7757, 'grad_norm': 2.113391160964966, 'learning_rate': 0.00010822281167108754, 'epoch': 4.59}\n",
+ "{'loss': 0.7729, 'grad_norm': 2.1327595710754395, 'learning_rate': 0.00010468611847922194, 'epoch': 4.77}\n",
+ "{'loss': 0.7938, 'grad_norm': 2.1906380653381348, 'learning_rate': 0.00010114942528735633, 'epoch': 4.95}\n",
+ "{'loss': 0.631, 'grad_norm': 1.9121012687683105, 'learning_rate': 9.761273209549072e-05, 'epoch': 5.12}\n",
+ "{'loss': 0.556, 'grad_norm': 1.620622992515564, 'learning_rate': 9.407603890362513e-05, 'epoch': 5.3}\n",
+ "{'loss': 0.5605, 'grad_norm': 1.618948221206665, 'learning_rate': 9.053934571175951e-05, 'epoch': 5.48}\n",
+ "{'loss': 0.5889, 'grad_norm': 1.811829686164856, 'learning_rate': 8.70026525198939e-05, 'epoch': 5.65}\n",
+ "{'loss': 0.5825, 'grad_norm': 2.1219239234924316, 'learning_rate': 8.34659593280283e-05, 'epoch': 5.83}\n",
+ "{'loss': 0.5787, 'grad_norm': 2.149881601333618, 'learning_rate': 7.99292661361627e-05, 'epoch': 6.01}\n",
+ "{'loss': 0.3971, 'grad_norm': 1.961249589920044, 'learning_rate': 7.639257294429708e-05, 'epoch': 6.18}\n",
+ "{'loss': 0.4116, 'grad_norm': 1.9004943370819092, 'learning_rate': 7.285587975243147e-05, 'epoch': 6.36}\n",
+ "{'loss': 0.429, 'grad_norm': 2.7660951614379883, 'learning_rate': 6.931918656056587e-05, 'epoch': 6.54}\n",
+ "{'loss': 0.4209, 'grad_norm': 1.6345818042755127, 'learning_rate': 6.578249336870027e-05, 'epoch': 6.71}\n",
+ "{'loss': 0.4275, 'grad_norm': 1.6811296939849854, 'learning_rate': 6.224580017683466e-05, 'epoch': 6.89}\n",
+ "{'loss': 0.3852, 'grad_norm': 1.5187770128250122, 'learning_rate': 5.870910698496905e-05, 'epoch': 7.07}\n",
+ "{'loss': 0.2924, 'grad_norm': 1.5174031257629395, 'learning_rate': 5.517241379310345e-05, 'epoch': 7.24}\n",
+ "{'loss': 0.3134, 'grad_norm': 1.481117606163025, 'learning_rate': 5.163572060123785e-05, 'epoch': 7.42}\n",
+ "{'loss': 0.3139, 'grad_norm': 1.8529402017593384, 'learning_rate': 4.809902740937224e-05, 'epoch': 7.6}\n",
+ "{'loss': 0.303, 'grad_norm': 1.9317971467971802, 'learning_rate': 4.4562334217506634e-05, 'epoch': 7.77}\n",
+ "{'loss': 0.3176, 'grad_norm': 1.7100918292999268, 'learning_rate': 4.1025641025641023e-05, 'epoch': 7.95}\n",
+ "{'loss': 0.2473, 'grad_norm': 1.5257225036621094, 'learning_rate': 3.7488947833775426e-05, 'epoch': 8.13}\n",
+ "{'loss': 0.2267, 'grad_norm': 1.563258171081543, 'learning_rate': 3.3952254641909815e-05, 'epoch': 8.3}\n",
+ "{'loss': 0.2282, 'grad_norm': 1.091956377029419, 'learning_rate': 3.041556145004421e-05, 'epoch': 8.48}\n",
+ "{'loss': 0.2358, 'grad_norm': 1.2666215896606445, 'learning_rate': 2.6878868258178604e-05, 'epoch': 8.66}\n",
+ "{'loss': 0.2354, 'grad_norm': 2.033458948135376, 'learning_rate': 2.3342175066313e-05, 'epoch': 8.83}\n",
+ "{'loss': 0.2375, 'grad_norm': 1.1279031038284302, 'learning_rate': 1.9805481874447392e-05, 'epoch': 9.01}\n",
+ "{'loss': 0.181, 'grad_norm': 1.1606773138046265, 'learning_rate': 1.6268788682581788e-05, 'epoch': 9.19}\n",
+ "{'loss': 0.1902, 'grad_norm': 1.120147466659546, 'learning_rate': 1.273209549071618e-05, 'epoch': 9.36}\n",
+ "{'loss': 0.1874, 'grad_norm': 1.1482255458831787, 'learning_rate': 9.195402298850575e-06, 'epoch': 9.54}\n",
+ "{'loss': 0.1878, 'grad_norm': 1.6668341159820557, 'learning_rate': 5.658709106984969e-06, 'epoch': 9.72}\n",
+ " 97%|███████████████████████████████████▉ | 5500/5660 [3:50:58<06:29, 2.43s/it]/home/inflaton/miniconda3/envs/unsloth_env/lib/python3.10/site-packages/peft/utils/other.py:611: UserWarning: Unable to fetch remote file due to the following error (MaxRetryError('HTTPSConnectionPool(host=\\'huggingface.co\\', port=443): Max retries exceeded with url: /unsloth/Qwen2-0.5B-Instruct-bnb-4bit/resolve/main/config.json (Caused by NameResolutionError(\": Failed to resolve \\'huggingface.co\\' ([Errno -3] Temporary failure in name resolution)\"))'), '(Request ID: c47e2483-6a5c-4a75-8fdb-c35160bdbb0d)') - silently ignoring the lookup for the file config.json in unsloth/Qwen2-0.5B-Instruct-bnb-4bit.\n",
+ " warnings.warn(\n",
+ "/home/inflaton/miniconda3/envs/unsloth_env/lib/python3.10/site-packages/peft/utils/save_and_load.py:195: UserWarning: Could not find a config file in unsloth/Qwen2-0.5B-Instruct-bnb-4bit - will assume that the vocabulary was not modified.\n",
+ " warnings.warn(\n",
+ "{'loss': 0.1887, 'grad_norm': 1.131452202796936, 'learning_rate': 2.1220159151193635e-06, 'epoch': 9.89}\n",
+ "{'train_runtime': 14270.4989, 'train_samples_per_second': 3.173, 'train_steps_per_second': 0.397, 'train_loss': 0.7989168851198661, 'epoch': 10.0}\n",
+ "100%|█████████████████████████████████████| 5660/5660 [3:57:50<00:00, 2.52s/it]\n",
+ "(5) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n",
+ "14270.4989 seconds used for training.\n",
+ "237.84 minutes used for training.\n",
+ "Peak reserved memory = 3.043 GB.\n",
+ "Peak reserved memory for training = 0.0 GB.\n",
+ "Peak reserved memory % of max memory = 25.371 %.\n",
+ "Peak reserved memory for training % of max memory = 0.0 %.\n",
+ "Evaluating fine-tuned model: unsloth/Qwen2-0.5B-Instruct-bnb-4bit\n",
+ "100%|█████████████████████████████████████| 1133/1133 [1:52:28<00:00, 5.96s/it]\n",
+ " chinese ... unsloth/Qwen2-0.5B-Instruct-bnb-4bit(finetuned)\n",
+ "0 老耿端起枪,眯缝起一只三角眼,一搂扳机响了枪,冰雹般的金麻雀劈哩啪啦往下落,铁砂子在柳枝间飞... ... Old Geng raised his rifle, squinting, and aimi...\n",
+ "\n",
+ "[1 rows x 8 columns]\n",
+ "(6) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n",
+ "3.895 GB of memory reserved.\n",
+ "Unsloth: Merging 4bit and LoRA weights to 4bit...\n",
+ "This might take 5 minutes...\n",
+ "Done.\n",
+ "Unsloth: Saving tokenizer... Done.\n",
+ "Unsloth: Saving model... This might take 10 minutes for Llama-7b... Done.\n",
+ "Unsloth: Merging 4bit and LoRA weights to 4bit...\n",
+ "This might take 5 minutes...\n",
+ "Done.\n",
+ "Unsloth: Saving 4bit Bitsandbytes model. Please wait...\n",
+ "README.md: 100%|███████████████████████████████| 593/593 [00:00<00:00, 3.29MB/s]\n",
+ "model.safetensors: 100%|█████████████████████| 493M/493M [01:07<00:00, 7.32MB/s]\n",
+ "README.md: 100%|███████████████████████████████| 599/599 [00:00<00:00, 2.60MB/s]\n",
+ "Saved merged_4bit model to https://huggingface.co/Qwen2-0.5B-Instruct-bnb-4bit-MAC-merged_4bit_forced\n",
+ "Tuning unsloth/Qwen2-1.5B-Instruct-bnb-4bit\n",
+ "🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.\n",
+ "[nltk_data] Downloading package wordnet to /home/inflaton/nltk_data...\n",
+ "[nltk_data] Package wordnet is already up-to-date!\n",
+ "[nltk_data] Downloading package punkt to /home/inflaton/nltk_data...\n",
+ "[nltk_data] Package punkt is already up-to-date!\n",
+ "[nltk_data] Downloading package omw-1.4 to /home/inflaton/nltk_data...\n",
+ "[nltk_data] Package omw-1.4 is already up-to-date!\n",
+ "loading /home/inflaton/code/projects/courses/novel-translation/translation_engine_v3.py\n",
+ "loading env vars from: /home/inflaton/code/projects/courses/novel-translation/.env\n",
+ "unsloth/Qwen2-1.5B-Instruct-bnb-4bit True 2048 10 None datasets/mac/mac.tsv results/mac-results_v3.csv True True True\n",
+ "(1) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n",
+ "0.0 GB of memory reserved.\n",
+ "loading model: unsloth/Qwen2-1.5B-Instruct-bnb-4bit\n",
+ "==((====))== Unsloth: Fast Qwen2 patching release 2024.6\n",
+ " \\\\ /| GPU: NVIDIA GeForce RTX 4080 Laptop GPU. Max memory: 11.994 GB. Platform = Linux.\n",
+ "O^O/ \\_/ \\ Pytorch: 2.3.0. CUDA = 8.9. CUDA Toolkit = 12.1.\n",
+ "\\ / Bfloat16 = TRUE. Xformers = 0.0.26.post1. FA = False.\n",
+ " \"-____-\" Free Apache license: http://github.com/unslothai/unsloth\n",
+ "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n",
+ "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n",
+ "(2) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n",
+ "1.516 GB of memory reserved.\n",
+ "loading train/test data files\n",
+ "DatasetDict({\n",
+ " train: Dataset({\n",
+ " features: ['chinese', 'english', 'text', 'prompt'],\n",
+ " num_rows: 4528\n",
+ " })\n",
+ " test: Dataset({\n",
+ " features: ['chinese', 'english', 'text', 'prompt'],\n",
+ " num_rows: 1133\n",
+ " })\n",
+ "})\n",
+ "Evaluating base model: unsloth/Qwen2-1.5B-Instruct-bnb-4bit\n",
+ "100%|█████████████████████████████████████| 1133/1133 [1:32:31<00:00, 4.90s/it]\n",
+ " chinese ... unsloth/Qwen2-1.5B-Instruct-bnb-4bit\n",
+ "0 老耿端起枪,眯缝起一只三角眼,一搂扳机响了枪,冰雹般的金麻雀劈哩啪啦往下落,铁砂子在柳枝间飞... ... Old Geer lifted his gun and squinted at it thr...\n",
+ "\n",
+ "[1 rows x 9 columns]\n",
+ "(3) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n",
+ "3.945 GB of memory reserved.\n",
+ "Unsloth 2024.6 patched 28 layers with 0 QKV layers, 28 O layers and 28 MLP layers.\n",
+ "(4) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n",
+ "3.945 GB of memory reserved.\n",
+ "==((====))== Unsloth - 2x faster free finetuning | Num GPUs = 1\n",
+ " \\\\ /| Num examples = 4,528 | Num Epochs = 10\n",
+ "O^O/ \\_/ \\ Batch size per device = 2 | Gradient Accumulation steps = 4\n",
+ "\\ / Total batch size = 8 | Total steps = 5,660\n",
+ " \"-____-\" Number of trainable parameters = 18,464,768\n",
+ "{'loss': 1.7413, 'grad_norm': 0.6446139216423035, 'learning_rate': 0.00019664014146772768, 'epoch': 0.18}\n",
+ "{'loss': 1.5677, 'grad_norm': 0.6344625353813171, 'learning_rate': 0.0001931034482758621, 'epoch': 0.35}\n",
+ "{'loss': 1.5159, 'grad_norm': 0.6517081260681152, 'learning_rate': 0.00018956675508399648, 'epoch': 0.53}\n",
+ "{'loss': 1.5171, 'grad_norm': 0.5709907412528992, 'learning_rate': 0.00018603006189213086, 'epoch': 0.71}\n",
+ "{'loss': 1.4959, 'grad_norm': 0.568977952003479, 'learning_rate': 0.00018249336870026527, 'epoch': 0.88}\n",
+ "{'loss': 1.418, 'grad_norm': 0.6047976016998291, 'learning_rate': 0.00017895667550839965, 'epoch': 1.06}\n",
+ "{'loss': 1.2972, 'grad_norm': 0.7236104607582092, 'learning_rate': 0.00017541998231653406, 'epoch': 1.24}\n",
+ "{'loss': 1.3396, 'grad_norm': 0.6862805485725403, 'learning_rate': 0.00017188328912466844, 'epoch': 1.41}\n",
+ "{'loss': 1.2789, 'grad_norm': 0.8805968165397644, 'learning_rate': 0.00016834659593280285, 'epoch': 1.59}\n",
+ "{'loss': 1.2793, 'grad_norm': 0.8751853108406067, 'learning_rate': 0.00016480990274093723, 'epoch': 1.77}\n",
+ "{'loss': 1.3315, 'grad_norm': 0.8396161794662476, 'learning_rate': 0.00016127320954907164, 'epoch': 1.94}\n",
+ "{'loss': 1.1276, 'grad_norm': 0.9836744666099548, 'learning_rate': 0.000157736516357206, 'epoch': 2.12}\n",
+ "{'loss': 0.9962, 'grad_norm': 1.0947535037994385, 'learning_rate': 0.0001541998231653404, 'epoch': 2.3}\n",
+ "{'loss': 1.0297, 'grad_norm': 1.1813771724700928, 'learning_rate': 0.0001506631299734748, 'epoch': 2.47}\n",
+ "{'loss': 1.0349, 'grad_norm': 1.0732464790344238, 'learning_rate': 0.0001471264367816092, 'epoch': 2.65}\n",
+ "{'loss': 1.0351, 'grad_norm': 1.4764126539230347, 'learning_rate': 0.0001435897435897436, 'epoch': 2.83}\n",
+ "{'loss': 1.0081, 'grad_norm': 0.9122354388237, 'learning_rate': 0.000140053050397878, 'epoch': 3.0}\n",
+ "{'loss': 0.698, 'grad_norm': 1.328641414642334, 'learning_rate': 0.0001365163572060124, 'epoch': 3.18}\n",
+ "{'loss': 0.7099, 'grad_norm': 1.3143669366836548, 'learning_rate': 0.00013297966401414678, 'epoch': 3.36}\n",
+ "{'loss': 0.7166, 'grad_norm': 1.3019964694976807, 'learning_rate': 0.0001294429708222812, 'epoch': 3.53}\n",
+ "{'loss': 0.7053, 'grad_norm': 1.4416537284851074, 'learning_rate': 0.00012590627763041555, 'epoch': 3.71}\n",
+ "{'loss': 0.7448, 'grad_norm': 1.191597819328308, 'learning_rate': 0.00012236958443854996, 'epoch': 3.89}\n",
+ "{'loss': 0.6435, 'grad_norm': 1.6522774696350098, 'learning_rate': 0.00011883289124668435, 'epoch': 4.06}\n",
+ "{'loss': 0.4617, 'grad_norm': 1.5974637269973755, 'learning_rate': 0.00011529619805481875, 'epoch': 4.24}\n",
+ "{'loss': 0.4463, 'grad_norm': 2.0055346488952637, 'learning_rate': 0.00011175950486295315, 'epoch': 4.42}\n",
+ "{'loss': 0.4806, 'grad_norm': 1.7279341220855713, 'learning_rate': 0.00010822281167108754, 'epoch': 4.59}\n",
+ "{'loss': 0.4799, 'grad_norm': 1.6954944133758545, 'learning_rate': 0.00010468611847922194, 'epoch': 4.77}\n",
+ "{'loss': 0.4874, 'grad_norm': 1.6931037902832031, 'learning_rate': 0.00010114942528735633, 'epoch': 4.95}\n",
+ "{'loss': 0.3582, 'grad_norm': 1.3343307971954346, 'learning_rate': 9.761273209549072e-05, 'epoch': 5.12}\n",
+ "{'loss': 0.2918, 'grad_norm': 1.6368076801300049, 'learning_rate': 9.407603890362513e-05, 'epoch': 5.3}\n",
+ "{'loss': 0.2989, 'grad_norm': 1.4428879022598267, 'learning_rate': 9.053934571175951e-05, 'epoch': 5.48}\n",
+ "{'loss': 0.3153, 'grad_norm': 1.721048355102539, 'learning_rate': 8.70026525198939e-05, 'epoch': 5.65}\n",
+ "{'loss': 0.307, 'grad_norm': 1.464351773262024, 'learning_rate': 8.34659593280283e-05, 'epoch': 5.83}\n",
+ "{'loss': 0.3065, 'grad_norm': 1.1433079242706299, 'learning_rate': 7.99292661361627e-05, 'epoch': 6.01}\n",
+ "{'loss': 0.1922, 'grad_norm': 1.671250343322754, 'learning_rate': 7.639257294429708e-05, 'epoch': 6.18}\n",
+ "{'loss': 0.2034, 'grad_norm': 1.0947823524475098, 'learning_rate': 7.285587975243147e-05, 'epoch': 6.36}\n",
+ "{'loss': 0.2064, 'grad_norm': 1.121410608291626, 'learning_rate': 6.931918656056587e-05, 'epoch': 6.54}\n",
+ "{'loss': 0.208, 'grad_norm': 1.0732439756393433, 'learning_rate': 6.578249336870027e-05, 'epoch': 6.71}\n",
+ "{'loss': 0.2129, 'grad_norm': 1.4153015613555908, 'learning_rate': 6.224580017683466e-05, 'epoch': 6.89}\n",
+ "{'loss': 0.1935, 'grad_norm': 0.7308939099311829, 'learning_rate': 5.870910698496905e-05, 'epoch': 7.07}\n",
+ "{'loss': 0.1485, 'grad_norm': 0.49827346205711365, 'learning_rate': 5.517241379310345e-05, 'epoch': 7.24}\n",
+ "{'loss': 0.157, 'grad_norm': 0.9691917300224304, 'learning_rate': 5.163572060123785e-05, 'epoch': 7.42}\n",
+ "{'loss': 0.1574, 'grad_norm': 0.8331239819526672, 'learning_rate': 4.809902740937224e-05, 'epoch': 7.6}\n",
+ "{'loss': 0.1584, 'grad_norm': 0.6648879051208496, 'learning_rate': 4.4562334217506634e-05, 'epoch': 7.77}\n",
+ "{'loss': 0.1577, 'grad_norm': 1.7691469192504883, 'learning_rate': 4.1025641025641023e-05, 'epoch': 7.95}\n",
+ "{'loss': 0.1366, 'grad_norm': 0.4894603192806244, 'learning_rate': 3.7488947833775426e-05, 'epoch': 8.13}\n",
+ "{'loss': 0.1303, 'grad_norm': 0.7218558192253113, 'learning_rate': 3.3952254641909815e-05, 'epoch': 8.3}\n",
+ "{'loss': 0.1311, 'grad_norm': 0.512703537940979, 'learning_rate': 3.041556145004421e-05, 'epoch': 8.48}\n",
+ "{'loss': 0.1333, 'grad_norm': 0.49423089623451233, 'learning_rate': 2.6878868258178604e-05, 'epoch': 8.66}\n",
+ "{'loss': 0.1338, 'grad_norm': 0.8092319965362549, 'learning_rate': 2.3342175066313e-05, 'epoch': 8.83}\n",
+ " 88%|████████████████████████████████▋ | 5000/5660 [3:52:46<30:36, 2.78s/it]/home/inflaton/miniconda3/envs/unsloth_env/lib/python3.10/site-packages/peft/utils/other.py:611: UserWarning: Unable to fetch remote file due to the following error (MaxRetryError('HTTPSConnectionPool(host=\\'huggingface.co\\', port=443): Max retries exceeded with url: /unsloth/Qwen2-1.5B-Instruct-bnb-4bit/resolve/main/config.json (Caused by NameResolutionError(\": Failed to resolve \\'huggingface.co\\' ([Errno -3] Temporary failure in name resolution)\"))'), '(Request ID: 8549560f-a295-478f-bfcb-44574f659439)') - silently ignoring the lookup for the file config.json in unsloth/Qwen2-1.5B-Instruct-bnb-4bit.\n",
+ " warnings.warn(\n",
+ "/home/inflaton/miniconda3/envs/unsloth_env/lib/python3.10/site-packages/peft/utils/save_and_load.py:195: UserWarning: Could not find a config file in unsloth/Qwen2-1.5B-Instruct-bnb-4bit - will assume that the vocabulary was not modified.\n",
+ " warnings.warn(\n",
+ "{'loss': 0.1326, 'grad_norm': 0.4783310890197754, 'learning_rate': 1.9805481874447392e-05, 'epoch': 9.01}\n",
+ "{'loss': 0.1177, 'grad_norm': 0.44080662727355957, 'learning_rate': 1.6268788682581788e-05, 'epoch': 9.19}\n",
+ "{'loss': 0.1184, 'grad_norm': 0.5362476706504822, 'learning_rate': 1.273209549071618e-05, 'epoch': 9.36}\n",
+ "{'loss': 0.1191, 'grad_norm': 0.47040414810180664, 'learning_rate': 9.195402298850575e-06, 'epoch': 9.54}\n",
+ "{'loss': 0.12, 'grad_norm': 0.5143936276435852, 'learning_rate': 5.658709106984969e-06, 'epoch': 9.72}\n",
+ "{'loss': 0.1228, 'grad_norm': 0.433252215385437, 'learning_rate': 2.1220159151193635e-06, 'epoch': 9.89}\n",
+ "{'train_runtime': 15909.7187, 'train_samples_per_second': 2.846, 'train_steps_per_second': 0.356, 'train_loss': 0.5994435462850564, 'epoch': 10.0}\n",
+ "100%|█████████████████████████████████████| 5660/5660 [4:25:09<00:00, 2.81s/it]\n",
+ "(5) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n",
+ "15909.7187 seconds used for training.\n",
+ "265.16 minutes used for training.\n",
+ "Peak reserved memory = 3.945 GB.\n",
+ "Peak reserved memory for training = 0.0 GB.\n",
+ "Peak reserved memory % of max memory = 32.891 %.\n",
+ "Peak reserved memory for training % of max memory = 0.0 %.\n",
+ "Evaluating fine-tuned model: unsloth/Qwen2-1.5B-Instruct-bnb-4bit\n",
+ "100%|█████████████████████████████████████| 1133/1133 [2:17:51<00:00, 7.30s/it]\n",
+ " chinese ... unsloth/Qwen2-1.5B-Instruct-bnb-4bit(finetuned)\n",
+ "0 老耿端起枪,眯缝起一只三角眼,一搂扳机响了枪,冰雹般的金麻雀劈哩啪啦往下落,铁砂子在柳枝间飞... ... Old Geng raised the rifle to his eye, squinted...\n",
+ "\n",
+ "[1 rows x 10 columns]\n",
+ "(6) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n",
+ "5.33 GB of memory reserved.\n",
+ "Unsloth: Merging 4bit and LoRA weights to 4bit...\n",
+ "This might take 5 minutes...\n",
+ "Done.\n",
+ "Unsloth: Saving tokenizer... Done.\n",
+ "Unsloth: Saving model... This might take 10 minutes for Llama-7b... Done.\n",
+ "Unsloth: Merging 4bit and LoRA weights to 4bit...\n",
+ "This might take 5 minutes...\n",
+ "Done.\n",
+ "Unsloth: Saving 4bit Bitsandbytes model. Please wait...\n",
+ "README.md: 100%|███████████████████████████████| 593/593 [00:00<00:00, 3.33MB/s]\n",
+ "model.safetensors: 100%|███████████████████| 1.22G/1.22G [01:41<00:00, 12.0MB/s]\n",
+ "README.md: 100%|███████████████████████████████| 599/599 [00:00<00:00, 3.39MB/s]\n",
+ "Saved merged_4bit model to https://huggingface.co/Qwen2-1.5B-Instruct-bnb-4bit-MAC-merged_4bit_forced\n"
+ ]
+ }
+ ],
+ "source": [
+ "!./tune-small.sh"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Current Directory:\n",
+ "/home/inflaton/code/projects/courses/novel-translation\n",
+ "Tuning unsloth/Qwen2-0.5B-Instruct\n",
+ "🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.\n",
+ "[nltk_data] Downloading package wordnet to /home/inflaton/nltk_data...\n",
+ "[nltk_data] Package wordnet is already up-to-date!\n",
+ "[nltk_data] Downloading package punkt to /home/inflaton/nltk_data...\n",
+ "[nltk_data] Package punkt is already up-to-date!\n",
+ "[nltk_data] Downloading package omw-1.4 to /home/inflaton/nltk_data...\n",
+ "[nltk_data] Package omw-1.4 is already up-to-date!\n",
+ "loading /home/inflaton/code/projects/courses/novel-translation/translation_engine_v3.py\n",
+ "loading env vars from: /home/inflaton/code/projects/courses/novel-translation/.env\n",
+ "unsloth/Qwen2-0.5B-Instruct True 2048 10 None datasets/mac/mac.tsv results/mac-results_v3.csv True True True\n",
+ "(1) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n",
+ "0.0 GB of memory reserved.\n",
+ "loading model: unsloth/Qwen2-0.5B-Instruct\n",
+ "==((====))== Unsloth: Fast Qwen2 patching release 2024.6\n",
+ " \\\\ /| GPU: NVIDIA GeForce RTX 4080 Laptop GPU. Max memory: 11.994 GB. Platform = Linux.\n",
+ "O^O/ \\_/ \\ Pytorch: 2.3.0. CUDA = 8.9. CUDA Toolkit = 12.1.\n",
+ "\\ / Bfloat16 = TRUE. Xformers = 0.0.26.post1. FA = False.\n",
+ " \"-____-\" Free Apache license: http://github.com/unslothai/unsloth\n",
+ "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n",
+ "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n",
+ "(2) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n",
+ "0.633 GB of memory reserved.\n",
+ "loading train/test data files\n",
+ "Map: 100%|█████████████████████████| 4528/4528 [00:00<00:00, 8820.36 examples/s]\n",
+ "Map: 100%|█████████████████████████| 1133/1133 [00:00<00:00, 4632.29 examples/s]\n",
+ "DatasetDict({\n",
+ " train: Dataset({\n",
+ " features: ['chinese', 'english', 'text', 'prompt'],\n",
+ " num_rows: 4528\n",
+ " })\n",
+ " test: Dataset({\n",
+ " features: ['chinese', 'english', 'text', 'prompt'],\n",
+ " num_rows: 1133\n",
+ " })\n",
+ "})\n",
+ "Evaluating base model: unsloth/Qwen2-0.5B-Instruct\n",
+ "100%|█████████████████████████████████████| 1133/1133 [1:18:16<00:00, 4.15s/it]\n",
+ " chinese ... unsloth/Qwen2-0.5B-Instruct\n",
+ "0 老耿端起枪,眯缝起一只三角眼,一搂扳机响了枪,冰雹般的金麻雀劈哩啪啦往下落,铁砂子在柳枝间飞... ... Old Teng raises his gun, closing his eyes with...\n",
+ "\n",
+ "[1 rows x 11 columns]\n",
+ "(3) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n",
+ "0.912 GB of memory reserved.\n",
+ "Unsloth 2024.6 patched 24 layers with 0 QKV layers, 24 O layers and 24 MLP layers.\n",
+ "Map (num_proc=2): 100%|████████████| 4528/4528 [00:02<00:00, 2104.12 examples/s]\n",
+ "(4) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n",
+ "0.912 GB of memory reserved.\n",
+ "==((====))== Unsloth - 2x faster free finetuning | Num GPUs = 1\n",
+ " \\\\ /| Num examples = 4,528 | Num Epochs = 10\n",
+ "O^O/ \\_/ \\ Batch size per device = 2 | Gradient Accumulation steps = 4\n",
+ "\\ / Total batch size = 8 | Total steps = 5,660\n",
+ " \"-____-\" Number of trainable parameters = 8,798,208\n",
+ "{'loss': 1.9401, 'grad_norm': 0.9639493823051453, 'learning_rate': 0.00019664014146772768, 'epoch': 0.18}\n",
+ "{'loss': 1.7762, 'grad_norm': 0.8044034838676453, 'learning_rate': 0.0001931034482758621, 'epoch': 0.35}\n",
+ "{'loss': 1.7147, 'grad_norm': 0.9299402832984924, 'learning_rate': 0.00018956675508399648, 'epoch': 0.53}\n",
+ "{'loss': 1.7153, 'grad_norm': 0.7542973160743713, 'learning_rate': 0.00018603006189213086, 'epoch': 0.71}\n",
+ "{'loss': 1.6864, 'grad_norm': 0.8260002136230469, 'learning_rate': 0.00018249336870026527, 'epoch': 0.88}\n",
+ "{'loss': 1.6077, 'grad_norm': 0.8066288828849792, 'learning_rate': 0.00017895667550839965, 'epoch': 1.06}\n",
+ "{'loss': 1.4918, 'grad_norm': 0.9108980894088745, 'learning_rate': 0.00017541998231653406, 'epoch': 1.24}\n",
+ "{'loss': 1.5405, 'grad_norm': 0.9814554452896118, 'learning_rate': 0.00017188328912466844, 'epoch': 1.41}\n",
+ "{'loss': 1.4721, 'grad_norm': 1.0819430351257324, 'learning_rate': 0.00016834659593280285, 'epoch': 1.59}\n",
+ "{'loss': 1.4689, 'grad_norm': 0.9869484901428223, 'learning_rate': 0.00016480990274093723, 'epoch': 1.77}\n",
+ "{'loss': 1.5278, 'grad_norm': 1.0101701021194458, 'learning_rate': 0.00016127320954907164, 'epoch': 1.94}\n",
+ "{'loss': 1.3491, 'grad_norm': 1.1646143198013306, 'learning_rate': 0.000157736516357206, 'epoch': 2.12}\n",
+ "{'loss': 1.2154, 'grad_norm': 1.185487985610962, 'learning_rate': 0.0001541998231653404, 'epoch': 2.3}\n",
+ "{'loss': 1.2576, 'grad_norm': 1.3092553615570068, 'learning_rate': 0.0001506631299734748, 'epoch': 2.47}\n",
+ "{'loss': 1.2554, 'grad_norm': 1.3459289073944092, 'learning_rate': 0.0001471264367816092, 'epoch': 2.65}\n",
+ "{'loss': 1.2662, 'grad_norm': 1.1843770742416382, 'learning_rate': 0.0001435897435897436, 'epoch': 2.83}\n",
+ "{'loss': 1.2348, 'grad_norm': 1.1434661149978638, 'learning_rate': 0.000140053050397878, 'epoch': 3.0}\n",
+ "{'loss': 0.9799, 'grad_norm': 1.5520676374435425, 'learning_rate': 0.0001365163572060124, 'epoch': 3.18}\n",
+ "{'loss': 0.9922, 'grad_norm': 1.5200737714767456, 'learning_rate': 0.00013297966401414678, 'epoch': 3.36}\n",
+ "{'loss': 0.9983, 'grad_norm': 1.5352424383163452, 'learning_rate': 0.0001294429708222812, 'epoch': 3.53}\n",
+ "{'loss': 0.9806, 'grad_norm': 1.5652823448181152, 'learning_rate': 0.00012590627763041555, 'epoch': 3.71}\n",
+ "{'loss': 1.0233, 'grad_norm': 1.6117287874221802, 'learning_rate': 0.00012236958443854996, 'epoch': 3.89}\n",
+ "{'loss': 0.9315, 'grad_norm': 1.5618888139724731, 'learning_rate': 0.00011883289124668435, 'epoch': 4.06}\n",
+ "{'loss': 0.7403, 'grad_norm': 1.6472711563110352, 'learning_rate': 0.00011529619805481875, 'epoch': 4.24}\n",
+ "{'loss': 0.7402, 'grad_norm': 1.7707853317260742, 'learning_rate': 0.00011175950486295315, 'epoch': 4.42}\n",
+ "{'loss': 0.7748, 'grad_norm': 2.0032260417938232, 'learning_rate': 0.00010822281167108754, 'epoch': 4.59}\n",
+ "{'loss': 0.773, 'grad_norm': 2.1941237449645996, 'learning_rate': 0.00010468611847922194, 'epoch': 4.77}\n",
+ "{'loss': 0.7942, 'grad_norm': 1.99174165725708, 'learning_rate': 0.00010114942528735633, 'epoch': 4.95}\n",
+ "{'loss': 0.6302, 'grad_norm': 2.141026496887207, 'learning_rate': 9.761273209549072e-05, 'epoch': 5.12}\n",
+ "{'loss': 0.555, 'grad_norm': 1.5508298873901367, 'learning_rate': 9.407603890362513e-05, 'epoch': 5.3}\n",
+ "{'loss': 0.5604, 'grad_norm': 1.7151756286621094, 'learning_rate': 9.053934571175951e-05, 'epoch': 5.48}\n",
+ "{'loss': 0.5907, 'grad_norm': 1.838254690170288, 'learning_rate': 8.70026525198939e-05, 'epoch': 5.65}\n",
+ "{'loss': 0.5818, 'grad_norm': 2.1077213287353516, 'learning_rate': 8.34659593280283e-05, 'epoch': 5.83}\n",
+ "{'loss': 0.5784, 'grad_norm': 1.6738381385803223, 'learning_rate': 7.99292661361627e-05, 'epoch': 6.01}\n",
+ "{'loss': 0.3977, 'grad_norm': 3.3859152793884277, 'learning_rate': 7.639257294429708e-05, 'epoch': 6.18}\n",
+ "{'loss': 0.4097, 'grad_norm': 1.9067131280899048, 'learning_rate': 7.285587975243147e-05, 'epoch': 6.36}\n",
+ "{'loss': 0.4299, 'grad_norm': 2.464520215988159, 'learning_rate': 6.931918656056587e-05, 'epoch': 6.54}\n",
+ "{'loss': 0.4206, 'grad_norm': 1.9985911846160889, 'learning_rate': 6.578249336870027e-05, 'epoch': 6.71}\n",
+ "{'loss': 0.4272, 'grad_norm': 1.5266529321670532, 'learning_rate': 6.224580017683466e-05, 'epoch': 6.89}\n",
+ "{'loss': 0.3845, 'grad_norm': 1.98494553565979, 'learning_rate': 5.870910698496905e-05, 'epoch': 7.07}\n",
+ "{'loss': 0.2941, 'grad_norm': 1.8065193891525269, 'learning_rate': 5.517241379310345e-05, 'epoch': 7.24}\n",
+ "{'loss': 0.3126, 'grad_norm': 1.2746621370315552, 'learning_rate': 5.163572060123785e-05, 'epoch': 7.42}\n",
+ "{'loss': 0.3137, 'grad_norm': 2.0079100131988525, 'learning_rate': 4.809902740937224e-05, 'epoch': 7.6}\n",
+ "{'loss': 0.3043, 'grad_norm': 1.8258264064788818, 'learning_rate': 4.4562334217506634e-05, 'epoch': 7.77}\n",
+ "{'loss': 0.3171, 'grad_norm': 1.5290628671646118, 'learning_rate': 4.1025641025641023e-05, 'epoch': 7.95}\n",
+ "{'loss': 0.2472, 'grad_norm': 1.458840250968933, 'learning_rate': 3.7488947833775426e-05, 'epoch': 8.13}\n",
+ "{'loss': 0.2266, 'grad_norm': 1.4306132793426514, 'learning_rate': 3.3952254641909815e-05, 'epoch': 8.3}\n",
+ "{'loss': 0.2287, 'grad_norm': 1.590943455696106, 'learning_rate': 3.041556145004421e-05, 'epoch': 8.48}\n",
+ "{'loss': 0.2362, 'grad_norm': 1.136732816696167, 'learning_rate': 2.6878868258178604e-05, 'epoch': 8.66}\n",
+ "{'loss': 0.2357, 'grad_norm': 2.2578623294830322, 'learning_rate': 2.3342175066313e-05, 'epoch': 8.83}\n",
+ "{'loss': 0.2376, 'grad_norm': 1.0956445932388306, 'learning_rate': 1.9805481874447392e-05, 'epoch': 9.01}\n",
+ "{'loss': 0.1812, 'grad_norm': 1.0962462425231934, 'learning_rate': 1.6268788682581788e-05, 'epoch': 9.19}\n",
+ "{'loss': 0.1901, 'grad_norm': 1.0591093301773071, 'learning_rate': 1.273209549071618e-05, 'epoch': 9.36}\n",
+ "{'loss': 0.1872, 'grad_norm': 1.2788742780685425, 'learning_rate': 9.195402298850575e-06, 'epoch': 9.54}\n",
+ "{'loss': 0.1885, 'grad_norm': 1.6012808084487915, 'learning_rate': 5.658709106984969e-06, 'epoch': 9.72}\n",
+ "{'loss': 0.1893, 'grad_norm': 1.0120514631271362, 'learning_rate': 2.1220159151193635e-06, 'epoch': 9.89}\n",
+ "{'train_runtime': 14483.8236, 'train_samples_per_second': 3.126, 'train_steps_per_second': 0.391, 'train_loss': 0.7988819386849555, 'epoch': 10.0}\n",
+ "100%|█████████████████████████████████████| 5660/5660 [4:01:23<00:00, 2.56s/it]\n",
+ "(5) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n",
+ "14483.8236 seconds used for training.\n",
+ "241.4 minutes used for training.\n",
+ "Peak reserved memory = 1.371 GB.\n",
+ "Peak reserved memory for training = 0.459 GB.\n",
+ "Peak reserved memory % of max memory = 11.431 %.\n",
+ "Peak reserved memory for training % of max memory = 3.827 %.\n",
+ "Evaluating fine-tuned model: unsloth/Qwen2-0.5B-Instruct\n",
+ "100%|█████████████████████████████████████| 1133/1133 [1:40:58<00:00, 5.35s/it]\n",
+ " chinese ... unsloth/Qwen2-0.5B-Instruct(finetuned)\n",
+ "0 老耿端起枪,眯缝起一只三角眼,一搂扳机响了枪,冰雹般的金麻雀劈哩啪啦往下落,铁砂子在柳枝间飞... ... Old Geng raised his rifle and made a twist eye...\n",
+ "\n",
+ "[1 rows x 12 columns]\n",
+ "(6) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n",
+ "1.371 GB of memory reserved.\n",
+ "Unsloth: Merging 4bit and LoRA weights to 4bit...\n",
+ "This might take 5 minutes...\n",
+ "Done.\n",
+ "Unsloth: Saving tokenizer... Done.\n",
+ "Unsloth: Saving model... This might take 10 minutes for Llama-7b... Done.\n",
+ "Unsloth: Merging 4bit and LoRA weights to 4bit...\n",
+ "This might take 5 minutes...\n",
+ "Done.\n",
+ "Unsloth: Saving 4bit Bitsandbytes model. Please wait...\n",
+ "model.safetensors: 100%|█████████████████████| 493M/493M [00:38<00:00, 12.7MB/s]\n",
+ "README.md: 100%|███████████████████████████████| 581/581 [00:00<00:00, 3.37MB/s]\n",
+ "Saved merged_4bit model to https://huggingface.co/Qwen2-0.5B-Instruct-MAC-merged_4bit_forced\n",
+ "Tuning unsloth/Qwen2-1.5B-Instruct\n",
+ "🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.\n",
+ "[nltk_data] Downloading package wordnet to /home/inflaton/nltk_data...\n",
+ "[nltk_data] Package wordnet is already up-to-date!\n",
+ "[nltk_data] Downloading package punkt to /home/inflaton/nltk_data...\n",
+ "[nltk_data] Package punkt is already up-to-date!\n",
+ "[nltk_data] Downloading package omw-1.4 to /home/inflaton/nltk_data...\n",
+ "[nltk_data] Package omw-1.4 is already up-to-date!\n",
+ "loading /home/inflaton/code/projects/courses/novel-translation/translation_engine_v3.py\n",
+ "loading env vars from: /home/inflaton/code/projects/courses/novel-translation/.env\n",
+ "unsloth/Qwen2-1.5B-Instruct True 2048 10 None datasets/mac/mac.tsv results/mac-results_v3.csv True True True\n",
+ "(1) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n",
+ "0.0 GB of memory reserved.\n",
+ "loading model: unsloth/Qwen2-1.5B-Instruct\n",
+ "==((====))== Unsloth: Fast Qwen2 patching release 2024.6\n",
+ " \\\\ /| GPU: NVIDIA GeForce RTX 4080 Laptop GPU. Max memory: 11.994 GB. Platform = Linux.\n",
+ "O^O/ \\_/ \\ Pytorch: 2.3.0. CUDA = 8.9. CUDA Toolkit = 12.1.\n",
+ "\\ / Bfloat16 = TRUE. Xformers = 0.0.26.post1. FA = False.\n",
+ " \"-____-\" Free Apache license: http://github.com/unslothai/unsloth\n",
+ "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n",
+ "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n",
+ "(2) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n",
+ "1.516 GB of memory reserved.\n",
+ "loading train/test data files\n",
+ "Map: 100%|████████████████████████| 4528/4528 [00:00<00:00, 18414.78 examples/s]\n",
+ "Map: 100%|█████████████████████████| 1133/1133 [00:00<00:00, 9253.25 examples/s]\n",
+ "DatasetDict({\n",
+ " train: Dataset({\n",
+ " features: ['chinese', 'english', 'text', 'prompt'],\n",
+ " num_rows: 4528\n",
+ " })\n",
+ " test: Dataset({\n",
+ " features: ['chinese', 'english', 'text', 'prompt'],\n",
+ " num_rows: 1133\n",
+ " })\n",
+ "})\n",
+ "Evaluating base model: unsloth/Qwen2-1.5B-Instruct\n",
+ "100%|█████████████████████████████████████| 1133/1133 [1:13:14<00:00, 3.88s/it]\n",
+ " chinese ... unsloth/Qwen2-1.5B-Instruct\n",
+ "0 老耿端起枪,眯缝起一只三角眼,一搂扳机响了枪,冰雹般的金麻雀劈哩啪啦往下落,铁砂子在柳枝间飞... ... Old耿端起枪,眯缝起一只三角眼,一搂扳机响了枪,冰雹般的金麻雀劈哩啪啦往下落,铁砂子在柳枝...\n",
+ "\n",
+ "[1 rows x 13 columns]\n",
+ "(3) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n",
+ "1.777 GB of memory reserved.\n",
+ "Unsloth 2024.6 patched 28 layers with 0 QKV layers, 28 O layers and 28 MLP layers.\n",
+ "Map (num_proc=2): 100%|████████████| 4528/4528 [00:02<00:00, 2257.38 examples/s]\n",
+ "(4) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n",
+ "1.777 GB of memory reserved.\n",
+ "==((====))== Unsloth - 2x faster free finetuning | Num GPUs = 1\n",
+ " \\\\ /| Num examples = 4,528 | Num Epochs = 10\n",
+ "O^O/ \\_/ \\ Batch size per device = 2 | Gradient Accumulation steps = 4\n",
+ "\\ / Total batch size = 8 | Total steps = 5,660\n",
+ " \"-____-\" Number of trainable parameters = 18,464,768\n",
+ "{'loss': 1.7417, 'grad_norm': 0.6452760696411133, 'learning_rate': 0.00019664014146772768, 'epoch': 0.18}\n",
+ "{'loss': 1.5681, 'grad_norm': 0.6196172833442688, 'learning_rate': 0.0001931034482758621, 'epoch': 0.35}\n",
+ "{'loss': 1.516, 'grad_norm': 0.6484832763671875, 'learning_rate': 0.00018956675508399648, 'epoch': 0.53}\n",
+ "{'loss': 1.5172, 'grad_norm': 0.5743487477302551, 'learning_rate': 0.00018603006189213086, 'epoch': 0.71}\n",
+ "{'loss': 1.4959, 'grad_norm': 0.5681684017181396, 'learning_rate': 0.00018249336870026527, 'epoch': 0.88}\n",
+ "{'loss': 1.418, 'grad_norm': 0.6087610125541687, 'learning_rate': 0.00017895667550839965, 'epoch': 1.06}\n",
+ "{'loss': 1.2977, 'grad_norm': 0.7200368642807007, 'learning_rate': 0.00017541998231653406, 'epoch': 1.24}\n",
+ "{'loss': 1.3391, 'grad_norm': 0.6828812956809998, 'learning_rate': 0.00017188328912466844, 'epoch': 1.41}\n",
+ "{'loss': 1.2787, 'grad_norm': 0.8858041763305664, 'learning_rate': 0.00016834659593280285, 'epoch': 1.59}\n",
+ "{'loss': 1.2797, 'grad_norm': 0.8660985231399536, 'learning_rate': 0.00016480990274093723, 'epoch': 1.77}\n",
+ "{'loss': 1.3315, 'grad_norm': 1.1288585662841797, 'learning_rate': 0.00016127320954907164, 'epoch': 1.94}\n",
+ "{'loss': 1.1279, 'grad_norm': 1.004281759262085, 'learning_rate': 0.000157736516357206, 'epoch': 2.12}\n",
+ "{'loss': 0.9951, 'grad_norm': 1.0934711694717407, 'learning_rate': 0.0001541998231653404, 'epoch': 2.3}\n",
+ "{'loss': 1.0299, 'grad_norm': 1.2658041715621948, 'learning_rate': 0.0001506631299734748, 'epoch': 2.47}\n",
+ "{'loss': 1.0337, 'grad_norm': 1.0768382549285889, 'learning_rate': 0.0001471264367816092, 'epoch': 2.65}\n",
+ "{'loss': 1.0343, 'grad_norm': 1.031988501548767, 'learning_rate': 0.0001435897435897436, 'epoch': 2.83}\n",
+ "{'loss': 1.0086, 'grad_norm': 0.9097064733505249, 'learning_rate': 0.000140053050397878, 'epoch': 3.0}\n",
+ "{'loss': 0.6964, 'grad_norm': 1.314468502998352, 'learning_rate': 0.0001365163572060124, 'epoch': 3.18}\n",
+ "{'loss': 0.7091, 'grad_norm': 1.35807204246521, 'learning_rate': 0.00013297966401414678, 'epoch': 3.36}\n",
+ "{'loss': 0.7151, 'grad_norm': 1.3476978540420532, 'learning_rate': 0.0001294429708222812, 'epoch': 3.53}\n",
+ " 35%|████████████▎ | 2000/5660 [1:37:50<2:55:23, 2.88s/it]/home/inflaton/miniconda3/envs/unsloth_env/lib/python3.10/site-packages/peft/utils/other.py:611: UserWarning: Unable to fetch remote file due to the following error (MaxRetryError('HTTPSConnectionPool(host=\\'huggingface.co\\', port=443): Max retries exceeded with url: /unsloth/qwen2-1.5b-instruct-bnb-4bit/resolve/main/config.json (Caused by NameResolutionError(\": Failed to resolve \\'huggingface.co\\' ([Errno -3] Temporary failure in name resolution)\"))'), '(Request ID: 628a4ff6-a882-4c3c-8607-702c8596b3a3)') - silently ignoring the lookup for the file config.json in unsloth/qwen2-1.5b-instruct-bnb-4bit.\n",
+ " warnings.warn(\n",
+ "/home/inflaton/miniconda3/envs/unsloth_env/lib/python3.10/site-packages/peft/utils/save_and_load.py:195: UserWarning: Could not find a config file in unsloth/qwen2-1.5b-instruct-bnb-4bit - will assume that the vocabulary was not modified.\n",
+ " warnings.warn(\n",
+ "{'loss': 0.7047, 'grad_norm': 1.4935038089752197, 'learning_rate': 0.00012590627763041555, 'epoch': 3.71}\n",
+ "{'loss': 0.7433, 'grad_norm': 1.3313759565353394, 'learning_rate': 0.00012236958443854996, 'epoch': 3.89}\n",
+ "{'loss': 0.6411, 'grad_norm': 1.1943646669387817, 'learning_rate': 0.00011883289124668435, 'epoch': 4.06}\n",
+ "{'loss': 0.4593, 'grad_norm': 1.4492411613464355, 'learning_rate': 0.00011529619805481875, 'epoch': 4.24}\n",
+ "{'loss': 0.4449, 'grad_norm': 1.6588672399520874, 'learning_rate': 0.00011175950486295315, 'epoch': 4.42}\n",
+ " 44%|███████████████▍ | 2500/5660 [2:02:16<2:35:11, 2.95s/it]/home/inflaton/miniconda3/envs/unsloth_env/lib/python3.10/site-packages/peft/utils/other.py:611: UserWarning: Unable to fetch remote file due to the following error (MaxRetryError('HTTPSConnectionPool(host=\\'huggingface.co\\', port=443): Max retries exceeded with url: /unsloth/qwen2-1.5b-instruct-bnb-4bit/resolve/main/config.json (Caused by NameResolutionError(\": Failed to resolve \\'huggingface.co\\' ([Errno -3] Temporary failure in name resolution)\"))'), '(Request ID: 5665c6e3-3fe8-43f6-b849-73a605e379e9)') - silently ignoring the lookup for the file config.json in unsloth/qwen2-1.5b-instruct-bnb-4bit.\n",
+ " warnings.warn(\n",
+ "/home/inflaton/miniconda3/envs/unsloth_env/lib/python3.10/site-packages/peft/utils/save_and_load.py:195: UserWarning: Could not find a config file in unsloth/qwen2-1.5b-instruct-bnb-4bit - will assume that the vocabulary was not modified.\n",
+ " warnings.warn(\n",
+ "{'loss': 0.4778, 'grad_norm': 1.644708514213562, 'learning_rate': 0.00010822281167108754, 'epoch': 4.59}\n",
+ "{'loss': 0.4785, 'grad_norm': 1.7970712184906006, 'learning_rate': 0.00010468611847922194, 'epoch': 4.77}\n",
+ "{'loss': 0.487, 'grad_norm': 1.5495588779449463, 'learning_rate': 0.00010114942528735633, 'epoch': 4.95}\n",
+ "{'loss': 0.3612, 'grad_norm': 1.3270775079727173, 'learning_rate': 9.761273209549072e-05, 'epoch': 5.12}\n",
+ "{'loss': 0.2908, 'grad_norm': 1.0414644479751587, 'learning_rate': 9.407603890362513e-05, 'epoch': 5.3}\n",
+ " 53%|██████████████████▌ | 3000/5660 [2:27:09<2:06:06, 2.84s/it]/home/inflaton/miniconda3/envs/unsloth_env/lib/python3.10/site-packages/peft/utils/other.py:611: UserWarning: Unable to fetch remote file due to the following error (MaxRetryError('HTTPSConnectionPool(host=\\'huggingface.co\\', port=443): Max retries exceeded with url: /unsloth/qwen2-1.5b-instruct-bnb-4bit/resolve/main/config.json (Caused by NameResolutionError(\": Failed to resolve \\'huggingface.co\\' ([Errno -3] Temporary failure in name resolution)\"))'), '(Request ID: 46085388-05e0-410c-b629-34f2cbb41b46)') - silently ignoring the lookup for the file config.json in unsloth/qwen2-1.5b-instruct-bnb-4bit.\n",
+ " warnings.warn(\n",
+ "/home/inflaton/miniconda3/envs/unsloth_env/lib/python3.10/site-packages/peft/utils/save_and_load.py:195: UserWarning: Could not find a config file in unsloth/qwen2-1.5b-instruct-bnb-4bit - will assume that the vocabulary was not modified.\n",
+ " warnings.warn(\n",
+ "{'loss': 0.2978, 'grad_norm': 1.3449839353561401, 'learning_rate': 9.053934571175951e-05, 'epoch': 5.48}\n",
+ "{'loss': 0.3143, 'grad_norm': 1.344139814376831, 'learning_rate': 8.70026525198939e-05, 'epoch': 5.65}\n",
+ "{'loss': 0.3089, 'grad_norm': 1.6479969024658203, 'learning_rate': 8.34659593280283e-05, 'epoch': 5.83}\n",
+ "{'loss': 0.3053, 'grad_norm': 0.8114176392555237, 'learning_rate': 7.99292661361627e-05, 'epoch': 6.01}\n",
+ "{'loss': 0.1932, 'grad_norm': 1.7617920637130737, 'learning_rate': 7.639257294429708e-05, 'epoch': 6.18}\n",
+ "{'loss': 0.2036, 'grad_norm': 1.1698989868164062, 'learning_rate': 7.285587975243147e-05, 'epoch': 6.36}\n",
+ "{'loss': 0.205, 'grad_norm': 1.1838085651397705, 'learning_rate': 6.931918656056587e-05, 'epoch': 6.54}\n",
+ "{'loss': 0.2078, 'grad_norm': 1.3557209968566895, 'learning_rate': 6.578249336870027e-05, 'epoch': 6.71}\n",
+ "{'loss': 0.2152, 'grad_norm': 1.0370357036590576, 'learning_rate': 6.224580017683466e-05, 'epoch': 6.89}\n",
+ "{'loss': 0.1935, 'grad_norm': 0.6839048862457275, 'learning_rate': 5.870910698496905e-05, 'epoch': 7.07}\n",
+ "{'loss': 0.1503, 'grad_norm': 0.8074870705604553, 'learning_rate': 5.517241379310345e-05, 'epoch': 7.24}\n",
+ "{'loss': 0.1571, 'grad_norm': 0.7514998912811279, 'learning_rate': 5.163572060123785e-05, 'epoch': 7.42}\n",
+ "{'loss': 0.1571, 'grad_norm': 0.7462531328201294, 'learning_rate': 4.809902740937224e-05, 'epoch': 7.6}\n",
+ "{'loss': 0.157, 'grad_norm': 0.773760199546814, 'learning_rate': 4.4562334217506634e-05, 'epoch': 7.77}\n",
+ "{'loss': 0.1584, 'grad_norm': 1.2061128616333008, 'learning_rate': 4.1025641025641023e-05, 'epoch': 7.95}\n",
+ "{'loss': 0.1365, 'grad_norm': 0.5050584077835083, 'learning_rate': 3.7488947833775426e-05, 'epoch': 8.13}\n",
+ "{'loss': 0.1301, 'grad_norm': 0.6061640381813049, 'learning_rate': 3.3952254641909815e-05, 'epoch': 8.3}\n",
+ "{'loss': 0.1314, 'grad_norm': 0.4381011724472046, 'learning_rate': 3.041556145004421e-05, 'epoch': 8.48}\n",
+ "{'loss': 0.1336, 'grad_norm': 0.4109954535961151, 'learning_rate': 2.6878868258178604e-05, 'epoch': 8.66}\n",
+ "{'loss': 0.1334, 'grad_norm': 0.7153268456459045, 'learning_rate': 2.3342175066313e-05, 'epoch': 8.83}\n",
+ "{'loss': 0.1324, 'grad_norm': 0.5012986063957214, 'learning_rate': 1.9805481874447392e-05, 'epoch': 9.01}\n",
+ "{'loss': 0.1179, 'grad_norm': 0.47853657603263855, 'learning_rate': 1.6268788682581788e-05, 'epoch': 9.19}\n",
+ "{'loss': 0.1182, 'grad_norm': 0.5196290612220764, 'learning_rate': 1.273209549071618e-05, 'epoch': 9.36}\n",
+ "{'loss': 0.1194, 'grad_norm': 0.47395309805870056, 'learning_rate': 9.195402298850575e-06, 'epoch': 9.54}\n",
+ "{'loss': 0.1205, 'grad_norm': 0.8086031675338745, 'learning_rate': 5.658709106984969e-06, 'epoch': 9.72}\n",
+ "{'loss': 0.1225, 'grad_norm': 0.43366023898124695, 'learning_rate': 2.1220159151193635e-06, 'epoch': 9.89}\n",
+ "{'train_runtime': 16717.8313, 'train_samples_per_second': 2.708, 'train_steps_per_second': 0.339, 'train_loss': 0.5991969135540535, 'epoch': 10.0}\n",
+ "100%|█████████████████████████████████████| 5660/5660 [4:38:37<00:00, 2.95s/it]\n",
+ "(5) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n",
+ "16717.8313 seconds used for training.\n",
+ "278.63 minutes used for training.\n",
+ "Peak reserved memory = 2.367 GB.\n",
+ "Peak reserved memory for training = 0.59 GB.\n",
+ "Peak reserved memory % of max memory = 19.735 %.\n",
+ "Peak reserved memory for training % of max memory = 4.919 %.\n",
+ "Evaluating fine-tuned model: unsloth/Qwen2-1.5B-Instruct\n",
+ "100%|█████████████████████████████████████| 1133/1133 [2:14:26<00:00, 7.12s/it]\n",
+ " chinese ... unsloth/Qwen2-1.5B-Instruct(finetuned)\n",
+ "0 老耿端起枪,眯缝起一只三角眼,一搂扳机响了枪,冰雹般的金麻雀劈哩啪啦往下落,铁砂子在柳枝间飞... ... Old Geng raised his pistol, squinted through t...\n",
+ "\n",
+ "[1 rows x 14 columns]\n",
+ "(6) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n",
+ "5.33 GB of memory reserved.\n",
+ "Unsloth: Merging 4bit and LoRA weights to 4bit...\n",
+ "This might take 5 minutes...\n",
+ "Done.\n",
+ "Unsloth: Saving tokenizer... Done.\n",
+ "Unsloth: Saving model... This might take 10 minutes for Llama-7b... Done.\n",
+ "Unsloth: Merging 4bit and LoRA weights to 4bit...\n",
+ "This might take 5 minutes...\n",
+ "Done.\n",
+ "Unsloth: Saving 4bit Bitsandbytes model. Please wait...\n",
+ "model.safetensors: 100%|███████████████████| 1.22G/1.22G [02:18<00:00, 8.81MB/s]\n",
+ "README.md: 100%|███████████████████████████████| 581/581 [00:00<00:00, 3.67MB/s]\n",
+ "Saved merged_4bit model to https://huggingface.co/Qwen2-1.5B-Instruct-MAC-merged_4bit_forced\n"
+ ]
+ }
+ ],
+ "source": [
+ "!./tune-small-2.sh"
+ ]
+ }
+ ],
+ "metadata": {
+ "accelerator": "GPU",
+ "application/vnd.databricks.v1+notebook": {
+ "dashboards": [],
+ "environmentMetadata": null,
+ "language": "python",
+ "notebookMetadata": {
+ "pythonIndentUnit": 4
+ },
+ "notebookName": "07_MAC_+_Qwen2-7B-Instructi_Unsloth_train",
+ "widgets": {}
+ },
+ "colab": {
+ "gpuType": "T4",
+ "provenance": []
+ },
+ "kernelspec": {
+ "display_name": "Python 3",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.10.14"
+ },
+ "widgets": {
+ "application/vnd.jupyter.widget-state+json": {
+ "036fc5746f43416db18c19ad8fd36677": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "06e806c82c7b4cbea31c5358dd9c3434": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "087b76a8b7514269b1f0ab29b062e444": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_a069d2ab23824f29aa320ac256e2cfe9",
+ "placeholder": "",
+ "style": "IPY_MODEL_06e806c82c7b4cbea31c5358dd9c3434",
+ "value": "Map (num_proc=2): 100%"
+ }
+ },
+ "09b76013aa9e45efb6deb23a7a0d0925": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_dea41c5260884aa6879b5e1d1697b14f",
+ "placeholder": "",
+ "style": "IPY_MODEL_89965917796a4f81b899fdc7685f33df",
+ "value": "config.json: 100%"
+ }
+ },
+ "0a92c56bfa134ef583220d7ef0b13e17": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "0c34be936c8145d3ab41282f30a70713": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "0f8b6bfe16894500838793f2491d403f": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "177c78fce95d4b4ab33057c5a048d693": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "1f44c9ce1adf470cbb19784493ed209f": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_0c34be936c8145d3ab41282f30a70713",
+ "placeholder": "",
+ "style": "IPY_MODEL_0a92c56bfa134ef583220d7ef0b13e17",
+ "value": "model.safetensors: 100%"
+ }
+ },
+ "201b59ccd9f845e197029b57e424aefc": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "2157f01726d748f8a9ae4a00664430da": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "21db8a77b00d4a4e82fdfa608657531f": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "26e4202cca81496a90d15a0dd4ca9cf1": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_ba90fdb8822d47dab7ba203bee297f37",
+ "IPY_MODEL_61560ff6a36b44f4a9dfdae5c52791d4",
+ "IPY_MODEL_95fbe66647904c06a20f640630d6dc0e"
+ ],
+ "layout": "IPY_MODEL_57182a263d324a3dbf1471c74290a0d5"
+ }
+ },
+ "27155728b6b84cb199c91c940095d0a8": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_6b91feeed5464877991ac2c207aebe7c",
+ "IPY_MODEL_cca8113c54c0495daedce1327bf9c68b",
+ "IPY_MODEL_2e63a29e2f7247bba5beede9a568c99f"
+ ],
+ "layout": "IPY_MODEL_5c9d781c28944f3eb86e2a6d44efdf18"
+ }
+ },
+ "271ddaa553a042d09b6db7b450643d8f": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "2a58d04b428c46f4b3dbadd3bc6cd529": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "2d18ddf6482c4d97829ac0e5a7b9868f": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_9f679ad3ec7f4fe8ad0510ffb57bc2ab",
+ "IPY_MODEL_f2df530d22c74977b249dd9fb5f4829b",
+ "IPY_MODEL_89b2ef0dbfea47ab8e6f8d659e3351d1"
+ ],
+ "layout": "IPY_MODEL_3056b148aa9f4e6e8aa3b61d26886255"
+ }
+ },
+ "2e5087c76f98437cb5dc729230358cba": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "2e63a29e2f7247bba5beede9a568c99f": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_b993eaec6b224440bf80c0958c6fb536",
+ "placeholder": "",
+ "style": "IPY_MODEL_de868e26e7154f62aa86223a539ad421",
+ "value": " 464/464 [00:00<00:00, 27.1kB/s]"
+ }
+ },
+ "2f6c70dd266c4816bfad3fd3d192929a": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "30307300bc4e4baf96560e30969a82b6": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_e36a3f9eff0e4cf68834d66b0213ae96",
+ "placeholder": "",
+ "style": "IPY_MODEL_a0037bdccf254159becde630bee3d1db",
+ "value": "generation_config.json: 100%"
+ }
+ },
+ "3056b148aa9f4e6e8aa3b61d26886255": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "30cdc32298134cb0be4d41615b9e5774": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "3572201bd4d74a58b7a665f9bdfdcdba": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "35b0e8c26d6640e9bd0ed7b242a423d8": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_2e5087c76f98437cb5dc729230358cba",
+ "max": 51760,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_036fc5746f43416db18c19ad8fd36677",
+ "value": 51760
+ }
+ },
+ "36166c7bcb854b34aca1f41a5d6ea50b": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "370692d819df41828b48c4ad446f977b": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "39b29a75374b45c0a22506010be2b84e": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_30cdc32298134cb0be4d41615b9e5774",
+ "max": 1179,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_47928317548c454bba6358ab132e8dee",
+ "value": 1179
+ }
+ },
+ "3cf2dd993b5e4d3daecf61e4bab5a404": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_087b76a8b7514269b1f0ab29b062e444",
+ "IPY_MODEL_35b0e8c26d6640e9bd0ed7b242a423d8",
+ "IPY_MODEL_54ad89e05fd74576b9b8b5b5a10eaf8d"
+ ],
+ "layout": "IPY_MODEL_a41dc44766444a998bec2d777f249d23"
+ }
+ },
+ "43dec2ede91341f5af60eb522e18e984": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "4463edd481c1467f914c7dcd6c6e6ffc": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "47928317548c454bba6358ab132e8dee": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "49277aeeac16434a865a4d12308b1abc": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "4ae7e449e4ea4c729b5f34607c18ebae": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "4b2061b8a73c43ffb0c2f83daf0d0183": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "4c4c88d4c701450692fa0f6b0c5764b0": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "4c666f4ace3943f8b80ecd20e7503236": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "4ccedf0d93094e63b57a0f8a434fba06": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_4463edd481c1467f914c7dcd6c6e6ffc",
+ "max": 44307561,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_6d3b9a05db0b4dadb638c686faa0c40a",
+ "value": 44307561
+ }
+ },
+ "4dcf6ff672d24983a1877a8431709aa9": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_5807d5fb827d490fb3bc698f801ffff5",
+ "placeholder": "",
+ "style": "IPY_MODEL_c4f2b06a82fd4987b8b659524a7b503b",
+ "value": "Generating train split: 100%"
+ }
+ },
+ "4ea63adfce694725bdba878aef709dd3": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "5234566b1bfc4655b8d582ea5b46ed9f": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "54ad89e05fd74576b9b8b5b5a10eaf8d": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_fdb1941405ed4e4aa06019933892deb3",
+ "placeholder": "",
+ "style": "IPY_MODEL_668d5377ca56426a99753867e6e24862",
+ "value": " 51760/51760 [01:02<00:00, 1131.51 examples/s]"
+ }
+ },
+ "56aee4853b7740e6a977254f5d1fa66d": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "57182a263d324a3dbf1471c74290a0d5": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "5807d5fb827d490fb3bc698f801ffff5": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "5c9d781c28944f3eb86e2a6d44efdf18": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "5f40db8173dd4d76b6ef5ed6d9ec8b6e": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "61560ff6a36b44f4a9dfdae5c52791d4": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_db19fc8d37db4e45a5790a876836d8c4",
+ "max": 11610,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_36166c7bcb854b34aca1f41a5d6ea50b",
+ "value": 11610
+ }
+ },
+ "6578fd7acdb54c4c93528ea431fd0144": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_370692d819df41828b48c4ad446f977b",
+ "placeholder": "",
+ "style": "IPY_MODEL_a0bf9160eb2647409b3200270914b90f",
+ "value": " 50.6k/50.6k [00:00<00:00, 2.71MB/s]"
+ }
+ },
+ "668d5377ca56426a99753867e6e24862": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "697f027529b54ee9956bae78a11e0611": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "69ac12aec0714318bf2c83d4f4e745f5": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "6b2012c3f88547af8884a9ea90e3164b": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_938f45f1b3e24118b815d96ae34ba86a",
+ "placeholder": "",
+ "style": "IPY_MODEL_9367047a800747f79c6b225d92397846",
+ "value": " 44.3M/44.3M [00:01<00:00, 31.0MB/s]"
+ }
+ },
+ "6b91feeed5464877991ac2c207aebe7c": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_4b2061b8a73c43ffb0c2f83daf0d0183",
+ "placeholder": "",
+ "style": "IPY_MODEL_69ac12aec0714318bf2c83d4f4e745f5",
+ "value": "special_tokens_map.json: 100%"
+ }
+ },
+ "6d3b9a05db0b4dadb638c686faa0c40a": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "6dbbedeca9314e66ae50e44ffa31a414": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "6e34619b45934040b6092e6fb01ea7fe": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "71ce208e20d6483abb9ed923510c86d7": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_d69dc491b3ab44d7852b21873ed7bb7f",
+ "placeholder": "",
+ "style": "IPY_MODEL_f401d53bf28e44eb906bce6c05412662",
+ "value": " 51760/51760 [00:01<00:00, 45512.81 examples/s]"
+ }
+ },
+ "7358cdad832342c983e31efb8754ab78": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "73e352a3404f4c7dad0737f57d29e92f": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_988a0e8c1f89446086858da0a891a79c",
+ "IPY_MODEL_4ccedf0d93094e63b57a0f8a434fba06",
+ "IPY_MODEL_6b2012c3f88547af8884a9ea90e3164b"
+ ],
+ "layout": "IPY_MODEL_7e29cb8dd4df4d5b94407cd8fd3f2011"
+ }
+ },
+ "74501720ac7e4dbb911a4a99b3633bc6": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "78e5400bff924a92a4cc61c4ff18b182": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_b9b313fd861948f5aba25b24b1518d30",
+ "placeholder": "",
+ "style": "IPY_MODEL_4c666f4ace3943f8b80ecd20e7503236",
+ "value": " 1.18k/1.18k [00:00<00:00, 31.3kB/s]"
+ }
+ },
+ "7975adbc2ec5489ea7fa0167e620d85c": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_6e34619b45934040b6092e6fb01ea7fe",
+ "max": 51760,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_271ddaa553a042d09b6db7b450643d8f",
+ "value": 51760
+ }
+ },
+ "7e29cb8dd4df4d5b94407cd8fd3f2011": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "810ff6c0e17d4fa09a30fef27eacff90": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "89965917796a4f81b899fdc7685f33df": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "89b2ef0dbfea47ab8e6f8d659e3351d1": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_b8908fa0df3743ecb9d12983a739104f",
+ "placeholder": "",
+ "style": "IPY_MODEL_177c78fce95d4b4ab33057c5a048d693",
+ "value": " 9.09M/9.09M [00:00<00:00, 32.6MB/s]"
+ }
+ },
+ "8b3505352a5a42bf910428c40ce40465": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_49277aeeac16434a865a4d12308b1abc",
+ "placeholder": "",
+ "style": "IPY_MODEL_2157f01726d748f8a9ae4a00664430da",
+ "value": " 5.70G/5.70G [01:02<00:00, 30.1MB/s]"
+ }
+ },
+ "8fc142b628fb40568730234de1cafde2": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_4ae7e449e4ea4c729b5f34607c18ebae",
+ "max": 172,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_3572201bd4d74a58b7a665f9bdfdcdba",
+ "value": 172
+ }
+ },
+ "9367047a800747f79c6b225d92397846": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "938f45f1b3e24118b815d96ae34ba86a": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "95fbe66647904c06a20f640630d6dc0e": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_b0a370dc20654b279b9680692e34418e",
+ "placeholder": "",
+ "style": "IPY_MODEL_cfeb365ddf7548d58b2557f22737fcf5",
+ "value": " 11.6k/11.6k [00:00<00:00, 716kB/s]"
+ }
+ },
+ "988a0e8c1f89446086858da0a891a79c": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_ad2be500fc164c0f86f33e914ef8e6a0",
+ "placeholder": "",
+ "style": "IPY_MODEL_5234566b1bfc4655b8d582ea5b46ed9f",
+ "value": "Downloading data: 100%"
+ }
+ },
+ "98c58f23f4d549518832cb2d18f796e8": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_09b76013aa9e45efb6deb23a7a0d0925",
+ "IPY_MODEL_39b29a75374b45c0a22506010be2b84e",
+ "IPY_MODEL_78e5400bff924a92a4cc61c4ff18b182"
+ ],
+ "layout": "IPY_MODEL_2a58d04b428c46f4b3dbadd3bc6cd529"
+ }
+ },
+ "99fdbb0300c14c139d1937c646f0cfe7": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_7358cdad832342c983e31efb8754ab78",
+ "placeholder": "",
+ "style": "IPY_MODEL_e9adf418296e436fb48bb9f78885598b",
+ "value": " 51760/51760 [00:01<00:00, 38665.95 examples/s]"
+ }
+ },
+ "9f679ad3ec7f4fe8ad0510ffb57bc2ab": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_4ea63adfce694725bdba878aef709dd3",
+ "placeholder": "",
+ "style": "IPY_MODEL_74501720ac7e4dbb911a4a99b3633bc6",
+ "value": "tokenizer.json: 100%"
+ }
+ },
+ "a0037bdccf254159becde630bee3d1db": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "a069d2ab23824f29aa320ac256e2cfe9": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "a0bf9160eb2647409b3200270914b90f": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "a41dc44766444a998bec2d777f249d23": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "a8464a4c711e4e00aafdfc919b60d07e": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_fb995c740590427b882572c81d4e848c",
+ "placeholder": "",
+ "style": "IPY_MODEL_201b59ccd9f845e197029b57e424aefc",
+ "value": " 172/172 [00:00<00:00, 12.0kB/s]"
+ }
+ },
+ "a9f0cc51fc3d4d7b874c32dcf1c5bdf2": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "ad2be500fc164c0f86f33e914ef8e6a0": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "b0240cd9a4554b29ae11f8051984a1c6": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_edaf890370314a218f138015faa0b05d",
+ "placeholder": "",
+ "style": "IPY_MODEL_697f027529b54ee9956bae78a11e0611",
+ "value": "Map: 100%"
+ }
+ },
+ "b0a370dc20654b279b9680692e34418e": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "b518dcee69074b87be73957cd810e7ed": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_d891f8d0b1fc462f8008d02bb2a15692",
+ "placeholder": "",
+ "style": "IPY_MODEL_cced8fd7e998472794f3f3e3018956a5",
+ "value": "tokenizer_config.json: 100%"
+ }
+ },
+ "b8908fa0df3743ecb9d12983a739104f": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "b993eaec6b224440bf80c0958c6fb536": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "b9b313fd861948f5aba25b24b1518d30": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "ba90fdb8822d47dab7ba203bee297f37": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_0f8b6bfe16894500838793f2491d403f",
+ "placeholder": "",
+ "style": "IPY_MODEL_bb19f6c747754682a514373a3a0535ba",
+ "value": "Downloading readme: 100%"
+ }
+ },
+ "bb19f6c747754682a514373a3a0535ba": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "bc883d4cf13e4f8b8a4fe5f410cb6efd": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_e9159e03e61f4f56978ece9c3bca49b2",
+ "max": 51760,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_810ff6c0e17d4fa09a30fef27eacff90",
+ "value": 51760
+ }
+ },
+ "c161d94df0f04feba9542237e0856c22": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "c22f71b1f85843209d7e5321506b9cb9": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_1f44c9ce1adf470cbb19784493ed209f",
+ "IPY_MODEL_f1addc4479d849879e743cf9089e6540",
+ "IPY_MODEL_8b3505352a5a42bf910428c40ce40465"
+ ],
+ "layout": "IPY_MODEL_4c4c88d4c701450692fa0f6b0c5764b0"
+ }
+ },
+ "c4f2b06a82fd4987b8b659524a7b503b": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "cca8113c54c0495daedce1327bf9c68b": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_e02f9b7849c64531835eb77b860d1c93",
+ "max": 464,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_56aee4853b7740e6a977254f5d1fa66d",
+ "value": 464
+ }
+ },
+ "cced8fd7e998472794f3f3e3018956a5": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "cf245afeb1c04f29a24d291608c3d157": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_b518dcee69074b87be73957cd810e7ed",
+ "IPY_MODEL_e29104486d594b2992d7285e0ef77371",
+ "IPY_MODEL_6578fd7acdb54c4c93528ea431fd0144"
+ ],
+ "layout": "IPY_MODEL_d35db8148a354c56aaac56dbae22536f"
+ }
+ },
+ "cfe8cae0e22b495bafa221a63d13b283": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "cfeb365ddf7548d58b2557f22737fcf5": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "d1b47d39450d4019ae85c9b2f943eeaf": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_4dcf6ff672d24983a1877a8431709aa9",
+ "IPY_MODEL_7975adbc2ec5489ea7fa0167e620d85c",
+ "IPY_MODEL_71ce208e20d6483abb9ed923510c86d7"
+ ],
+ "layout": "IPY_MODEL_cfe8cae0e22b495bafa221a63d13b283"
+ }
+ },
+ "d35db8148a354c56aaac56dbae22536f": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "d69dc491b3ab44d7852b21873ed7bb7f": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "d891f8d0b1fc462f8008d02bb2a15692": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "d8e5318cead340c4adbeaccc05d39225": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "daf4cd890b35422683d22fd30bc71e83": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_b0240cd9a4554b29ae11f8051984a1c6",
+ "IPY_MODEL_bc883d4cf13e4f8b8a4fe5f410cb6efd",
+ "IPY_MODEL_99fdbb0300c14c139d1937c646f0cfe7"
+ ],
+ "layout": "IPY_MODEL_c161d94df0f04feba9542237e0856c22"
+ }
+ },
+ "db19fc8d37db4e45a5790a876836d8c4": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "de868e26e7154f62aa86223a539ad421": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "dea41c5260884aa6879b5e1d1697b14f": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "e02f9b7849c64531835eb77b860d1c93": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "e29104486d594b2992d7285e0ef77371": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_a9f0cc51fc3d4d7b874c32dcf1c5bdf2",
+ "max": 50641,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_2f6c70dd266c4816bfad3fd3d192929a",
+ "value": 50641
+ }
+ },
+ "e36a3f9eff0e4cf68834d66b0213ae96": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "e9159e03e61f4f56978ece9c3bca49b2": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "e9adf418296e436fb48bb9f78885598b": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "edaf890370314a218f138015faa0b05d": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "f1addc4479d849879e743cf9089e6540": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_43dec2ede91341f5af60eb522e18e984",
+ "max": 5702746405,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_d8e5318cead340c4adbeaccc05d39225",
+ "value": 5702746405
+ }
+ },
+ "f2df530d22c74977b249dd9fb5f4829b": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_21db8a77b00d4a4e82fdfa608657531f",
+ "max": 9085698,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_6dbbedeca9314e66ae50e44ffa31a414",
+ "value": 9085698
+ }
+ },
+ "f401d53bf28e44eb906bce6c05412662": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "fb995c740590427b882572c81d4e848c": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "fce7a61c25ec4390af43d92b7c473a45": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_30307300bc4e4baf96560e30969a82b6",
+ "IPY_MODEL_8fc142b628fb40568730234de1cafde2",
+ "IPY_MODEL_a8464a4c711e4e00aafdfc919b60d07e"
+ ],
+ "layout": "IPY_MODEL_5f40db8173dd4d76b6ef5ed6d9ec8b6e"
+ }
+ },
+ "fdb1941405ed4e4aa06019933892deb3": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ }
+ }
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
+}
diff --git a/notebooks/05_tune-small-with-flash-attn.ipynb b/notebooks/05_tune-small-with-flash-attn.ipynb
new file mode 100644
index 0000000000000000000000000000000000000000..3a3e52a632a46eee834d5e0e6f85ece3e34594dc
--- /dev/null
+++ b/notebooks/05_tune-small-with-flash-attn.ipynb
@@ -0,0 +1,4665 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {
+ "application/vnd.databricks.v1+cell": {
+ "cellMetadata": {},
+ "inputWidgets": {},
+ "nuid": "0ea8b46b-839b-445b-8043-ccdf4e920ace",
+ "showTitle": false,
+ "title": ""
+ }
+ },
+ "outputs": [],
+ "source": [
+ "%load_ext autoreload\n",
+ "%autoreload 2"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {
+ "application/vnd.databricks.v1+cell": {
+ "cellMetadata": {},
+ "inputWidgets": {},
+ "nuid": "6d394937-6c99-4a7c-9d32-7600a280032f",
+ "showTitle": false,
+ "title": ""
+ }
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "working dir: /home/inflaton/code/projects/courses/novel-translation\n"
+ ]
+ }
+ ],
+ "source": [
+ "import os\n",
+ "import sys\n",
+ "from pathlib import Path\n",
+ "\n",
+ "working_dir = str(Path.cwd().parent)\n",
+ "os.chdir(working_dir)\n",
+ "sys.path.append(working_dir)\n",
+ "print(\"working dir:\", working_dir)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {
+ "application/vnd.databricks.v1+cell": {
+ "cellMetadata": {},
+ "inputWidgets": {},
+ "nuid": "9f67ec60-2f24-411c-84eb-0dd664b44775",
+ "showTitle": false,
+ "title": ""
+ }
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "loading env vars from: /home/inflaton/code/projects/courses/novel-translation/.env\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "True"
+ ]
+ },
+ "execution_count": 3,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "from dotenv import find_dotenv, load_dotenv\n",
+ "\n",
+ "found_dotenv = find_dotenv(\".env\")\n",
+ "\n",
+ "if len(found_dotenv) == 0:\n",
+ " found_dotenv = find_dotenv(\".env.example\")\n",
+ "print(f\"loading env vars from: {found_dotenv}\")\n",
+ "load_dotenv(found_dotenv, override=True)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {
+ "application/vnd.databricks.v1+cell": {
+ "cellMetadata": {},
+ "inputWidgets": {},
+ "nuid": "f1597656-8042-4878-9d3b-9ebfb8dd86dc",
+ "showTitle": false,
+ "title": ""
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "('unsloth/Qwen2-0.5B-Instruct-bnb-4bit',\n",
+ " True,\n",
+ " None,\n",
+ " None,\n",
+ " 2048,\n",
+ " 10,\n",
+ " None,\n",
+ " 'datasets/mac/mac.tsv',\n",
+ " 'results/mac-results_v3.csv')"
+ ]
+ },
+ "execution_count": 4,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "import os\n",
+ "\n",
+ "model_name = os.getenv(\"MODEL_NAME\")\n",
+ "token = os.getenv(\"HF_TOKEN\") or None\n",
+ "load_in_4bit = os.getenv(\"LOAD_IN_4BIT\") == \"true\"\n",
+ "local_model = os.getenv(\"LOCAL_MODEL\")\n",
+ "hub_model = os.getenv(\"HUB_MODEL\")\n",
+ "num_train_epochs = int(os.getenv(\"NUM_TRAIN_EPOCHS\") or 0)\n",
+ "data_path = os.getenv(\"DATA_PATH\")\n",
+ "results_path = os.getenv(\"RESULTS_PATH\")\n",
+ "\n",
+ "max_seq_length = 2048 # Choose any! We auto support RoPE Scaling internally!\n",
+ "dtype = (\n",
+ " None # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+\n",
+ ")\n",
+ "\n",
+ "model_name, load_in_4bit, local_model, hub_model, max_seq_length, num_train_epochs, dtype, data_path, results_path"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Sun Jun 23 12:46:16 2024 \n",
+ "+---------------------------------------------------------------------------------------+\n",
+ "| NVIDIA-SMI 545.23.07 Driver Version: 546.12 CUDA Version: 12.3 |\n",
+ "|-----------------------------------------+----------------------+----------------------+\n",
+ "| GPU Name Persistence-M | Bus-Id Disp.A | Volatile Uncorr. ECC |\n",
+ "| Fan Temp Perf Pwr:Usage/Cap | Memory-Usage | GPU-Util Compute M. |\n",
+ "| | | MIG M. |\n",
+ "|=========================================+======================+======================|\n",
+ "| 0 NVIDIA GeForce RTX 4080 ... On | 00000000:01:00.0 On | N/A |\n",
+ "| N/A 53C P8 5W / 150W | 452MiB / 12282MiB | 11% Default |\n",
+ "| | | N/A |\n",
+ "+-----------------------------------------+----------------------+----------------------+\n",
+ " \n",
+ "+---------------------------------------------------------------------------------------+\n",
+ "| Processes: |\n",
+ "| GPU GI CI PID Type Process name GPU Memory |\n",
+ "| ID ID Usage |\n",
+ "|=======================================================================================|\n",
+ "| No running processes found |\n",
+ "+---------------------------------------------------------------------------------------+\n"
+ ]
+ }
+ ],
+ "source": [
+ "!nvidia-smi"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "CPU times: user 3 μs, sys: 1 μs, total: 4 μs\n",
+ "Wall time: 6.91 μs\n"
+ ]
+ }
+ ],
+ "source": [
+ "%%time\n",
+ "\n",
+ "# !pip install flash-attn --no-build-isolation"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Name: flash-attn\n",
+ "Version: 2.5.9.post1\n",
+ "Summary: Flash Attention: Fast and Memory-Efficient Exact Attention\n",
+ "Home-page: https://github.com/Dao-AILab/flash-attention\n",
+ "Author: Tri Dao\n",
+ "Author-email: trid@cs.stanford.edu\n",
+ "License: \n",
+ "Location: /home/inflaton/miniconda3/envs/unsloth_env/lib/python3.10/site-packages\n",
+ "Requires: einops, torch\n",
+ "Required-by: \n"
+ ]
+ }
+ ],
+ "source": [
+ "!pip show flash-attn"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Current Directory:\n",
+ "/home/inflaton/code/projects/courses/novel-translation\n",
+ "Tuning unsloth/Qwen2-0.5B-Instruct-bnb-4bit\n",
+ "🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.\n",
+ "[nltk_data] Downloading package wordnet to /home/inflaton/nltk_data...\n",
+ "[nltk_data] Package wordnet is already up-to-date!\n",
+ "[nltk_data] Downloading package punkt to /home/inflaton/nltk_data...\n",
+ "[nltk_data] Package punkt is already up-to-date!\n",
+ "[nltk_data] Downloading package omw-1.4 to /home/inflaton/nltk_data...\n",
+ "[nltk_data] Package omw-1.4 is already up-to-date!\n",
+ "loading /home/inflaton/code/projects/courses/novel-translation/translation_engine_v3.py\n",
+ "loading env vars from: /home/inflaton/code/projects/courses/novel-translation/.env\n",
+ "unsloth/Qwen2-0.5B-Instruct-bnb-4bit True 2048 10 None datasets/mac/mac.tsv results/mac-results_v3.csv True True True\n",
+ "(1) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n",
+ "0.0 GB of memory reserved.\n",
+ "loading model: unsloth/Qwen2-0.5B-Instruct-bnb-4bit\n",
+ "==((====))== Unsloth: Fast Qwen2 patching release 2024.6\n",
+ " \\\\ /| GPU: NVIDIA GeForce RTX 4080 Laptop GPU. Max memory: 11.994 GB. Platform = Linux.\n",
+ "O^O/ \\_/ \\ Pytorch: 2.3.0. CUDA = 8.9. CUDA Toolkit = 12.1.\n",
+ "\\ / Bfloat16 = TRUE. Xformers = 0.0.26.post1. FA = True.\n",
+ " \"-____-\" Free Apache license: http://github.com/unslothai/unsloth\n",
+ "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n",
+ "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n",
+ "(2) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n",
+ "0.633 GB of memory reserved.\n",
+ "loading train/test data files\n",
+ "DatasetDict({\n",
+ " train: Dataset({\n",
+ " features: ['chinese', 'english', 'text', 'prompt'],\n",
+ " num_rows: 4528\n",
+ " })\n",
+ " test: Dataset({\n",
+ " features: ['chinese', 'english', 'text', 'prompt'],\n",
+ " num_rows: 1133\n",
+ " })\n",
+ "})\n",
+ "Evaluating base model: unsloth/Qwen2-0.5B-Instruct-bnb-4bit\n",
+ " 0%| | 0/1133 [00:00, ?it/s]--------\n",
+ "step 1: Old Teng raised his gun, closing his eyes and gripping a triangular eye. A boom of bullets rang out as he fired one, like thunder crashing down. The hammering sound of steel stones echoed through the branches of the trees. \n",
+ "\n",
+ "The noise was so loud that it made my heart beat faster.<|im_end|>\n",
+ "--------\n",
+ "step 2: Old Teng raised his gun, closing his eyes and gripping a triangular eye. A boom of bullets rang out as he fired one, like thunder crashing down. The hammering sound of steel stones echoed through the branches of the trees. \n",
+ "\n",
+ "The noise was so loud that it made my heart beat faster.\n",
+ "--------\n",
+ "step 3: Old Teng raised his gun, closing his eyes and gripping a triangular eye. A boom of bullets rang out as he fired one, like thunder crashing down. The hammering sound of steel stones echoed through the branches of the trees. \n",
+ "\n",
+ "The noise was so loud that it made my heart beat faster.\n",
+ "100%|█████████████████████████████████████| 1133/1133 [1:06:42<00:00, 3.53s/it]\n",
+ " chinese ... unsloth/Qwen2-0.5B-Instruct-bnb-4bit\n",
+ "0 老耿端起枪,眯缝起一只三角眼,一搂扳机响了枪,冰雹般的金麻雀劈哩啪啦往下落,铁砂子在柳枝间飞... ... Old Teng raised his gun, closing his eyes and ...\n",
+ "\n",
+ "[1 rows x 3 columns]\n",
+ "(3) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n",
+ "3.023 GB of memory reserved.\n",
+ "Unsloth 2024.6 patched 24 layers with 0 QKV layers, 24 O layers and 24 MLP layers.\n",
+ "(4) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n",
+ "3.023 GB of memory reserved.\n",
+ "==((====))== Unsloth - 2x faster free finetuning | Num GPUs = 1\n",
+ " \\\\ /| Num examples = 4,528 | Num Epochs = 10\n",
+ "O^O/ \\_/ \\ Batch size per device = 2 | Gradient Accumulation steps = 4\n",
+ "\\ / Total batch size = 8 | Total steps = 5,660\n",
+ " \"-____-\" Number of trainable parameters = 8,798,208\n",
+ "{'loss': 1.9401, 'grad_norm': 0.9639493823051453, 'learning_rate': 0.00019664014146772768, 'epoch': 0.18}\n",
+ "{'loss': 1.7762, 'grad_norm': 0.8060959577560425, 'learning_rate': 0.0001931034482758621, 'epoch': 0.35}\n",
+ "{'loss': 1.7146, 'grad_norm': 0.9296559691429138, 'learning_rate': 0.00018956675508399648, 'epoch': 0.53}\n",
+ "{'loss': 1.7155, 'grad_norm': 0.7544056177139282, 'learning_rate': 0.00018603006189213086, 'epoch': 0.71}\n",
+ "{'loss': 1.6861, 'grad_norm': 0.8301573395729065, 'learning_rate': 0.00018249336870026527, 'epoch': 0.88}\n",
+ "{'loss': 1.6078, 'grad_norm': 0.8045125007629395, 'learning_rate': 0.00017895667550839965, 'epoch': 1.06}\n",
+ "{'loss': 1.4921, 'grad_norm': 0.9031914472579956, 'learning_rate': 0.00017541998231653406, 'epoch': 1.24}\n",
+ "{'loss': 1.5401, 'grad_norm': 0.9808986186981201, 'learning_rate': 0.00017188328912466844, 'epoch': 1.41}\n",
+ "{'loss': 1.4721, 'grad_norm': 1.0783536434173584, 'learning_rate': 0.00016834659593280285, 'epoch': 1.59}\n",
+ "{'loss': 1.4686, 'grad_norm': 0.988545835018158, 'learning_rate': 0.00016480990274093723, 'epoch': 1.77}\n",
+ "{'loss': 1.5278, 'grad_norm': 1.0253351926803589, 'learning_rate': 0.00016127320954907164, 'epoch': 1.94}\n",
+ "{'loss': 1.3493, 'grad_norm': 1.1621149778366089, 'learning_rate': 0.000157736516357206, 'epoch': 2.12}\n",
+ "{'loss': 1.2156, 'grad_norm': 1.1786366701126099, 'learning_rate': 0.0001541998231653404, 'epoch': 2.3}\n",
+ "{'loss': 1.2572, 'grad_norm': 1.3017158508300781, 'learning_rate': 0.0001506631299734748, 'epoch': 2.47}\n",
+ "{'loss': 1.2552, 'grad_norm': 1.2804787158966064, 'learning_rate': 0.0001471264367816092, 'epoch': 2.65}\n",
+ "{'loss': 1.2664, 'grad_norm': 1.1679364442825317, 'learning_rate': 0.0001435897435897436, 'epoch': 2.83}\n",
+ "{'loss': 1.2346, 'grad_norm': 1.171284794807434, 'learning_rate': 0.000140053050397878, 'epoch': 3.0}\n",
+ "{'loss': 0.9801, 'grad_norm': 1.5025601387023926, 'learning_rate': 0.0001365163572060124, 'epoch': 3.18}\n",
+ "{'loss': 0.9924, 'grad_norm': 1.5331358909606934, 'learning_rate': 0.00013297966401414678, 'epoch': 3.36}\n",
+ "{'loss': 0.9986, 'grad_norm': 1.5144548416137695, 'learning_rate': 0.0001294429708222812, 'epoch': 3.53}\n",
+ "{'loss': 0.9813, 'grad_norm': 1.5257072448730469, 'learning_rate': 0.00012590627763041555, 'epoch': 3.71}\n",
+ "{'loss': 1.0233, 'grad_norm': 1.5738170146942139, 'learning_rate': 0.00012236958443854996, 'epoch': 3.89}\n",
+ "{'loss': 0.9316, 'grad_norm': 1.5346697568893433, 'learning_rate': 0.00011883289124668435, 'epoch': 4.06}\n",
+ "{'loss': 0.7391, 'grad_norm': 1.7242717742919922, 'learning_rate': 0.00011529619805481875, 'epoch': 4.24}\n",
+ "{'loss': 0.7396, 'grad_norm': 1.7575305700302124, 'learning_rate': 0.00011175950486295315, 'epoch': 4.42}\n",
+ "{'loss': 0.7765, 'grad_norm': 2.111323833465576, 'learning_rate': 0.00010822281167108754, 'epoch': 4.59}\n",
+ "{'loss': 0.7738, 'grad_norm': 2.2442333698272705, 'learning_rate': 0.00010468611847922194, 'epoch': 4.77}\n",
+ "{'loss': 0.7942, 'grad_norm': 2.135150909423828, 'learning_rate': 0.00010114942528735633, 'epoch': 4.95}\n",
+ "{'loss': 0.6302, 'grad_norm': 1.8503246307373047, 'learning_rate': 9.761273209549072e-05, 'epoch': 5.12}\n",
+ "{'loss': 0.5572, 'grad_norm': 1.59291410446167, 'learning_rate': 9.407603890362513e-05, 'epoch': 5.3}\n",
+ "{'loss': 0.5616, 'grad_norm': 1.7540444135665894, 'learning_rate': 9.053934571175951e-05, 'epoch': 5.48}\n",
+ "{'loss': 0.5898, 'grad_norm': 2.267761707305908, 'learning_rate': 8.70026525198939e-05, 'epoch': 5.65}\n",
+ "{'loss': 0.5818, 'grad_norm': 2.345642328262329, 'learning_rate': 8.34659593280283e-05, 'epoch': 5.83}\n",
+ "{'loss': 0.5804, 'grad_norm': 2.3560101985931396, 'learning_rate': 7.99292661361627e-05, 'epoch': 6.01}\n",
+ "{'loss': 0.3973, 'grad_norm': 3.8223047256469727, 'learning_rate': 7.639257294429708e-05, 'epoch': 6.18}\n",
+ "{'loss': 0.4132, 'grad_norm': 1.865454912185669, 'learning_rate': 7.285587975243147e-05, 'epoch': 6.36}\n",
+ "{'loss': 0.4299, 'grad_norm': 2.473954200744629, 'learning_rate': 6.931918656056587e-05, 'epoch': 6.54}\n",
+ "{'loss': 0.422, 'grad_norm': 1.6875197887420654, 'learning_rate': 6.578249336870027e-05, 'epoch': 6.71}\n",
+ "{'loss': 0.4284, 'grad_norm': 1.5689966678619385, 'learning_rate': 6.224580017683466e-05, 'epoch': 6.89}\n",
+ "{'loss': 0.3869, 'grad_norm': 1.6680887937545776, 'learning_rate': 5.870910698496905e-05, 'epoch': 7.07}\n",
+ "{'loss': 0.2934, 'grad_norm': 1.7025184631347656, 'learning_rate': 5.517241379310345e-05, 'epoch': 7.24}\n",
+ "{'loss': 0.3139, 'grad_norm': 1.5031529664993286, 'learning_rate': 5.163572060123785e-05, 'epoch': 7.42}\n",
+ "{'loss': 0.3133, 'grad_norm': 1.9866334199905396, 'learning_rate': 4.809902740937224e-05, 'epoch': 7.6}\n",
+ "{'loss': 0.306, 'grad_norm': 2.1866486072540283, 'learning_rate': 4.4562334217506634e-05, 'epoch': 7.77}\n",
+ "{'loss': 0.3183, 'grad_norm': 1.4326164722442627, 'learning_rate': 4.1025641025641023e-05, 'epoch': 7.95}\n",
+ "{'loss': 0.2477, 'grad_norm': 1.3497223854064941, 'learning_rate': 3.7488947833775426e-05, 'epoch': 8.13}\n",
+ "{'loss': 0.2262, 'grad_norm': 1.6162991523742676, 'learning_rate': 3.3952254641909815e-05, 'epoch': 8.3}\n",
+ "{'loss': 0.2302, 'grad_norm': 1.0059006214141846, 'learning_rate': 3.041556145004421e-05, 'epoch': 8.48}\n",
+ "{'loss': 0.236, 'grad_norm': 1.3581494092941284, 'learning_rate': 2.6878868258178604e-05, 'epoch': 8.66}\n",
+ "{'loss': 0.2366, 'grad_norm': 1.891054391860962, 'learning_rate': 2.3342175066313e-05, 'epoch': 8.83}\n",
+ "{'loss': 0.238, 'grad_norm': 1.0669790506362915, 'learning_rate': 1.9805481874447392e-05, 'epoch': 9.01}\n",
+ "{'loss': 0.1814, 'grad_norm': 1.2125357389450073, 'learning_rate': 1.6268788682581788e-05, 'epoch': 9.19}\n",
+ "{'loss': 0.1897, 'grad_norm': 1.044737696647644, 'learning_rate': 1.273209549071618e-05, 'epoch': 9.36}\n",
+ "{'loss': 0.1873, 'grad_norm': 1.1148860454559326, 'learning_rate': 9.195402298850575e-06, 'epoch': 9.54}\n",
+ "{'loss': 0.189, 'grad_norm': 1.8938679695129395, 'learning_rate': 5.658709106984969e-06, 'epoch': 9.72}\n",
+ "{'loss': 0.1892, 'grad_norm': 1.0202747583389282, 'learning_rate': 2.1220159151193635e-06, 'epoch': 9.89}\n",
+ "{'train_runtime': 13804.2064, 'train_samples_per_second': 3.28, 'train_steps_per_second': 0.41, 'train_loss': 0.7992495260474539, 'epoch': 10.0}\n",
+ "100%|█████████████████████████████████████| 5660/5660 [3:50:04<00:00, 2.44s/it]\n",
+ "(5) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n",
+ "13804.2064 seconds used for training.\n",
+ "230.07 minutes used for training.\n",
+ "Peak reserved memory = 3.023 GB.\n",
+ "Peak reserved memory for training = 0.0 GB.\n",
+ "Peak reserved memory % of max memory = 25.204 %.\n",
+ "Peak reserved memory for training % of max memory = 0.0 %.\n",
+ "Evaluating fine-tuned model: unsloth/Qwen2-0.5B-Instruct-bnb-4bit\n",
+ " 0%| | 0/1133 [00:00, ?it/s]--------\n",
+ "step 1: Old Geng raised his rifle, squinted his triangular eye, and fired – a gun, like a ladle, crackled as the shot fell down towards him.<|im_end|>\n",
+ "--------\n",
+ "step 2: Old Geng raised his rifle, squinted his triangular eye, and fired – a gun, like a ladle, crackled as the shot fell down towards him.\n",
+ "--------\n",
+ "step 3: Old Geng raised his rifle, squinted his triangular eye, and fired – a gun, like a ladle, crackled as the shot fell down towards him.\n",
+ "100%|█████████████████████████████████████| 1133/1133 [1:39:47<00:00, 5.28s/it]\n",
+ " chinese ... unsloth/Qwen2-0.5B-Instruct-bnb-4bit(finetuned)\n",
+ "0 老耿端起枪,眯缝起一只三角眼,一搂扳机响了枪,冰雹般的金麻雀劈哩啪啦往下落,铁砂子在柳枝间飞... ... Old Geng raised his rifle, squinted his triang...\n",
+ "\n",
+ "[1 rows x 4 columns]\n",
+ "(6) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n",
+ "3.023 GB of memory reserved.\n",
+ "README.md: 100%|███████████████████████████████| 593/593 [00:00<00:00, 3.39MB/s]\n",
+ "model.safetensors: 100%|█████████████████████| 493M/493M [00:41<00:00, 11.9MB/s]\n",
+ "Saved model to https://huggingface.co/Qwen2-0.5B-Instruct-bnb-4bit-MAC-lora\n",
+ "README.md: 100%|███████████████████████████████| 599/599 [00:00<00:00, 2.91MB/s]\n",
+ "Unsloth: Saving tokenizer... Done.\n",
+ "Unsloth: Saving model... Done.\n",
+ "Unsloth: Saving LoRA adapters. Please wait...\n",
+ "401 Client Error: Unauthorized for url: https://huggingface.co/api/repos/create (Request ID: Root=1-667805f5-6581fe263332f4220001e82f;27494d10-63ec-4751-8bd3-059f4a0c16c7)\n",
+ "\n",
+ "Invalid username or password.\n",
+ "Tuning unsloth/Qwen2-1.5B-Instruct-bnb-4bit\n",
+ "🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.\n",
+ "[nltk_data] Downloading package wordnet to /home/inflaton/nltk_data...\n",
+ "[nltk_data] Package wordnet is already up-to-date!\n",
+ "[nltk_data] Downloading package punkt to /home/inflaton/nltk_data...\n",
+ "[nltk_data] Package punkt is already up-to-date!\n",
+ "[nltk_data] Downloading package omw-1.4 to /home/inflaton/nltk_data...\n",
+ "[nltk_data] Package omw-1.4 is already up-to-date!\n",
+ "loading /home/inflaton/code/projects/courses/novel-translation/translation_engine_v3.py\n",
+ "loading env vars from: /home/inflaton/code/projects/courses/novel-translation/.env\n",
+ "unsloth/Qwen2-1.5B-Instruct-bnb-4bit True 2048 10 None datasets/mac/mac.tsv results/mac-results_v3.csv True True True\n",
+ "(1) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n",
+ "0.0 GB of memory reserved.\n",
+ "loading model: unsloth/Qwen2-1.5B-Instruct-bnb-4bit\n",
+ "==((====))== Unsloth: Fast Qwen2 patching release 2024.6\n",
+ " \\\\ /| GPU: NVIDIA GeForce RTX 4080 Laptop GPU. Max memory: 11.994 GB. Platform = Linux.\n",
+ "O^O/ \\_/ \\ Pytorch: 2.3.0. CUDA = 8.9. CUDA Toolkit = 12.1.\n",
+ "\\ / Bfloat16 = TRUE. Xformers = 0.0.26.post1. FA = True.\n",
+ " \"-____-\" Free Apache license: http://github.com/unslothai/unsloth\n",
+ "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n",
+ "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n",
+ "(2) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n",
+ "1.516 GB of memory reserved.\n",
+ "loading train/test data files\n",
+ "Map: 100%|█████████████████████████| 4528/4528 [00:00<00:00, 6537.59 examples/s]\n",
+ "Map: 100%|█████████████████████████| 1133/1133 [00:00<00:00, 7281.32 examples/s]\n",
+ "DatasetDict({\n",
+ " train: Dataset({\n",
+ " features: ['chinese', 'english', 'text', 'prompt'],\n",
+ " num_rows: 4528\n",
+ " })\n",
+ " test: Dataset({\n",
+ " features: ['chinese', 'english', 'text', 'prompt'],\n",
+ " num_rows: 1133\n",
+ " })\n",
+ "})\n",
+ "Evaluating base model: unsloth/Qwen2-1.5B-Instruct-bnb-4bit\n",
+ " 0%| | 0/1133 [00:00, ?it/s]--------\n",
+ "step 1: Old Geer lifted his gun, squinting one eye as he pulled the trigger. A hail of bullets rained down from his rifle. Golden sparrows plopped down from the trees, sandstones flying through the willows, making a clattering sound.<|im_end|>\n",
+ "--------\n",
+ "step 2: Old Geer lifted his gun, squinting one eye as he pulled the trigger. A hail of bullets rained down from his rifle. Golden sparrows plopped down from the trees, sandstones flying through the willows, making a clattering sound.\n",
+ "--------\n",
+ "step 3: Old Geer lifted his gun, squinting one eye as he pulled the trigger. A hail of bullets rained down from his rifle. Golden sparrows plopped down from the trees, sandstones flying through the willows, making a clattering sound.\n",
+ "100%|█████████████████████████████████████| 1133/1133 [1:33:33<00:00, 4.95s/it]\n",
+ " chinese ... unsloth/Qwen2-1.5B-Instruct-bnb-4bit\n",
+ "0 老耿端起枪,眯缝起一只三角眼,一搂扳机响了枪,冰雹般的金麻雀劈哩啪啦往下落,铁砂子在柳枝间飞... ... Old Geer lifted his gun, squinting one eye as ...\n",
+ "\n",
+ "[1 rows x 5 columns]\n",
+ "(3) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n",
+ "3.945 GB of memory reserved.\n",
+ "Unsloth 2024.6 patched 28 layers with 0 QKV layers, 28 O layers and 28 MLP layers.\n",
+ "Map (num_proc=2): 100%|████████████| 4528/4528 [00:01<00:00, 2276.92 examples/s]\n",
+ "(4) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n",
+ "3.945 GB of memory reserved.\n",
+ "==((====))== Unsloth - 2x faster free finetuning | Num GPUs = 1\n",
+ " \\\\ /| Num examples = 4,528 | Num Epochs = 10\n",
+ "O^O/ \\_/ \\ Batch size per device = 2 | Gradient Accumulation steps = 4\n",
+ "\\ / Total batch size = 8 | Total steps = 5,660\n",
+ " \"-____-\" Number of trainable parameters = 18,464,768\n",
+ "{'loss': 1.7411, 'grad_norm': 0.658531665802002, 'learning_rate': 0.00019664014146772768, 'epoch': 0.18}\n",
+ "{'loss': 1.5679, 'grad_norm': 0.6339693665504456, 'learning_rate': 0.0001931034482758621, 'epoch': 0.35}\n",
+ "{'loss': 1.5154, 'grad_norm': 0.6460090279579163, 'learning_rate': 0.00018956675508399648, 'epoch': 0.53}\n",
+ "{'loss': 1.5171, 'grad_norm': 0.5758596658706665, 'learning_rate': 0.00018603006189213086, 'epoch': 0.71}\n",
+ "{'loss': 1.4961, 'grad_norm': 0.5699881315231323, 'learning_rate': 0.00018249336870026527, 'epoch': 0.88}\n",
+ "{'loss': 1.4185, 'grad_norm': 0.6029691100120544, 'learning_rate': 0.00017895667550839965, 'epoch': 1.06}\n",
+ "{'loss': 1.2981, 'grad_norm': 0.7669411301612854, 'learning_rate': 0.00017541998231653406, 'epoch': 1.24}\n",
+ "{'loss': 1.3393, 'grad_norm': 0.6863419413566589, 'learning_rate': 0.00017188328912466844, 'epoch': 1.41}\n",
+ "{'loss': 1.2785, 'grad_norm': 0.9242411255836487, 'learning_rate': 0.00016834659593280285, 'epoch': 1.59}\n",
+ "{'loss': 1.2799, 'grad_norm': 0.8570270538330078, 'learning_rate': 0.00016480990274093723, 'epoch': 1.77}\n",
+ "{'loss': 1.3319, 'grad_norm': 1.2672839164733887, 'learning_rate': 0.00016127320954907164, 'epoch': 1.94}\n",
+ "{'loss': 1.1278, 'grad_norm': 0.9834404587745667, 'learning_rate': 0.000157736516357206, 'epoch': 2.12}\n",
+ "{'loss': 0.9963, 'grad_norm': 1.082615852355957, 'learning_rate': 0.0001541998231653404, 'epoch': 2.3}\n",
+ "{'loss': 1.0299, 'grad_norm': 1.1531076431274414, 'learning_rate': 0.0001506631299734748, 'epoch': 2.47}\n",
+ "{'loss': 1.0355, 'grad_norm': 1.0619866847991943, 'learning_rate': 0.0001471264367816092, 'epoch': 2.65}\n",
+ "{'loss': 1.0358, 'grad_norm': 1.0393147468566895, 'learning_rate': 0.0001435897435897436, 'epoch': 2.83}\n",
+ "{'loss': 1.0088, 'grad_norm': 0.8888176679611206, 'learning_rate': 0.000140053050397878, 'epoch': 3.0}\n",
+ "{'loss': 0.6966, 'grad_norm': 1.2902939319610596, 'learning_rate': 0.0001365163572060124, 'epoch': 3.18}\n",
+ "{'loss': 0.7113, 'grad_norm': 1.3367533683776855, 'learning_rate': 0.00013297966401414678, 'epoch': 3.36}\n",
+ "{'loss': 0.7152, 'grad_norm': 1.3983240127563477, 'learning_rate': 0.0001294429708222812, 'epoch': 3.53}\n",
+ "{'loss': 0.7054, 'grad_norm': 1.503348469734192, 'learning_rate': 0.00012590627763041555, 'epoch': 3.71}\n",
+ "{'loss': 0.7441, 'grad_norm': 1.2397220134735107, 'learning_rate': 0.00012236958443854996, 'epoch': 3.89}\n",
+ "{'loss': 0.644, 'grad_norm': 1.1092045307159424, 'learning_rate': 0.00011883289124668435, 'epoch': 4.06}\n",
+ "{'loss': 0.46, 'grad_norm': 1.3230023384094238, 'learning_rate': 0.00011529619805481875, 'epoch': 4.24}\n",
+ "{'loss': 0.4453, 'grad_norm': 1.4391876459121704, 'learning_rate': 0.00011175950486295315, 'epoch': 4.42}\n",
+ "{'loss': 0.4831, 'grad_norm': 1.8462566137313843, 'learning_rate': 0.00010822281167108754, 'epoch': 4.59}\n",
+ "{'loss': 0.4777, 'grad_norm': 2.09181547164917, 'learning_rate': 0.00010468611847922194, 'epoch': 4.77}\n",
+ "{'loss': 0.4871, 'grad_norm': 1.7411134243011475, 'learning_rate': 0.00010114942528735633, 'epoch': 4.95}\n",
+ "{'loss': 0.3573, 'grad_norm': 1.5780448913574219, 'learning_rate': 9.761273209549072e-05, 'epoch': 5.12}\n",
+ "{'loss': 0.2919, 'grad_norm': 1.4988445043563843, 'learning_rate': 9.407603890362513e-05, 'epoch': 5.3}\n",
+ "{'loss': 0.2988, 'grad_norm': 0.9282442927360535, 'learning_rate': 9.053934571175951e-05, 'epoch': 5.48}\n",
+ "{'loss': 0.3139, 'grad_norm': 1.3400734663009644, 'learning_rate': 8.70026525198939e-05, 'epoch': 5.65}\n",
+ "{'loss': 0.3079, 'grad_norm': 1.5739268064498901, 'learning_rate': 8.34659593280283e-05, 'epoch': 5.83}\n",
+ "{'loss': 0.3078, 'grad_norm': 0.6474704742431641, 'learning_rate': 7.99292661361627e-05, 'epoch': 6.01}\n",
+ "{'loss': 0.194, 'grad_norm': 1.3646390438079834, 'learning_rate': 7.639257294429708e-05, 'epoch': 6.18}\n",
+ "{'loss': 0.2034, 'grad_norm': 1.275925636291504, 'learning_rate': 7.285587975243147e-05, 'epoch': 6.36}\n",
+ "{'loss': 0.2082, 'grad_norm': 1.2210203409194946, 'learning_rate': 6.931918656056587e-05, 'epoch': 6.54}\n",
+ "{'loss': 0.205, 'grad_norm': 0.8089584112167358, 'learning_rate': 6.578249336870027e-05, 'epoch': 6.71}\n",
+ "{'loss': 0.2143, 'grad_norm': 1.1388084888458252, 'learning_rate': 6.224580017683466e-05, 'epoch': 6.89}\n",
+ "{'loss': 0.195, 'grad_norm': 0.7365647554397583, 'learning_rate': 5.870910698496905e-05, 'epoch': 7.07}\n",
+ "{'loss': 0.148, 'grad_norm': 0.5169339179992676, 'learning_rate': 5.517241379310345e-05, 'epoch': 7.24}\n",
+ "{'loss': 0.1574, 'grad_norm': 0.9203131198883057, 'learning_rate': 5.163572060123785e-05, 'epoch': 7.42}\n",
+ "{'loss': 0.1573, 'grad_norm': 0.7963287830352783, 'learning_rate': 4.809902740937224e-05, 'epoch': 7.6}\n",
+ "{'loss': 0.1567, 'grad_norm': 0.6291562914848328, 'learning_rate': 4.4562334217506634e-05, 'epoch': 7.77}\n",
+ "{'loss': 0.1591, 'grad_norm': 1.3627387285232544, 'learning_rate': 4.1025641025641023e-05, 'epoch': 7.95}\n",
+ "{'loss': 0.1367, 'grad_norm': 0.6390063166618347, 'learning_rate': 3.7488947833775426e-05, 'epoch': 8.13}\n",
+ "{'loss': 0.1307, 'grad_norm': 0.6079268455505371, 'learning_rate': 3.3952254641909815e-05, 'epoch': 8.3}\n",
+ "{'loss': 0.1313, 'grad_norm': 0.47809019684791565, 'learning_rate': 3.041556145004421e-05, 'epoch': 8.48}\n",
+ "{'loss': 0.1333, 'grad_norm': 0.41499361395835876, 'learning_rate': 2.6878868258178604e-05, 'epoch': 8.66}\n",
+ "{'loss': 0.1335, 'grad_norm': 0.6223477125167847, 'learning_rate': 2.3342175066313e-05, 'epoch': 8.83}\n",
+ "{'loss': 0.1331, 'grad_norm': 0.4558337330818176, 'learning_rate': 1.9805481874447392e-05, 'epoch': 9.01}\n",
+ "{'loss': 0.118, 'grad_norm': 0.4607541561126709, 'learning_rate': 1.6268788682581788e-05, 'epoch': 9.19}\n",
+ "{'loss': 0.1186, 'grad_norm': 0.49737748503685, 'learning_rate': 1.273209549071618e-05, 'epoch': 9.36}\n",
+ "{'loss': 0.1197, 'grad_norm': 0.46176978945732117, 'learning_rate': 9.195402298850575e-06, 'epoch': 9.54}\n",
+ "{'loss': 0.1203, 'grad_norm': 0.48998674750328064, 'learning_rate': 5.658709106984969e-06, 'epoch': 9.72}\n",
+ "{'loss': 0.1231, 'grad_norm': 0.443093866109848, 'learning_rate': 2.1220159151193635e-06, 'epoch': 9.89}\n",
+ "{'train_runtime': 15713.8663, 'train_samples_per_second': 2.882, 'train_steps_per_second': 0.36, 'train_loss': 0.5995376785736624, 'epoch': 10.0}\n",
+ "100%|█████████████████████████████████████| 5660/5660 [4:21:53<00:00, 2.78s/it]\n",
+ "(5) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n",
+ "15713.8663 seconds used for training.\n",
+ "261.9 minutes used for training.\n",
+ "Peak reserved memory = 3.945 GB.\n",
+ "Peak reserved memory for training = 0.0 GB.\n",
+ "Peak reserved memory % of max memory = 32.891 %.\n",
+ "Peak reserved memory for training % of max memory = 0.0 %.\n",
+ "Evaluating fine-tuned model: unsloth/Qwen2-1.5B-Instruct-bnb-4bit\n",
+ " 0%| | 0/1133 [00:00, ?it/s]--------\n",
+ "step 1: Old Geng raised the pistol to his eye, squeezed the trigger, and some of the shot flew straight into the sky, like ice pellets, as spattered tin shells burst against the willows.<|im_end|>\n",
+ "--------\n",
+ "step 2: Old Geng raised the pistol to his eye, squeezed the trigger, and some of the shot flew straight into the sky, like ice pellets, as spattered tin shells burst against the willows.\n",
+ "--------\n",
+ "step 3: Old Geng raised the pistol to his eye, squeezed the trigger, and some of the shot flew straight into the sky, like ice pellets, as spattered tin shells burst against the willows.\n",
+ "100%|█████████████████████████████████████| 1133/1133 [2:11:23<00:00, 6.96s/it]\n",
+ " chinese ... unsloth/Qwen2-1.5B-Instruct-bnb-4bit(finetuned)\n",
+ "0 老耿端起枪,眯缝起一只三角眼,一搂扳机响了枪,冰雹般的金麻雀劈哩啪啦往下落,铁砂子在柳枝间飞... ... Old Geng raised the pistol to his eye, squeeze...\n",
+ "\n",
+ "[1 rows x 6 columns]\n",
+ "(6) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n",
+ "5.33 GB of memory reserved.\n",
+ "README.md: 100%|███████████████████████████████| 593/593 [00:00<00:00, 3.08MB/s]\n",
+ "model.safetensors: 100%|███████████████████| 1.22G/1.22G [02:18<00:00, 8.82MB/s]\n",
+ "Saved model to https://huggingface.co/Qwen2-1.5B-Instruct-bnb-4bit-MAC-lora\n",
+ "README.md: 100%|███████████████████████████████| 599/599 [00:00<00:00, 2.99MB/s]\n",
+ "Unsloth: Saving tokenizer... Done.\n",
+ "Unsloth: Saving model... Done.\n",
+ "Unsloth: Saving LoRA adapters. Please wait...\n",
+ "401 Client Error: Unauthorized for url: https://huggingface.co/api/repos/create (Request ID: Root=1-66787955-4b5759766262340722a532d6;dcdc16ae-e45d-406a-81c9-13d22426edcb)\n",
+ "\n",
+ "Invalid username or password.\n",
+ "CPU times: user 23min 32s, sys: 8min 51s, total: 32min 24s\n",
+ "Wall time: 14h 50min 41s\n"
+ ]
+ }
+ ],
+ "source": [
+ "%%time\n",
+ "\n",
+ "!./tune-small.sh"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Current Directory:\n",
+ "/home/inflaton/code/projects/courses/novel-translation\n",
+ "Tuning unsloth/Qwen2-0.5B-Instruct\n",
+ "🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.\n",
+ "[nltk_data] Downloading package wordnet to /home/inflaton/nltk_data...\n",
+ "[nltk_data] Package wordnet is already up-to-date!\n",
+ "[nltk_data] Downloading package punkt to /home/inflaton/nltk_data...\n",
+ "[nltk_data] Package punkt is already up-to-date!\n",
+ "[nltk_data] Downloading package omw-1.4 to /home/inflaton/nltk_data...\n",
+ "[nltk_data] Package omw-1.4 is already up-to-date!\n",
+ "loading /home/inflaton/code/projects/courses/novel-translation/translation_engine_v3.py\n",
+ "loading env vars from: /home/inflaton/code/projects/courses/novel-translation/.env\n",
+ "unsloth/Qwen2-0.5B-Instruct True 2048 10 None datasets/mac/mac.tsv results/mac-results_v3.csv True True True\n",
+ "(1) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n",
+ "0.0 GB of memory reserved.\n",
+ "loading model: unsloth/Qwen2-0.5B-Instruct\n",
+ "==((====))== Unsloth: Fast Qwen2 patching release 2024.6\n",
+ " \\\\ /| GPU: NVIDIA GeForce RTX 4080 Laptop GPU. Max memory: 11.994 GB. Platform = Linux.\n",
+ "O^O/ \\_/ \\ Pytorch: 2.3.0. CUDA = 8.9. CUDA Toolkit = 12.1.\n",
+ "\\ / Bfloat16 = TRUE. Xformers = 0.0.26.post1. FA = True.\n",
+ " \"-____-\" Free Apache license: http://github.com/unslothai/unsloth\n",
+ "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n",
+ "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n",
+ "(2) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n",
+ "0.633 GB of memory reserved.\n",
+ "loading train/test data files\n",
+ "DatasetDict({\n",
+ " train: Dataset({\n",
+ " features: ['chinese', 'english', 'text', 'prompt'],\n",
+ " num_rows: 4528\n",
+ " })\n",
+ " test: Dataset({\n",
+ " features: ['chinese', 'english', 'text', 'prompt'],\n",
+ " num_rows: 1133\n",
+ " })\n",
+ "})\n",
+ "Evaluating base model: unsloth/Qwen2-0.5B-Instruct\n",
+ " 0%| | 0/1133 [00:00, ?it/s]--------\n",
+ "step 1: Old耿举起枪,眯着眼睛,枪声轰鸣,子弹砰砰砰地落在地上,一颗颗冰雹般的大鸟扑棱棱地落在柳树之间,咔嚓一声。<|im_end|>\n",
+ "--------\n",
+ "step 2: Old耿举起枪,眯着眼睛,枪声轰鸣,子弹砰砰砰地落在地上,一颗颗冰雹般的大鸟扑棱棱地落在柳树之间,咔嚓一声。\n",
+ "--------\n",
+ "step 3: Old耿举起枪,眯着眼睛,枪声轰鸣,子弹砰砰砰地落在地上,一颗颗冰雹般的大鸟扑棱棱地落在柳树之间,咔嚓一声。\n",
+ "100%|█████████████████████████████████████| 1133/1133 [1:07:20<00:00, 3.57s/it]\n",
+ " chinese ... unsloth/Qwen2-0.5B-Instruct\n",
+ "0 老耿端起枪,眯缝起一只三角眼,一搂扳机响了枪,冰雹般的金麻雀劈哩啪啦往下落,铁砂子在柳枝间飞... ... Old耿举起枪,眯着眼睛,枪声轰鸣,子弹砰砰砰地落在地上,一颗颗冰雹般的大鸟扑棱棱地落在柳树...\n",
+ "\n",
+ "[1 rows x 7 columns]\n",
+ "(3) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n",
+ "0.873 GB of memory reserved.\n",
+ "Unsloth 2024.6 patched 24 layers with 0 QKV layers, 24 O layers and 24 MLP layers.\n",
+ "(4) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n",
+ "0.873 GB of memory reserved.\n",
+ "==((====))== Unsloth - 2x faster free finetuning | Num GPUs = 1\n",
+ " \\\\ /| Num examples = 4,528 | Num Epochs = 10\n",
+ "O^O/ \\_/ \\ Batch size per device = 2 | Gradient Accumulation steps = 4\n",
+ "\\ / Total batch size = 8 | Total steps = 5,660\n",
+ " \"-____-\" Number of trainable parameters = 8,798,208\n",
+ "{'loss': 1.9401, 'grad_norm': 0.9639493823051453, 'learning_rate': 0.00019664014146772768, 'epoch': 0.18}\n",
+ "{'loss': 1.7763, 'grad_norm': 0.8041688799858093, 'learning_rate': 0.0001931034482758621, 'epoch': 0.35}\n",
+ "{'loss': 1.7147, 'grad_norm': 0.93106609582901, 'learning_rate': 0.00018956675508399648, 'epoch': 0.53}\n",
+ "{'loss': 1.7156, 'grad_norm': 0.753624677658081, 'learning_rate': 0.00018603006189213086, 'epoch': 0.71}\n",
+ "{'loss': 1.6862, 'grad_norm': 0.823365330696106, 'learning_rate': 0.00018249336870026527, 'epoch': 0.88}\n",
+ "{'loss': 1.6076, 'grad_norm': 0.807159423828125, 'learning_rate': 0.00017895667550839965, 'epoch': 1.06}\n",
+ "{'loss': 1.492, 'grad_norm': 0.9032222032546997, 'learning_rate': 0.00017541998231653406, 'epoch': 1.24}\n",
+ "{'loss': 1.5407, 'grad_norm': 0.9780230522155762, 'learning_rate': 0.00017188328912466844, 'epoch': 1.41}\n",
+ "{'loss': 1.4722, 'grad_norm': 1.0792107582092285, 'learning_rate': 0.00016834659593280285, 'epoch': 1.59}\n",
+ "{'loss': 1.4688, 'grad_norm': 0.9899805784225464, 'learning_rate': 0.00016480990274093723, 'epoch': 1.77}\n",
+ "{'loss': 1.5279, 'grad_norm': 1.0187550783157349, 'learning_rate': 0.00016127320954907164, 'epoch': 1.94}\n",
+ "{'loss': 1.3492, 'grad_norm': 1.1653039455413818, 'learning_rate': 0.000157736516357206, 'epoch': 2.12}\n",
+ "{'loss': 1.2155, 'grad_norm': 1.1759636402130127, 'learning_rate': 0.0001541998231653404, 'epoch': 2.3}\n",
+ "{'loss': 1.258, 'grad_norm': 1.3161606788635254, 'learning_rate': 0.0001506631299734748, 'epoch': 2.47}\n",
+ "{'loss': 1.2556, 'grad_norm': 1.345459222793579, 'learning_rate': 0.0001471264367816092, 'epoch': 2.65}\n",
+ "{'loss': 1.2664, 'grad_norm': 1.1774756908416748, 'learning_rate': 0.0001435897435897436, 'epoch': 2.83}\n",
+ "{'loss': 1.2348, 'grad_norm': 1.1240969896316528, 'learning_rate': 0.000140053050397878, 'epoch': 3.0}\n",
+ "{'loss': 0.9803, 'grad_norm': 1.504067301750183, 'learning_rate': 0.0001365163572060124, 'epoch': 3.18}\n",
+ "{'loss': 0.9925, 'grad_norm': 1.5067857503890991, 'learning_rate': 0.00013297966401414678, 'epoch': 3.36}\n",
+ "{'loss': 0.9989, 'grad_norm': 1.520134449005127, 'learning_rate': 0.0001294429708222812, 'epoch': 3.53}\n",
+ "{'loss': 0.9807, 'grad_norm': 1.5608190298080444, 'learning_rate': 0.00012590627763041555, 'epoch': 3.71}\n",
+ "{'loss': 1.0231, 'grad_norm': 1.6115481853485107, 'learning_rate': 0.00012236958443854996, 'epoch': 3.89}\n",
+ "{'loss': 0.9304, 'grad_norm': 1.5296086072921753, 'learning_rate': 0.00011883289124668435, 'epoch': 4.06}\n",
+ "{'loss': 0.7399, 'grad_norm': 1.6573024988174438, 'learning_rate': 0.00011529619805481875, 'epoch': 4.24}\n",
+ "{'loss': 0.7408, 'grad_norm': 1.7512829303741455, 'learning_rate': 0.00011175950486295315, 'epoch': 4.42}\n",
+ "{'loss': 0.7755, 'grad_norm': 2.100616216659546, 'learning_rate': 0.00010822281167108754, 'epoch': 4.59}\n",
+ "{'loss': 0.7736, 'grad_norm': 2.121138572692871, 'learning_rate': 0.00010468611847922194, 'epoch': 4.77}\n",
+ "{'loss': 0.7939, 'grad_norm': 2.1208739280700684, 'learning_rate': 0.00010114942528735633, 'epoch': 4.95}\n",
+ "{'loss': 0.6306, 'grad_norm': 1.7874706983566284, 'learning_rate': 9.761273209549072e-05, 'epoch': 5.12}\n",
+ "{'loss': 0.5555, 'grad_norm': 1.7197502851486206, 'learning_rate': 9.407603890362513e-05, 'epoch': 5.3}\n",
+ "{'loss': 0.5615, 'grad_norm': 1.6627233028411865, 'learning_rate': 9.053934571175951e-05, 'epoch': 5.48}\n",
+ "{'loss': 0.5916, 'grad_norm': 1.8348921537399292, 'learning_rate': 8.70026525198939e-05, 'epoch': 5.65}\n",
+ "{'loss': 0.5824, 'grad_norm': 2.2089450359344482, 'learning_rate': 8.34659593280283e-05, 'epoch': 5.83}\n",
+ "{'loss': 0.5792, 'grad_norm': 2.447774648666382, 'learning_rate': 7.99292661361627e-05, 'epoch': 6.01}\n",
+ "{'loss': 0.3998, 'grad_norm': 1.8090907335281372, 'learning_rate': 7.639257294429708e-05, 'epoch': 6.18}\n",
+ "{'loss': 0.4113, 'grad_norm': 1.8496599197387695, 'learning_rate': 7.285587975243147e-05, 'epoch': 6.36}\n",
+ "{'loss': 0.4296, 'grad_norm': 2.046454429626465, 'learning_rate': 6.931918656056587e-05, 'epoch': 6.54}\n",
+ "{'loss': 0.4214, 'grad_norm': 1.8460564613342285, 'learning_rate': 6.578249336870027e-05, 'epoch': 6.71}\n",
+ "{'loss': 0.4279, 'grad_norm': 1.7839864492416382, 'learning_rate': 6.224580017683466e-05, 'epoch': 6.89}\n",
+ "{'loss': 0.3849, 'grad_norm': 1.6806727647781372, 'learning_rate': 5.870910698496905e-05, 'epoch': 7.07}\n",
+ "{'loss': 0.2936, 'grad_norm': 1.7148785591125488, 'learning_rate': 5.517241379310345e-05, 'epoch': 7.24}\n",
+ "{'loss': 0.3127, 'grad_norm': 1.2444647550582886, 'learning_rate': 5.163572060123785e-05, 'epoch': 7.42}\n",
+ "{'loss': 0.3143, 'grad_norm': 2.0613274574279785, 'learning_rate': 4.809902740937224e-05, 'epoch': 7.6}\n",
+ "{'loss': 0.3047, 'grad_norm': 1.9974720478057861, 'learning_rate': 4.4562334217506634e-05, 'epoch': 7.77}\n",
+ "{'loss': 0.3181, 'grad_norm': 1.7139118909835815, 'learning_rate': 4.1025641025641023e-05, 'epoch': 7.95}\n",
+ "{'loss': 0.247, 'grad_norm': 1.3952387571334839, 'learning_rate': 3.7488947833775426e-05, 'epoch': 8.13}\n",
+ "{'loss': 0.226, 'grad_norm': 1.4544029235839844, 'learning_rate': 3.3952254641909815e-05, 'epoch': 8.3}\n",
+ "{'loss': 0.2295, 'grad_norm': 1.3040739297866821, 'learning_rate': 3.041556145004421e-05, 'epoch': 8.48}\n",
+ "{'loss': 0.2367, 'grad_norm': 1.0945595502853394, 'learning_rate': 2.6878868258178604e-05, 'epoch': 8.66}\n",
+ "{'loss': 0.236, 'grad_norm': 2.110018491744995, 'learning_rate': 2.3342175066313e-05, 'epoch': 8.83}\n",
+ "{'loss': 0.2384, 'grad_norm': 1.0737488269805908, 'learning_rate': 1.9805481874447392e-05, 'epoch': 9.01}\n",
+ "{'loss': 0.181, 'grad_norm': 1.0586763620376587, 'learning_rate': 1.6268788682581788e-05, 'epoch': 9.19}\n",
+ "{'loss': 0.19, 'grad_norm': 1.11255943775177, 'learning_rate': 1.273209549071618e-05, 'epoch': 9.36}\n",
+ "{'loss': 0.1869, 'grad_norm': 1.0752365589141846, 'learning_rate': 9.195402298850575e-06, 'epoch': 9.54}\n",
+ "{'loss': 0.1881, 'grad_norm': 1.592451810836792, 'learning_rate': 5.658709106984969e-06, 'epoch': 9.72}\n",
+ "{'loss': 0.1889, 'grad_norm': 1.070407748222351, 'learning_rate': 2.1220159151193635e-06, 'epoch': 9.89}\n",
+ "{'train_runtime': 13633.0365, 'train_samples_per_second': 3.321, 'train_steps_per_second': 0.415, 'train_loss': 0.7991274287759625, 'epoch': 10.0}\n",
+ "100%|█████████████████████████████████████| 5660/5660 [3:47:13<00:00, 2.41s/it]\n",
+ "(5) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n",
+ "13633.0365 seconds used for training.\n",
+ "227.22 minutes used for training.\n",
+ "Peak reserved memory = 1.369 GB.\n",
+ "Peak reserved memory for training = 0.496 GB.\n",
+ "Peak reserved memory % of max memory = 11.414 %.\n",
+ "Peak reserved memory for training % of max memory = 4.135 %.\n",
+ "Evaluating fine-tuned model: unsloth/Qwen2-0.5B-Instruct\n",
+ " 0%| | 0/1133 [00:00, ?it/s]--------\n",
+ "step 1: Old Geng raised his rifle and tilted his head, clasping the trigger, and a crash of iron shrapnel fell beside him, splashing over time boundaries and scattering like ice rain.<|im_end|>\n",
+ "--------\n",
+ "step 2: Old Geng raised his rifle and tilted his head, clasping the trigger, and a crash of iron shrapnel fell beside him, splashing over time boundaries and scattering like ice rain.\n",
+ "--------\n",
+ "step 3: Old Geng raised his rifle and tilted his head, clasping the trigger, and a crash of iron shrapnel fell beside him, splashing over time boundaries and scattering like ice rain.\n",
+ "100%|█████████████████████████████████████| 1133/1133 [1:36:22<00:00, 5.10s/it]\n",
+ " chinese ... unsloth/Qwen2-0.5B-Instruct(finetuned)\n",
+ "0 老耿端起枪,眯缝起一只三角眼,一搂扳机响了枪,冰雹般的金麻雀劈哩啪啦往下落,铁砂子在柳枝间飞... ... Old Geng raised his rifle and tilted his head,...\n",
+ "\n",
+ "[1 rows x 8 columns]\n",
+ "(6) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n",
+ "1.369 GB of memory reserved.\n",
+ "README.md: 100%|███████████████████████████████| 599/599 [00:00<00:00, 3.47MB/s]\n",
+ "model.safetensors: 100%|█████████████████████| 493M/493M [01:42<00:00, 4.83MB/s]\n",
+ "Saved model to https://huggingface.co/Qwen2-0.5B-Instruct-MAC-lora\n",
+ "Unsloth: Saving tokenizer... Done.\n",
+ "Unsloth: Saving model... Done.\n",
+ "Unsloth: Saving LoRA adapters. Please wait...\n",
+ "401 Client Error: Unauthorized for url: https://huggingface.co/api/repos/create (Request ID: Root=1-6678d5ab-7ebfba970b06941d330c774c;eef78700-1bc6-4a4b-82dc-3a0513f85a69)\n",
+ "\n",
+ "Invalid username or password.\n",
+ "Tuning unsloth/Qwen2-1.5B-Instruct\n",
+ "🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.\n",
+ "[nltk_data] Downloading package wordnet to /home/inflaton/nltk_data...\n",
+ "[nltk_data] Package wordnet is already up-to-date!\n",
+ "[nltk_data] Downloading package punkt to /home/inflaton/nltk_data...\n",
+ "[nltk_data] Package punkt is already up-to-date!\n",
+ "[nltk_data] Downloading package omw-1.4 to /home/inflaton/nltk_data...\n",
+ "[nltk_data] Package omw-1.4 is already up-to-date!\n",
+ "loading /home/inflaton/code/projects/courses/novel-translation/translation_engine_v3.py\n",
+ "loading env vars from: /home/inflaton/code/projects/courses/novel-translation/.env\n",
+ "unsloth/Qwen2-1.5B-Instruct True 2048 10 None datasets/mac/mac.tsv results/mac-results_v3.csv True True True\n",
+ "(1) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n",
+ "0.0 GB of memory reserved.\n",
+ "loading model: unsloth/Qwen2-1.5B-Instruct\n",
+ "==((====))== Unsloth: Fast Qwen2 patching release 2024.6\n",
+ " \\\\ /| GPU: NVIDIA GeForce RTX 4080 Laptop GPU. Max memory: 11.994 GB. Platform = Linux.\n",
+ "O^O/ \\_/ \\ Pytorch: 2.3.0. CUDA = 8.9. CUDA Toolkit = 12.1.\n",
+ "\\ / Bfloat16 = TRUE. Xformers = 0.0.26.post1. FA = True.\n",
+ " \"-____-\" Free Apache license: http://github.com/unslothai/unsloth\n",
+ "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n",
+ "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n",
+ "(2) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n",
+ "1.516 GB of memory reserved.\n",
+ "loading train/test data files\n",
+ "Map: 100%|█████████████████████████| 4528/4528 [00:00<00:00, 9716.63 examples/s]\n",
+ "Map: 100%|█████████████████████████| 1133/1133 [00:00<00:00, 5762.27 examples/s]\n",
+ "DatasetDict({\n",
+ " train: Dataset({\n",
+ " features: ['chinese', 'english', 'text', 'prompt'],\n",
+ " num_rows: 4528\n",
+ " })\n",
+ " test: Dataset({\n",
+ " features: ['chinese', 'english', 'text', 'prompt'],\n",
+ " num_rows: 1133\n",
+ " })\n",
+ "})\n",
+ "Evaluating base model: unsloth/Qwen2-1.5B-Instruct\n",
+ " 0%| | 0/1133 [00:00, ?it/s]--------\n",
+ "step 1: Old Geer lifted his gun, squinted one of his eyes, clutched it, and fired off a shot like hail of golden sparrows, sand grains flying from the willows, making a sound.<|im_end|>\n",
+ "--------\n",
+ "step 2: Old Geer lifted his gun, squinted one of his eyes, clutched it, and fired off a shot like hail of golden sparrows, sand grains flying from the willows, making a sound.\n",
+ "--------\n",
+ "step 3: Old Geer lifted his gun, squinted one of his eyes, clutched it, and fired off a shot like hail of golden sparrows, sand grains flying from the willows, making a sound.\n",
+ "100%|█████████████████████████████████████| 1133/1133 [1:08:04<00:00, 3.61s/it]\n",
+ " chinese ... unsloth/Qwen2-1.5B-Instruct\n",
+ "0 老耿端起枪,眯缝起一只三角眼,一搂扳机响了枪,冰雹般的金麻雀劈哩啪啦往下落,铁砂子在柳枝间飞... ... Old Geer lifted his gun, squinted one of his e...\n",
+ "\n",
+ "[1 rows x 9 columns]\n",
+ "(3) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n",
+ "1.758 GB of memory reserved.\n",
+ "Unsloth 2024.6 patched 28 layers with 0 QKV layers, 28 O layers and 28 MLP layers.\n",
+ "Map (num_proc=2): 100%|████████████| 4528/4528 [00:02<00:00, 2227.71 examples/s]\n",
+ "(4) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n",
+ "1.758 GB of memory reserved.\n",
+ "==((====))== Unsloth - 2x faster free finetuning | Num GPUs = 1\n",
+ " \\\\ /| Num examples = 4,528 | Num Epochs = 10\n",
+ "O^O/ \\_/ \\ Batch size per device = 2 | Gradient Accumulation steps = 4\n",
+ "\\ / Total batch size = 8 | Total steps = 5,660\n",
+ " \"-____-\" Number of trainable parameters = 18,464,768\n",
+ "{'loss': 1.7417, 'grad_norm': 0.6435620188713074, 'learning_rate': 0.00019664014146772768, 'epoch': 0.18}\n",
+ "{'loss': 1.5681, 'grad_norm': 0.6191736459732056, 'learning_rate': 0.0001931034482758621, 'epoch': 0.35}\n",
+ "{'loss': 1.516, 'grad_norm': 0.6416980028152466, 'learning_rate': 0.00018956675508399648, 'epoch': 0.53}\n",
+ "{'loss': 1.5173, 'grad_norm': 0.5749340057373047, 'learning_rate': 0.00018603006189213086, 'epoch': 0.71}\n",
+ "{'loss': 1.4962, 'grad_norm': 0.5725200176239014, 'learning_rate': 0.00018249336870026527, 'epoch': 0.88}\n",
+ "{'loss': 1.4184, 'grad_norm': 0.6020503640174866, 'learning_rate': 0.00017895667550839965, 'epoch': 1.06}\n",
+ "{'loss': 1.2973, 'grad_norm': 0.7252504229545593, 'learning_rate': 0.00017541998231653406, 'epoch': 1.24}\n",
+ "{'loss': 1.3397, 'grad_norm': 0.6848253011703491, 'learning_rate': 0.00017188328912466844, 'epoch': 1.41}\n",
+ "{'loss': 1.2787, 'grad_norm': 0.9209019541740417, 'learning_rate': 0.00016834659593280285, 'epoch': 1.59}\n",
+ "{'loss': 1.2797, 'grad_norm': 0.8531824946403503, 'learning_rate': 0.00016480990274093723, 'epoch': 1.77}\n",
+ "{'loss': 1.3313, 'grad_norm': 0.8282666206359863, 'learning_rate': 0.00016127320954907164, 'epoch': 1.94}\n",
+ "{'loss': 1.1277, 'grad_norm': 0.9999461770057678, 'learning_rate': 0.000157736516357206, 'epoch': 2.12}\n",
+ "{'loss': 0.9964, 'grad_norm': 1.1321799755096436, 'learning_rate': 0.0001541998231653404, 'epoch': 2.3}\n",
+ "{'loss': 1.0302, 'grad_norm': 1.2508618831634521, 'learning_rate': 0.0001506631299734748, 'epoch': 2.47}\n",
+ "{'loss': 1.0345, 'grad_norm': 1.0669715404510498, 'learning_rate': 0.0001471264367816092, 'epoch': 2.65}\n",
+ "{'loss': 1.0366, 'grad_norm': 1.0575922727584839, 'learning_rate': 0.0001435897435897436, 'epoch': 2.83}\n",
+ "{'loss': 1.0088, 'grad_norm': 0.8964874148368835, 'learning_rate': 0.000140053050397878, 'epoch': 3.0}\n",
+ "{'loss': 0.697, 'grad_norm': 1.2907732725143433, 'learning_rate': 0.0001365163572060124, 'epoch': 3.18}\n",
+ "{'loss': 0.7093, 'grad_norm': 1.3694052696228027, 'learning_rate': 0.00013297966401414678, 'epoch': 3.36}\n",
+ "{'loss': 0.7154, 'grad_norm': 1.3360122442245483, 'learning_rate': 0.0001294429708222812, 'epoch': 3.53}\n",
+ "{'loss': 0.7039, 'grad_norm': 1.3949358463287354, 'learning_rate': 0.00012590627763041555, 'epoch': 3.71}\n",
+ "{'loss': 0.7428, 'grad_norm': 1.2718554735183716, 'learning_rate': 0.00012236958443854996, 'epoch': 3.89}\n",
+ "{'loss': 0.643, 'grad_norm': 1.135117769241333, 'learning_rate': 0.00011883289124668435, 'epoch': 4.06}\n",
+ "{'loss': 0.4595, 'grad_norm': 1.454145908355713, 'learning_rate': 0.00011529619805481875, 'epoch': 4.24}\n",
+ "{'loss': 0.444, 'grad_norm': 1.6696418523788452, 'learning_rate': 0.00011175950486295315, 'epoch': 4.42}\n",
+ "{'loss': 0.4826, 'grad_norm': 1.7666652202606201, 'learning_rate': 0.00010822281167108754, 'epoch': 4.59}\n",
+ "{'loss': 0.4795, 'grad_norm': 1.742395281791687, 'learning_rate': 0.00010468611847922194, 'epoch': 4.77}\n",
+ "{'loss': 0.4856, 'grad_norm': 1.5103402137756348, 'learning_rate': 0.00010114942528735633, 'epoch': 4.95}\n",
+ "{'loss': 0.3593, 'grad_norm': 1.3078151941299438, 'learning_rate': 9.761273209549072e-05, 'epoch': 5.12}\n",
+ "{'loss': 0.2904, 'grad_norm': 1.2514950037002563, 'learning_rate': 9.407603890362513e-05, 'epoch': 5.3}\n",
+ "{'loss': 0.2991, 'grad_norm': 1.658071756362915, 'learning_rate': 9.053934571175951e-05, 'epoch': 5.48}\n",
+ "{'loss': 0.3144, 'grad_norm': 1.6055220365524292, 'learning_rate': 8.70026525198939e-05, 'epoch': 5.65}\n",
+ "{'loss': 0.3093, 'grad_norm': 1.5153316259384155, 'learning_rate': 8.34659593280283e-05, 'epoch': 5.83}\n",
+ "{'loss': 0.3058, 'grad_norm': 0.9724624752998352, 'learning_rate': 7.99292661361627e-05, 'epoch': 6.01}\n",
+ "{'loss': 0.1934, 'grad_norm': 0.954325795173645, 'learning_rate': 7.639257294429708e-05, 'epoch': 6.18}\n",
+ "{'loss': 0.2036, 'grad_norm': 1.537642240524292, 'learning_rate': 7.285587975243147e-05, 'epoch': 6.36}\n",
+ "{'loss': 0.2079, 'grad_norm': 1.2404054403305054, 'learning_rate': 6.931918656056587e-05, 'epoch': 6.54}\n",
+ "{'loss': 0.2049, 'grad_norm': 1.0820835828781128, 'learning_rate': 6.578249336870027e-05, 'epoch': 6.71}\n",
+ "{'loss': 0.215, 'grad_norm': 1.32541024684906, 'learning_rate': 6.224580017683466e-05, 'epoch': 6.89}\n",
+ "{'loss': 0.1946, 'grad_norm': 0.7059425711631775, 'learning_rate': 5.870910698496905e-05, 'epoch': 7.07}\n",
+ "{'loss': 0.1489, 'grad_norm': 1.0154873132705688, 'learning_rate': 5.517241379310345e-05, 'epoch': 7.24}\n",
+ "{'loss': 0.1581, 'grad_norm': 0.7846829891204834, 'learning_rate': 5.163572060123785e-05, 'epoch': 7.42}\n",
+ "{'loss': 0.1593, 'grad_norm': 0.9743372201919556, 'learning_rate': 4.809902740937224e-05, 'epoch': 7.6}\n",
+ "{'loss': 0.1568, 'grad_norm': 0.8308056592941284, 'learning_rate': 4.4562334217506634e-05, 'epoch': 7.77}\n",
+ "{'loss': 0.1587, 'grad_norm': 0.8165437579154968, 'learning_rate': 4.1025641025641023e-05, 'epoch': 7.95}\n",
+ "{'loss': 0.137, 'grad_norm': 1.081515908241272, 'learning_rate': 3.7488947833775426e-05, 'epoch': 8.13}\n",
+ "{'loss': 0.1305, 'grad_norm': 0.6178275942802429, 'learning_rate': 3.3952254641909815e-05, 'epoch': 8.3}\n",
+ "{'loss': 0.1318, 'grad_norm': 0.4706704914569855, 'learning_rate': 3.041556145004421e-05, 'epoch': 8.48}\n",
+ "{'loss': 0.1331, 'grad_norm': 0.41128799319267273, 'learning_rate': 2.6878868258178604e-05, 'epoch': 8.66}\n",
+ "{'loss': 0.1349, 'grad_norm': 0.674155056476593, 'learning_rate': 2.3342175066313e-05, 'epoch': 8.83}\n",
+ "{'loss': 0.1327, 'grad_norm': 0.49978604912757874, 'learning_rate': 1.9805481874447392e-05, 'epoch': 9.01}\n",
+ "{'loss': 0.1177, 'grad_norm': 0.44771721959114075, 'learning_rate': 1.6268788682581788e-05, 'epoch': 9.19}\n",
+ "{'loss': 0.1187, 'grad_norm': 0.587639331817627, 'learning_rate': 1.273209549071618e-05, 'epoch': 9.36}\n",
+ "{'loss': 0.1198, 'grad_norm': 0.47292351722717285, 'learning_rate': 9.195402298850575e-06, 'epoch': 9.54}\n",
+ "{'loss': 0.1203, 'grad_norm': 0.5652945041656494, 'learning_rate': 5.658709106984969e-06, 'epoch': 9.72}\n",
+ "{'loss': 0.1232, 'grad_norm': 0.93674236536026, 'learning_rate': 2.1220159151193635e-06, 'epoch': 9.89}\n",
+ "{'train_runtime': 15791.9321, 'train_samples_per_second': 2.867, 'train_steps_per_second': 0.358, 'train_loss': 0.5995039068767966, 'epoch': 10.0}\n",
+ "100%|█████████████████████████████████████| 5660/5660 [4:23:11<00:00, 2.79s/it]\n",
+ "(5) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n",
+ "15791.9321 seconds used for training.\n",
+ "263.2 minutes used for training.\n",
+ "Peak reserved memory = 2.365 GB.\n",
+ "Peak reserved memory for training = 0.607 GB.\n",
+ "Peak reserved memory % of max memory = 19.718 %.\n",
+ "Peak reserved memory for training % of max memory = 5.061 %.\n",
+ "Evaluating fine-tuned model: unsloth/Qwen2-1.5B-Instruct\n",
+ " 0%| | 0/1133 [00:00, ?it/s]--------\n",
+ "step 1: Old Geng raised his gun, squinted, and emptied it. The cocoon of bullets split open as they flew, like ice pellets, until it was gone, sending chiseling sounds tumbling through the air as iron seeds smashed pruneflower stems.<|im_end|>\n",
+ "--------\n",
+ "step 2: Old Geng raised his gun, squinted, and emptied it. The cocoon of bullets split open as they flew, like ice pellets, until it was gone, sending chiseling sounds tumbling through the air as iron seeds smashed pruneflower stems.\n",
+ "--------\n",
+ "step 3: Old Geng raised his gun, squinted, and emptied it. The cocoon of bullets split open as they flew, like ice pellets, until it was gone, sending chiseling sounds tumbling through the air as iron seeds smashed pruneflower stems.\n",
+ "100%|█████████████████████████████████████| 1133/1133 [2:10:28<00:00, 6.91s/it]\n",
+ " chinese ... unsloth/Qwen2-1.5B-Instruct(finetuned)\n",
+ "0 老耿端起枪,眯缝起一只三角眼,一搂扳机响了枪,冰雹般的金麻雀劈哩啪啦往下落,铁砂子在柳枝间飞... ... Old Geng raised his gun, squinted, and emptied...\n",
+ "\n",
+ "[1 rows x 10 columns]\n",
+ "(6) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n",
+ "5.33 GB of memory reserved.\n",
+ "README.md: 100%|███████████████████████████████| 599/599 [00:00<00:00, 3.36MB/s]\n",
+ "model.safetensors: 100%|███████████████████| 1.22G/1.22G [04:57<00:00, 4.09MB/s]\n",
+ "Saved model to https://huggingface.co/Qwen2-1.5B-Instruct-MAC-lora\n",
+ "Unsloth: Saving tokenizer... Done.\n",
+ "Unsloth: Saving model... Done.\n",
+ "Unsloth: Saving LoRA adapters. Please wait...\n",
+ "401 Client Error: Unauthorized for url: https://huggingface.co/api/repos/create (Request ID: Root=1-667943a2-3b0230ba1811ed550b585d53;2d0f1d17-f232-41f6-9eab-add0e87114f8)\n",
+ "\n",
+ "Invalid username or password.\n",
+ "CPU times: user 23min 30s, sys: 8min 24s, total: 31min 55s\n",
+ "Wall time: 14h 23min 14s\n"
+ ]
+ }
+ ],
+ "source": [
+ "%%time\n",
+ "\n",
+ "!./tune-small-2.sh"
+ ]
+ }
+ ],
+ "metadata": {
+ "accelerator": "GPU",
+ "application/vnd.databricks.v1+notebook": {
+ "dashboards": [],
+ "environmentMetadata": null,
+ "language": "python",
+ "notebookMetadata": {
+ "pythonIndentUnit": 4
+ },
+ "notebookName": "07_MAC_+_Qwen2-7B-Instructi_Unsloth_train",
+ "widgets": {}
+ },
+ "colab": {
+ "gpuType": "T4",
+ "provenance": []
+ },
+ "kernelspec": {
+ "display_name": "Python 3",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.10.14"
+ },
+ "widgets": {
+ "application/vnd.jupyter.widget-state+json": {
+ "036fc5746f43416db18c19ad8fd36677": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "06e806c82c7b4cbea31c5358dd9c3434": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "087b76a8b7514269b1f0ab29b062e444": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_a069d2ab23824f29aa320ac256e2cfe9",
+ "placeholder": "",
+ "style": "IPY_MODEL_06e806c82c7b4cbea31c5358dd9c3434",
+ "value": "Map (num_proc=2): 100%"
+ }
+ },
+ "09b76013aa9e45efb6deb23a7a0d0925": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_dea41c5260884aa6879b5e1d1697b14f",
+ "placeholder": "",
+ "style": "IPY_MODEL_89965917796a4f81b899fdc7685f33df",
+ "value": "config.json: 100%"
+ }
+ },
+ "0a92c56bfa134ef583220d7ef0b13e17": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "0c34be936c8145d3ab41282f30a70713": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "0f8b6bfe16894500838793f2491d403f": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "177c78fce95d4b4ab33057c5a048d693": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "1f44c9ce1adf470cbb19784493ed209f": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_0c34be936c8145d3ab41282f30a70713",
+ "placeholder": "",
+ "style": "IPY_MODEL_0a92c56bfa134ef583220d7ef0b13e17",
+ "value": "model.safetensors: 100%"
+ }
+ },
+ "201b59ccd9f845e197029b57e424aefc": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "2157f01726d748f8a9ae4a00664430da": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "21db8a77b00d4a4e82fdfa608657531f": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "26e4202cca81496a90d15a0dd4ca9cf1": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_ba90fdb8822d47dab7ba203bee297f37",
+ "IPY_MODEL_61560ff6a36b44f4a9dfdae5c52791d4",
+ "IPY_MODEL_95fbe66647904c06a20f640630d6dc0e"
+ ],
+ "layout": "IPY_MODEL_57182a263d324a3dbf1471c74290a0d5"
+ }
+ },
+ "27155728b6b84cb199c91c940095d0a8": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_6b91feeed5464877991ac2c207aebe7c",
+ "IPY_MODEL_cca8113c54c0495daedce1327bf9c68b",
+ "IPY_MODEL_2e63a29e2f7247bba5beede9a568c99f"
+ ],
+ "layout": "IPY_MODEL_5c9d781c28944f3eb86e2a6d44efdf18"
+ }
+ },
+ "271ddaa553a042d09b6db7b450643d8f": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "2a58d04b428c46f4b3dbadd3bc6cd529": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "2d18ddf6482c4d97829ac0e5a7b9868f": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_9f679ad3ec7f4fe8ad0510ffb57bc2ab",
+ "IPY_MODEL_f2df530d22c74977b249dd9fb5f4829b",
+ "IPY_MODEL_89b2ef0dbfea47ab8e6f8d659e3351d1"
+ ],
+ "layout": "IPY_MODEL_3056b148aa9f4e6e8aa3b61d26886255"
+ }
+ },
+ "2e5087c76f98437cb5dc729230358cba": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "2e63a29e2f7247bba5beede9a568c99f": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_b993eaec6b224440bf80c0958c6fb536",
+ "placeholder": "",
+ "style": "IPY_MODEL_de868e26e7154f62aa86223a539ad421",
+ "value": " 464/464 [00:00<00:00, 27.1kB/s]"
+ }
+ },
+ "2f6c70dd266c4816bfad3fd3d192929a": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "30307300bc4e4baf96560e30969a82b6": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_e36a3f9eff0e4cf68834d66b0213ae96",
+ "placeholder": "",
+ "style": "IPY_MODEL_a0037bdccf254159becde630bee3d1db",
+ "value": "generation_config.json: 100%"
+ }
+ },
+ "3056b148aa9f4e6e8aa3b61d26886255": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "30cdc32298134cb0be4d41615b9e5774": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "3572201bd4d74a58b7a665f9bdfdcdba": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "35b0e8c26d6640e9bd0ed7b242a423d8": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_2e5087c76f98437cb5dc729230358cba",
+ "max": 51760,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_036fc5746f43416db18c19ad8fd36677",
+ "value": 51760
+ }
+ },
+ "36166c7bcb854b34aca1f41a5d6ea50b": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "370692d819df41828b48c4ad446f977b": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "39b29a75374b45c0a22506010be2b84e": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_30cdc32298134cb0be4d41615b9e5774",
+ "max": 1179,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_47928317548c454bba6358ab132e8dee",
+ "value": 1179
+ }
+ },
+ "3cf2dd993b5e4d3daecf61e4bab5a404": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_087b76a8b7514269b1f0ab29b062e444",
+ "IPY_MODEL_35b0e8c26d6640e9bd0ed7b242a423d8",
+ "IPY_MODEL_54ad89e05fd74576b9b8b5b5a10eaf8d"
+ ],
+ "layout": "IPY_MODEL_a41dc44766444a998bec2d777f249d23"
+ }
+ },
+ "43dec2ede91341f5af60eb522e18e984": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "4463edd481c1467f914c7dcd6c6e6ffc": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "47928317548c454bba6358ab132e8dee": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "49277aeeac16434a865a4d12308b1abc": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "4ae7e449e4ea4c729b5f34607c18ebae": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "4b2061b8a73c43ffb0c2f83daf0d0183": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "4c4c88d4c701450692fa0f6b0c5764b0": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "4c666f4ace3943f8b80ecd20e7503236": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "4ccedf0d93094e63b57a0f8a434fba06": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_4463edd481c1467f914c7dcd6c6e6ffc",
+ "max": 44307561,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_6d3b9a05db0b4dadb638c686faa0c40a",
+ "value": 44307561
+ }
+ },
+ "4dcf6ff672d24983a1877a8431709aa9": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_5807d5fb827d490fb3bc698f801ffff5",
+ "placeholder": "",
+ "style": "IPY_MODEL_c4f2b06a82fd4987b8b659524a7b503b",
+ "value": "Generating train split: 100%"
+ }
+ },
+ "4ea63adfce694725bdba878aef709dd3": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "5234566b1bfc4655b8d582ea5b46ed9f": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "54ad89e05fd74576b9b8b5b5a10eaf8d": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_fdb1941405ed4e4aa06019933892deb3",
+ "placeholder": "",
+ "style": "IPY_MODEL_668d5377ca56426a99753867e6e24862",
+ "value": " 51760/51760 [01:02<00:00, 1131.51 examples/s]"
+ }
+ },
+ "56aee4853b7740e6a977254f5d1fa66d": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "57182a263d324a3dbf1471c74290a0d5": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "5807d5fb827d490fb3bc698f801ffff5": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "5c9d781c28944f3eb86e2a6d44efdf18": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "5f40db8173dd4d76b6ef5ed6d9ec8b6e": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "61560ff6a36b44f4a9dfdae5c52791d4": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_db19fc8d37db4e45a5790a876836d8c4",
+ "max": 11610,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_36166c7bcb854b34aca1f41a5d6ea50b",
+ "value": 11610
+ }
+ },
+ "6578fd7acdb54c4c93528ea431fd0144": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_370692d819df41828b48c4ad446f977b",
+ "placeholder": "",
+ "style": "IPY_MODEL_a0bf9160eb2647409b3200270914b90f",
+ "value": " 50.6k/50.6k [00:00<00:00, 2.71MB/s]"
+ }
+ },
+ "668d5377ca56426a99753867e6e24862": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "697f027529b54ee9956bae78a11e0611": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "69ac12aec0714318bf2c83d4f4e745f5": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "6b2012c3f88547af8884a9ea90e3164b": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_938f45f1b3e24118b815d96ae34ba86a",
+ "placeholder": "",
+ "style": "IPY_MODEL_9367047a800747f79c6b225d92397846",
+ "value": " 44.3M/44.3M [00:01<00:00, 31.0MB/s]"
+ }
+ },
+ "6b91feeed5464877991ac2c207aebe7c": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_4b2061b8a73c43ffb0c2f83daf0d0183",
+ "placeholder": "",
+ "style": "IPY_MODEL_69ac12aec0714318bf2c83d4f4e745f5",
+ "value": "special_tokens_map.json: 100%"
+ }
+ },
+ "6d3b9a05db0b4dadb638c686faa0c40a": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "6dbbedeca9314e66ae50e44ffa31a414": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "6e34619b45934040b6092e6fb01ea7fe": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "71ce208e20d6483abb9ed923510c86d7": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_d69dc491b3ab44d7852b21873ed7bb7f",
+ "placeholder": "",
+ "style": "IPY_MODEL_f401d53bf28e44eb906bce6c05412662",
+ "value": " 51760/51760 [00:01<00:00, 45512.81 examples/s]"
+ }
+ },
+ "7358cdad832342c983e31efb8754ab78": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "73e352a3404f4c7dad0737f57d29e92f": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_988a0e8c1f89446086858da0a891a79c",
+ "IPY_MODEL_4ccedf0d93094e63b57a0f8a434fba06",
+ "IPY_MODEL_6b2012c3f88547af8884a9ea90e3164b"
+ ],
+ "layout": "IPY_MODEL_7e29cb8dd4df4d5b94407cd8fd3f2011"
+ }
+ },
+ "74501720ac7e4dbb911a4a99b3633bc6": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "78e5400bff924a92a4cc61c4ff18b182": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_b9b313fd861948f5aba25b24b1518d30",
+ "placeholder": "",
+ "style": "IPY_MODEL_4c666f4ace3943f8b80ecd20e7503236",
+ "value": " 1.18k/1.18k [00:00<00:00, 31.3kB/s]"
+ }
+ },
+ "7975adbc2ec5489ea7fa0167e620d85c": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_6e34619b45934040b6092e6fb01ea7fe",
+ "max": 51760,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_271ddaa553a042d09b6db7b450643d8f",
+ "value": 51760
+ }
+ },
+ "7e29cb8dd4df4d5b94407cd8fd3f2011": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "810ff6c0e17d4fa09a30fef27eacff90": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "89965917796a4f81b899fdc7685f33df": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "89b2ef0dbfea47ab8e6f8d659e3351d1": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_b8908fa0df3743ecb9d12983a739104f",
+ "placeholder": "",
+ "style": "IPY_MODEL_177c78fce95d4b4ab33057c5a048d693",
+ "value": " 9.09M/9.09M [00:00<00:00, 32.6MB/s]"
+ }
+ },
+ "8b3505352a5a42bf910428c40ce40465": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_49277aeeac16434a865a4d12308b1abc",
+ "placeholder": "",
+ "style": "IPY_MODEL_2157f01726d748f8a9ae4a00664430da",
+ "value": " 5.70G/5.70G [01:02<00:00, 30.1MB/s]"
+ }
+ },
+ "8fc142b628fb40568730234de1cafde2": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_4ae7e449e4ea4c729b5f34607c18ebae",
+ "max": 172,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_3572201bd4d74a58b7a665f9bdfdcdba",
+ "value": 172
+ }
+ },
+ "9367047a800747f79c6b225d92397846": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "938f45f1b3e24118b815d96ae34ba86a": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "95fbe66647904c06a20f640630d6dc0e": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_b0a370dc20654b279b9680692e34418e",
+ "placeholder": "",
+ "style": "IPY_MODEL_cfeb365ddf7548d58b2557f22737fcf5",
+ "value": " 11.6k/11.6k [00:00<00:00, 716kB/s]"
+ }
+ },
+ "988a0e8c1f89446086858da0a891a79c": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_ad2be500fc164c0f86f33e914ef8e6a0",
+ "placeholder": "",
+ "style": "IPY_MODEL_5234566b1bfc4655b8d582ea5b46ed9f",
+ "value": "Downloading data: 100%"
+ }
+ },
+ "98c58f23f4d549518832cb2d18f796e8": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_09b76013aa9e45efb6deb23a7a0d0925",
+ "IPY_MODEL_39b29a75374b45c0a22506010be2b84e",
+ "IPY_MODEL_78e5400bff924a92a4cc61c4ff18b182"
+ ],
+ "layout": "IPY_MODEL_2a58d04b428c46f4b3dbadd3bc6cd529"
+ }
+ },
+ "99fdbb0300c14c139d1937c646f0cfe7": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_7358cdad832342c983e31efb8754ab78",
+ "placeholder": "",
+ "style": "IPY_MODEL_e9adf418296e436fb48bb9f78885598b",
+ "value": " 51760/51760 [00:01<00:00, 38665.95 examples/s]"
+ }
+ },
+ "9f679ad3ec7f4fe8ad0510ffb57bc2ab": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_4ea63adfce694725bdba878aef709dd3",
+ "placeholder": "",
+ "style": "IPY_MODEL_74501720ac7e4dbb911a4a99b3633bc6",
+ "value": "tokenizer.json: 100%"
+ }
+ },
+ "a0037bdccf254159becde630bee3d1db": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "a069d2ab23824f29aa320ac256e2cfe9": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "a0bf9160eb2647409b3200270914b90f": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "a41dc44766444a998bec2d777f249d23": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "a8464a4c711e4e00aafdfc919b60d07e": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_fb995c740590427b882572c81d4e848c",
+ "placeholder": "",
+ "style": "IPY_MODEL_201b59ccd9f845e197029b57e424aefc",
+ "value": " 172/172 [00:00<00:00, 12.0kB/s]"
+ }
+ },
+ "a9f0cc51fc3d4d7b874c32dcf1c5bdf2": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "ad2be500fc164c0f86f33e914ef8e6a0": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "b0240cd9a4554b29ae11f8051984a1c6": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_edaf890370314a218f138015faa0b05d",
+ "placeholder": "",
+ "style": "IPY_MODEL_697f027529b54ee9956bae78a11e0611",
+ "value": "Map: 100%"
+ }
+ },
+ "b0a370dc20654b279b9680692e34418e": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "b518dcee69074b87be73957cd810e7ed": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_d891f8d0b1fc462f8008d02bb2a15692",
+ "placeholder": "",
+ "style": "IPY_MODEL_cced8fd7e998472794f3f3e3018956a5",
+ "value": "tokenizer_config.json: 100%"
+ }
+ },
+ "b8908fa0df3743ecb9d12983a739104f": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "b993eaec6b224440bf80c0958c6fb536": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "b9b313fd861948f5aba25b24b1518d30": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "ba90fdb8822d47dab7ba203bee297f37": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_0f8b6bfe16894500838793f2491d403f",
+ "placeholder": "",
+ "style": "IPY_MODEL_bb19f6c747754682a514373a3a0535ba",
+ "value": "Downloading readme: 100%"
+ }
+ },
+ "bb19f6c747754682a514373a3a0535ba": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "bc883d4cf13e4f8b8a4fe5f410cb6efd": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_e9159e03e61f4f56978ece9c3bca49b2",
+ "max": 51760,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_810ff6c0e17d4fa09a30fef27eacff90",
+ "value": 51760
+ }
+ },
+ "c161d94df0f04feba9542237e0856c22": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "c22f71b1f85843209d7e5321506b9cb9": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_1f44c9ce1adf470cbb19784493ed209f",
+ "IPY_MODEL_f1addc4479d849879e743cf9089e6540",
+ "IPY_MODEL_8b3505352a5a42bf910428c40ce40465"
+ ],
+ "layout": "IPY_MODEL_4c4c88d4c701450692fa0f6b0c5764b0"
+ }
+ },
+ "c4f2b06a82fd4987b8b659524a7b503b": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "cca8113c54c0495daedce1327bf9c68b": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_e02f9b7849c64531835eb77b860d1c93",
+ "max": 464,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_56aee4853b7740e6a977254f5d1fa66d",
+ "value": 464
+ }
+ },
+ "cced8fd7e998472794f3f3e3018956a5": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "cf245afeb1c04f29a24d291608c3d157": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_b518dcee69074b87be73957cd810e7ed",
+ "IPY_MODEL_e29104486d594b2992d7285e0ef77371",
+ "IPY_MODEL_6578fd7acdb54c4c93528ea431fd0144"
+ ],
+ "layout": "IPY_MODEL_d35db8148a354c56aaac56dbae22536f"
+ }
+ },
+ "cfe8cae0e22b495bafa221a63d13b283": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "cfeb365ddf7548d58b2557f22737fcf5": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "d1b47d39450d4019ae85c9b2f943eeaf": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_4dcf6ff672d24983a1877a8431709aa9",
+ "IPY_MODEL_7975adbc2ec5489ea7fa0167e620d85c",
+ "IPY_MODEL_71ce208e20d6483abb9ed923510c86d7"
+ ],
+ "layout": "IPY_MODEL_cfe8cae0e22b495bafa221a63d13b283"
+ }
+ },
+ "d35db8148a354c56aaac56dbae22536f": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "d69dc491b3ab44d7852b21873ed7bb7f": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "d891f8d0b1fc462f8008d02bb2a15692": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "d8e5318cead340c4adbeaccc05d39225": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "daf4cd890b35422683d22fd30bc71e83": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_b0240cd9a4554b29ae11f8051984a1c6",
+ "IPY_MODEL_bc883d4cf13e4f8b8a4fe5f410cb6efd",
+ "IPY_MODEL_99fdbb0300c14c139d1937c646f0cfe7"
+ ],
+ "layout": "IPY_MODEL_c161d94df0f04feba9542237e0856c22"
+ }
+ },
+ "db19fc8d37db4e45a5790a876836d8c4": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "de868e26e7154f62aa86223a539ad421": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "dea41c5260884aa6879b5e1d1697b14f": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "e02f9b7849c64531835eb77b860d1c93": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "e29104486d594b2992d7285e0ef77371": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_a9f0cc51fc3d4d7b874c32dcf1c5bdf2",
+ "max": 50641,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_2f6c70dd266c4816bfad3fd3d192929a",
+ "value": 50641
+ }
+ },
+ "e36a3f9eff0e4cf68834d66b0213ae96": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "e9159e03e61f4f56978ece9c3bca49b2": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "e9adf418296e436fb48bb9f78885598b": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "edaf890370314a218f138015faa0b05d": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "f1addc4479d849879e743cf9089e6540": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_43dec2ede91341f5af60eb522e18e984",
+ "max": 5702746405,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_d8e5318cead340c4adbeaccc05d39225",
+ "value": 5702746405
+ }
+ },
+ "f2df530d22c74977b249dd9fb5f4829b": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_21db8a77b00d4a4e82fdfa608657531f",
+ "max": 9085698,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_6dbbedeca9314e66ae50e44ffa31a414",
+ "value": 9085698
+ }
+ },
+ "f401d53bf28e44eb906bce6c05412662": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "fb995c740590427b882572c81d4e848c": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "fce7a61c25ec4390af43d92b7c473a45": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_30307300bc4e4baf96560e30969a82b6",
+ "IPY_MODEL_8fc142b628fb40568730234de1cafde2",
+ "IPY_MODEL_a8464a4c711e4e00aafdfc919b60d07e"
+ ],
+ "layout": "IPY_MODEL_5f40db8173dd4d76b6ef5ed6d9ec8b6e"
+ }
+ },
+ "fdb1941405ed4e4aa06019933892deb3": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ }
+ }
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
+}
diff --git a/notebooks/06_tune-small-py3.11.ipynb b/notebooks/06_tune-small-py3.11.ipynb
new file mode 100644
index 0000000000000000000000000000000000000000..41257cb1d936dc55f300f33e3ff0eda78bc352f6
--- /dev/null
+++ b/notebooks/06_tune-small-py3.11.ipynb
@@ -0,0 +1,4673 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {
+ "application/vnd.databricks.v1+cell": {
+ "cellMetadata": {},
+ "inputWidgets": {},
+ "nuid": "0ea8b46b-839b-445b-8043-ccdf4e920ace",
+ "showTitle": false,
+ "title": ""
+ }
+ },
+ "outputs": [],
+ "source": [
+ "%load_ext autoreload\n",
+ "%autoreload 2"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {
+ "application/vnd.databricks.v1+cell": {
+ "cellMetadata": {},
+ "inputWidgets": {},
+ "nuid": "6d394937-6c99-4a7c-9d32-7600a280032f",
+ "showTitle": false,
+ "title": ""
+ }
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "workding dir: /home/inflaton/code/projects/courses/llm-finetuning\n"
+ ]
+ }
+ ],
+ "source": [
+ "import os\n",
+ "import sys\n",
+ "from pathlib import Path\n",
+ "\n",
+ "workding_dir = str(Path.cwd().parent)\n",
+ "os.chdir(workding_dir)\n",
+ "sys.path.append(workding_dir)\n",
+ "print(\"workding dir:\", workding_dir)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {
+ "application/vnd.databricks.v1+cell": {
+ "cellMetadata": {},
+ "inputWidgets": {},
+ "nuid": "9f67ec60-2f24-411c-84eb-0dd664b44775",
+ "showTitle": false,
+ "title": ""
+ }
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "loading env vars from: /home/inflaton/code/projects/courses/llm-finetuning/.env\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "True"
+ ]
+ },
+ "execution_count": 3,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "from dotenv import find_dotenv, load_dotenv\n",
+ "\n",
+ "found_dotenv = find_dotenv(\".env\")\n",
+ "\n",
+ "if len(found_dotenv) == 0:\n",
+ " found_dotenv = find_dotenv(\".env.example\")\n",
+ "print(f\"loading env vars from: {found_dotenv}\")\n",
+ "load_dotenv(found_dotenv, override=True)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {
+ "application/vnd.databricks.v1+cell": {
+ "cellMetadata": {},
+ "inputWidgets": {},
+ "nuid": "f1597656-8042-4878-9d3b-9ebfb8dd86dc",
+ "showTitle": false,
+ "title": ""
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "('unsloth/Qwen2-0.5B-Instruct-bnb-4bit',\n",
+ " True,\n",
+ " None,\n",
+ " None,\n",
+ " 2048,\n",
+ " 10,\n",
+ " None,\n",
+ " 'datasets/mac/mac.tsv',\n",
+ " 'results/mac-results_py3.11.csv')"
+ ]
+ },
+ "execution_count": 4,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "import os\n",
+ "\n",
+ "model_name = os.getenv(\"MODEL_NAME\")\n",
+ "token = os.getenv(\"HF_TOKEN\") or None\n",
+ "load_in_4bit = os.getenv(\"LOAD_IN_4BIT\") == \"true\"\n",
+ "local_model = os.getenv(\"LOCAL_MODEL\")\n",
+ "hub_model = os.getenv(\"HUB_MODEL\")\n",
+ "num_train_epochs = int(os.getenv(\"NUM_TRAIN_EPOCHS\") or 0)\n",
+ "data_path = os.getenv(\"DATA_PATH\")\n",
+ "results_path = os.getenv(\"RESULTS_PATH\")\n",
+ "\n",
+ "max_seq_length = 2048 # Choose any! We auto support RoPE Scaling internally!\n",
+ "dtype = (\n",
+ " None # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+\n",
+ ")\n",
+ "\n",
+ "model_name, load_in_4bit, local_model, hub_model, max_seq_length, num_train_epochs, dtype, data_path, results_path"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Tue Jun 25 23:53:09 2024 \n",
+ "+---------------------------------------------------------------------------------------+\n",
+ "| NVIDIA-SMI 545.23.07 Driver Version: 546.12 CUDA Version: 12.3 |\n",
+ "|-----------------------------------------+----------------------+----------------------+\n",
+ "| GPU Name Persistence-M | Bus-Id Disp.A | Volatile Uncorr. ECC |\n",
+ "| Fan Temp Perf Pwr:Usage/Cap | Memory-Usage | GPU-Util Compute M. |\n",
+ "| | | MIG M. |\n",
+ "|=========================================+======================+======================|\n",
+ "| 0 NVIDIA GeForce RTX 4080 ... On | 00000000:01:00.0 On | N/A |\n",
+ "| N/A 54C P8 5W / 150W | 483MiB / 12282MiB | 9% Default |\n",
+ "| | | N/A |\n",
+ "+-----------------------------------------+----------------------+----------------------+\n",
+ " \n",
+ "+---------------------------------------------------------------------------------------+\n",
+ "| Processes: |\n",
+ "| GPU GI CI PID Type Process name GPU Memory |\n",
+ "| ID ID Usage |\n",
+ "|=======================================================================================|\n",
+ "| No running processes found |\n",
+ "+---------------------------------------------------------------------------------------+\n"
+ ]
+ }
+ ],
+ "source": [
+ "!nvidia-smi"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Python 3.11.9\n",
+ "\u001b[33mWARNING: Package(s) not found: flash-attn\u001b[0m\u001b[33m\n",
+ "\u001b[0mCPU times: user 7.35 ms, sys: 1.47 ms, total: 8.81 ms\n",
+ "Wall time: 509 ms\n"
+ ]
+ }
+ ],
+ "source": [
+ "%%time\n",
+ "!python --version\n",
+ "!pip show flash-attn"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Current Directory:\n",
+ "/home/inflaton/code/projects/courses/llm-finetuning\n",
+ "Tuning unsloth/Qwen2-0.5B-Instruct\n",
+ "loading env vars from: /home/inflaton/code/projects/courses/llm-finetuning/.env\n",
+ "Adding /home/inflaton/code/projects/courses/llm-finetuning to sys.path\n",
+ "🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.\n",
+ "[nltk_data] Downloading package wordnet to /home/inflaton/nltk_data...\n",
+ "[nltk_data] Package wordnet is already up-to-date!\n",
+ "[nltk_data] Downloading package punkt to /home/inflaton/nltk_data...\n",
+ "[nltk_data] Package punkt is already up-to-date!\n",
+ "[nltk_data] Downloading package omw-1.4 to /home/inflaton/nltk_data...\n",
+ "[nltk_data] Package omw-1.4 is already up-to-date!\n",
+ "loading /home/inflaton/code/projects/courses/llm-finetuning/llm_toolkit/translation_engine.py\n",
+ "unsloth/Qwen2-0.5B-Instruct True 2048 10 None datasets/mac/mac.tsv results/mac-results_py3.11.csv True True True\n",
+ "(1) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n",
+ "0.0 GB of memory reserved.\n",
+ "loading model: unsloth/Qwen2-0.5B-Instruct\n",
+ "==((====))== Unsloth: Fast Qwen2 patching release 2024.6\n",
+ " \\\\ /| GPU: NVIDIA GeForce RTX 4080 Laptop GPU. Max memory: 11.994 GB. Platform = Linux.\n",
+ "O^O/ \\_/ \\ Pytorch: 2.3.0+cu121. CUDA = 8.9. CUDA Toolkit = 12.1.\n",
+ "\\ / Bfloat16 = TRUE. Xformers = 0.0.26.post1. FA = False.\n",
+ " \"-____-\" Free Apache license: http://github.com/unslothai/unsloth\n",
+ "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n",
+ "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n",
+ "(2) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n",
+ "0.633 GB of memory reserved.\n",
+ "loading train/test data files\n",
+ "DatasetDict({\n",
+ " train: Dataset({\n",
+ " features: ['chinese', 'english', 'text', 'prompt'],\n",
+ " num_rows: 4528\n",
+ " })\n",
+ " test: Dataset({\n",
+ " features: ['chinese', 'english', 'text', 'prompt'],\n",
+ " num_rows: 1133\n",
+ " })\n",
+ "})\n",
+ "Evaluating base model: unsloth/Qwen2-0.5B-Instruct\n",
+ " 0%| | 0/1133 [00:00, ?it/s]--------\n",
+ "step 1: Old耿举枪,斜着眼睛眯起来,扣动扳机,砰的一声响,子弹一颗颗落在地上,像是打在棉花团上一样,砰的一声又一声,声音很清脆。<|im_end|>\n",
+ "--------\n",
+ "step 2: Old耿举枪,斜着眼睛眯起来,扣动扳机,砰的一声响,子弹一颗颗落在地上,像是打在棉花团上一样,砰的一声又一声,声音很清脆。\n",
+ "--------\n",
+ "step 3: Old耿举枪,斜着眼睛眯起来,扣动扳机,砰的一声响,子弹一颗颗落在地上,像是打在棉花团上一样,砰的一声又一声,声音很清脆。\n",
+ "100%|███████████████████████████████████████| 1133/1133 [21:27<00:00, 1.14s/it]\n",
+ " chinese ... unsloth/Qwen2-0.5B-Instruct\n",
+ "0 老耿端起枪,眯缝起一只三角眼,一搂扳机响了枪,冰雹般的金麻雀劈哩啪啦往下落,铁砂子在柳枝间飞... ... Old耿举枪,斜着眼睛眯起来,扣动扳机,砰的一声响,子弹一颗颗落在地上,像是打在棉花团上一样...\n",
+ "\n",
+ "[1 rows x 3 columns]\n",
+ "(3) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n",
+ "0.893 GB of memory reserved.\n",
+ "Unsloth 2024.6 patched 24 layers with 0 QKV layers, 24 O layers and 24 MLP layers.\n",
+ "/home/inflaton/miniconda3/envs/llm-fine-tune/lib/python3.11/site-packages/transformers/training_args.py:1965: FutureWarning: `--push_to_hub_token` is deprecated and will be removed in version 5 of 🤗 Transformers. Use `--hub_token` instead.\n",
+ " warnings.warn(\n",
+ "/home/inflaton/miniconda3/envs/llm-fine-tune/lib/python3.11/site-packages/trl/trainer/sft_trainer.py:269: UserWarning: You passed a `max_seq_length` argument to the SFTTrainer, the value you passed will override the one in the `SFTConfig`.\n",
+ " warnings.warn(\n",
+ "/home/inflaton/miniconda3/envs/llm-fine-tune/lib/python3.11/site-packages/trl/trainer/sft_trainer.py:283: UserWarning: You passed a `dataset_num_proc` argument to the SFTTrainer, the value you passed will override the one in the `SFTConfig`.\n",
+ " warnings.warn(\n",
+ "/home/inflaton/miniconda3/envs/llm-fine-tune/lib/python3.11/site-packages/trl/trainer/sft_trainer.py:307: UserWarning: You passed a `dataset_text_field` argument to the SFTTrainer, the value you passed will override the one in the `SFTConfig`.\n",
+ " warnings.warn(\n",
+ "Map (num_proc=2): 100%|████████████| 4528/4528 [00:00<00:00, 5300.69 examples/s]\n",
+ "(4) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n",
+ "0.893 GB of memory reserved.\n",
+ "==((====))== Unsloth - 2x faster free finetuning | Num GPUs = 1\n",
+ " \\\\ /| Num examples = 4,528 | Num Epochs = 10\n",
+ "O^O/ \\_/ \\ Batch size per device = 2 | Gradient Accumulation steps = 4\n",
+ "\\ / Total batch size = 8 | Total steps = 5,660\n",
+ " \"-____-\" Number of trainable parameters = 8,798,208\n",
+ "{'loss': 1.9402, 'grad_norm': 0.9608356952667236, 'learning_rate': 0.00019664014146772768, 'epoch': 0.18}\n",
+ "{'loss': 1.7757, 'grad_norm': 0.8031952977180481, 'learning_rate': 0.0001931034482758621, 'epoch': 0.35}\n",
+ "{'loss': 1.7142, 'grad_norm': 0.9293338656425476, 'learning_rate': 0.00018956675508399648, 'epoch': 0.53}\n",
+ "{'loss': 1.7148, 'grad_norm': 0.7500923275947571, 'learning_rate': 0.00018603006189213086, 'epoch': 0.71}\n",
+ "{'loss': 1.6853, 'grad_norm': 0.8244311809539795, 'learning_rate': 0.00018249336870026527, 'epoch': 0.88}\n",
+ "{'loss': 1.607, 'grad_norm': 0.7938973903656006, 'learning_rate': 0.00017895667550839965, 'epoch': 1.06}\n",
+ "{'loss': 1.491, 'grad_norm': 0.8929483890533447, 'learning_rate': 0.00017541998231653406, 'epoch': 1.24}\n",
+ "{'loss': 1.5396, 'grad_norm': 0.9767352342605591, 'learning_rate': 0.00017188328912466844, 'epoch': 1.41}\n",
+ "{'loss': 1.4711, 'grad_norm': 1.0663349628448486, 'learning_rate': 0.00016834659593280285, 'epoch': 1.59}\n",
+ "{'loss': 1.4675, 'grad_norm': 0.9794139862060547, 'learning_rate': 0.00016480990274093723, 'epoch': 1.77}\n",
+ "{'loss': 1.5265, 'grad_norm': 1.0100280046463013, 'learning_rate': 0.00016127320954907164, 'epoch': 1.94}\n",
+ "{'loss': 1.3482, 'grad_norm': 1.1571651697158813, 'learning_rate': 0.000157736516357206, 'epoch': 2.12}\n",
+ "{'loss': 1.2146, 'grad_norm': 1.173205018043518, 'learning_rate': 0.0001541998231653404, 'epoch': 2.3}\n",
+ "{'loss': 1.2565, 'grad_norm': 1.2972688674926758, 'learning_rate': 0.0001506631299734748, 'epoch': 2.47}\n",
+ "{'loss': 1.2542, 'grad_norm': 1.3474286794662476, 'learning_rate': 0.0001471264367816092, 'epoch': 2.65}\n",
+ "{'loss': 1.2652, 'grad_norm': 1.1654258966445923, 'learning_rate': 0.0001435897435897436, 'epoch': 2.83}\n",
+ "{'loss': 1.2327, 'grad_norm': 1.0851577520370483, 'learning_rate': 0.000140053050397878, 'epoch': 3.0}\n",
+ "{'loss': 0.9784, 'grad_norm': 1.477825403213501, 'learning_rate': 0.0001365163572060124, 'epoch': 3.18}\n",
+ "{'loss': 0.9903, 'grad_norm': 1.5403794050216675, 'learning_rate': 0.00013297966401414678, 'epoch': 3.36}\n",
+ "{'loss': 0.9977, 'grad_norm': 1.510469913482666, 'learning_rate': 0.0001294429708222812, 'epoch': 3.53}\n",
+ "{'loss': 0.9786, 'grad_norm': 1.5427402257919312, 'learning_rate': 0.00012590627763041555, 'epoch': 3.71}\n",
+ "{'loss': 1.0211, 'grad_norm': 1.569674015045166, 'learning_rate': 0.00012236958443854996, 'epoch': 3.89}\n",
+ "{'loss': 0.9282, 'grad_norm': 1.5466079711914062, 'learning_rate': 0.00011883289124668435, 'epoch': 4.06}\n",
+ "{'loss': 0.7374, 'grad_norm': 1.6999439001083374, 'learning_rate': 0.00011529619805481875, 'epoch': 4.24}\n",
+ "{'loss': 0.7371, 'grad_norm': 1.7169603109359741, 'learning_rate': 0.00011175950486295315, 'epoch': 4.42}\n",
+ "{'loss': 0.7729, 'grad_norm': 2.1618764400482178, 'learning_rate': 0.00010822281167108754, 'epoch': 4.59}\n",
+ "{'loss': 0.7718, 'grad_norm': 2.412935972213745, 'learning_rate': 0.00010468611847922194, 'epoch': 4.77}\n",
+ "{'loss': 0.7913, 'grad_norm': 1.8272628784179688, 'learning_rate': 0.00010114942528735633, 'epoch': 4.95}\n",
+ "{'loss': 0.6267, 'grad_norm': 1.8343967199325562, 'learning_rate': 9.761273209549072e-05, 'epoch': 5.12}\n",
+ "{'loss': 0.5543, 'grad_norm': 1.6036676168441772, 'learning_rate': 9.407603890362513e-05, 'epoch': 5.3}\n",
+ "{'loss': 0.5589, 'grad_norm': 1.6545743942260742, 'learning_rate': 9.053934571175951e-05, 'epoch': 5.48}\n",
+ "{'loss': 0.5872, 'grad_norm': 1.8736636638641357, 'learning_rate': 8.70026525198939e-05, 'epoch': 5.65}\n",
+ "{'loss': 0.5794, 'grad_norm': 2.7655928134918213, 'learning_rate': 8.34659593280283e-05, 'epoch': 5.83}\n",
+ "{'loss': 0.5763, 'grad_norm': 2.0712125301361084, 'learning_rate': 7.99292661361627e-05, 'epoch': 6.01}\n",
+ "{'loss': 0.3967, 'grad_norm': 1.8133138418197632, 'learning_rate': 7.639257294429708e-05, 'epoch': 6.18}\n",
+ "{'loss': 0.4092, 'grad_norm': 1.7030404806137085, 'learning_rate': 7.285587975243147e-05, 'epoch': 6.36}\n",
+ "{'loss': 0.4278, 'grad_norm': 2.3163561820983887, 'learning_rate': 6.931918656056587e-05, 'epoch': 6.54}\n",
+ "{'loss': 0.4194, 'grad_norm': 1.8104350566864014, 'learning_rate': 6.578249336870027e-05, 'epoch': 6.71}\n",
+ "{'loss': 0.4248, 'grad_norm': 1.6338876485824585, 'learning_rate': 6.224580017683466e-05, 'epoch': 6.89}\n",
+ "{'loss': 0.3832, 'grad_norm': 1.7445095777511597, 'learning_rate': 5.870910698496905e-05, 'epoch': 7.07}\n",
+ "{'loss': 0.2912, 'grad_norm': 1.7364373207092285, 'learning_rate': 5.517241379310345e-05, 'epoch': 7.24}\n",
+ "{'loss': 0.3119, 'grad_norm': 1.3449686765670776, 'learning_rate': 5.163572060123785e-05, 'epoch': 7.42}\n",
+ "{'loss': 0.3127, 'grad_norm': 1.8036069869995117, 'learning_rate': 4.809902740937224e-05, 'epoch': 7.6}\n",
+ "{'loss': 0.3028, 'grad_norm': 2.0399818420410156, 'learning_rate': 4.4562334217506634e-05, 'epoch': 7.77}\n",
+ "{'loss': 0.318, 'grad_norm': 1.544445514678955, 'learning_rate': 4.1025641025641023e-05, 'epoch': 7.95}\n",
+ "{'loss': 0.2465, 'grad_norm': 1.361892819404602, 'learning_rate': 3.7488947833775426e-05, 'epoch': 8.13}\n",
+ "{'loss': 0.2259, 'grad_norm': 2.45100736618042, 'learning_rate': 3.3952254641909815e-05, 'epoch': 8.3}\n",
+ "{'loss': 0.2285, 'grad_norm': 1.014116883277893, 'learning_rate': 3.041556145004421e-05, 'epoch': 8.48}\n",
+ "{'loss': 0.2358, 'grad_norm': 1.2610195875167847, 'learning_rate': 2.6878868258178604e-05, 'epoch': 8.66}\n",
+ "{'loss': 0.2351, 'grad_norm': 1.6966944932937622, 'learning_rate': 2.3342175066313e-05, 'epoch': 8.83}\n",
+ "{'loss': 0.2387, 'grad_norm': 1.0741287469863892, 'learning_rate': 1.9805481874447392e-05, 'epoch': 9.01}\n",
+ "{'loss': 0.1809, 'grad_norm': 1.14618980884552, 'learning_rate': 1.6268788682581788e-05, 'epoch': 9.19}\n",
+ "{'loss': 0.1911, 'grad_norm': 1.1346627473831177, 'learning_rate': 1.273209549071618e-05, 'epoch': 9.36}\n",
+ "{'loss': 0.1869, 'grad_norm': 1.1694375276565552, 'learning_rate': 9.195402298850575e-06, 'epoch': 9.54}\n",
+ "{'loss': 0.1878, 'grad_norm': 1.8589993715286255, 'learning_rate': 5.658709106984969e-06, 'epoch': 9.72}\n",
+ "{'loss': 0.1889, 'grad_norm': 0.9836981892585754, 'learning_rate': 2.1220159151193635e-06, 'epoch': 9.89}\n",
+ "{'train_runtime': 4526.9366, 'train_samples_per_second': 10.002, 'train_steps_per_second': 1.25, 'train_loss': 0.7976709857846317, 'epoch': 10.0}\n",
+ "100%|█████████████████████████████████████| 5660/5660 [1:15:26<00:00, 1.25it/s]\n",
+ "(5) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n",
+ "4526.9366 seconds used for training.\n",
+ "75.45 minutes used for training.\n",
+ "Peak reserved memory = 1.371 GB.\n",
+ "Peak reserved memory for training = 0.478 GB.\n",
+ "Peak reserved memory % of max memory = 11.431 %.\n",
+ "Peak reserved memory for training % of max memory = 3.985 %.\n",
+ "Evaluating fine-tuned model: unsloth/Qwen2-0.5B-Instruct\n",
+ " 0%| | 0/1133 [00:00, ?it/s]--------\n",
+ "step 1: Old Geng raised his rifle and tilted his head to one side; as the shotgun blast issued, crinkles formed in his eye, and ice-shells splattered through the air like hailstones.<|im_end|>\n",
+ "--------\n",
+ "step 2: Old Geng raised his rifle and tilted his head to one side; as the shotgun blast issued, crinkles formed in his eye, and ice-shells splattered through the air like hailstones.\n",
+ "--------\n",
+ "step 3: Old Geng raised his rifle and tilted his head to one side; as the shotgun blast issued, crinkles formed in his eye, and ice-shells splattered through the air like hailstones.\n",
+ "100%|███████████████████████████████████████| 1133/1133 [29:54<00:00, 1.58s/it]\n",
+ " chinese ... unsloth/Qwen2-0.5B-Instruct(finetuned)\n",
+ "0 老耿端起枪,眯缝起一只三角眼,一搂扳机响了枪,冰雹般的金麻雀劈哩啪啦往下落,铁砂子在柳枝间飞... ... Old Geng raised his rifle and tilted his head ...\n",
+ "\n",
+ "[1 rows x 4 columns]\n",
+ "(6) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n",
+ "1.371 GB of memory reserved.\n",
+ "model.safetensors: 100%|█████████████████████| 493M/493M [00:50<00:00, 9.77MB/s]\n",
+ "Saved model to https://huggingface.co/Qwen2-0.5B-Instruct-MAC-lora\n",
+ "README.md: 100%|███████████████████████████████| 599/599 [00:00<00:00, 11.4MB/s]\n",
+ "Unsloth: Saving tokenizer... Done.\n",
+ "Unsloth: Saving model... Done.\n",
+ "Unsloth: Saving LoRA adapters. Please wait...\n",
+ "401 Client Error: Unauthorized for url: https://huggingface.co/api/repos/create (Request ID: Root=1-667b060d-6b558b430a940ed15545a0b3;57af47e9-c736-4f24-80b8-1d4983acc0e0)\n",
+ "\n",
+ "Invalid username or password.\n",
+ "Tuning unsloth/Qwen2-1.5B-Instruct\n",
+ "loading env vars from: /home/inflaton/code/projects/courses/llm-finetuning/.env\n",
+ "Adding /home/inflaton/code/projects/courses/llm-finetuning to sys.path\n",
+ "🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.\n",
+ "[nltk_data] Downloading package wordnet to /home/inflaton/nltk_data...\n",
+ "[nltk_data] Package wordnet is already up-to-date!\n",
+ "[nltk_data] Downloading package punkt to /home/inflaton/nltk_data...\n",
+ "[nltk_data] Package punkt is already up-to-date!\n",
+ "[nltk_data] Downloading package omw-1.4 to /home/inflaton/nltk_data...\n",
+ "[nltk_data] Package omw-1.4 is already up-to-date!\n",
+ "loading /home/inflaton/code/projects/courses/llm-finetuning/llm_toolkit/translation_engine.py\n",
+ "unsloth/Qwen2-1.5B-Instruct True 2048 10 None datasets/mac/mac.tsv results/mac-results_py3.11.csv True True True\n",
+ "(1) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n",
+ "0.0 GB of memory reserved.\n",
+ "loading model: unsloth/Qwen2-1.5B-Instruct\n",
+ "==((====))== Unsloth: Fast Qwen2 patching release 2024.6\n",
+ " \\\\ /| GPU: NVIDIA GeForce RTX 4080 Laptop GPU. Max memory: 11.994 GB. Platform = Linux.\n",
+ "O^O/ \\_/ \\ Pytorch: 2.3.0+cu121. CUDA = 8.9. CUDA Toolkit = 12.1.\n",
+ "\\ / Bfloat16 = TRUE. Xformers = 0.0.26.post1. FA = False.\n",
+ " \"-____-\" Free Apache license: http://github.com/unslothai/unsloth\n",
+ "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n",
+ "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n",
+ "(2) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n",
+ "1.516 GB of memory reserved.\n",
+ "loading train/test data files\n",
+ "Map: 100%|████████████████████████| 4528/4528 [00:00<00:00, 31812.02 examples/s]\n",
+ "Map: 100%|████████████████████████| 1133/1133 [00:00<00:00, 14717.68 examples/s]\n",
+ "DatasetDict({\n",
+ " train: Dataset({\n",
+ " features: ['chinese', 'english', 'text', 'prompt'],\n",
+ " num_rows: 4528\n",
+ " })\n",
+ " test: Dataset({\n",
+ " features: ['chinese', 'english', 'text', 'prompt'],\n",
+ " num_rows: 1133\n",
+ " })\n",
+ "})\n",
+ "Evaluating base model: unsloth/Qwen2-1.5B-Instruct\n",
+ " 0%| | 0/1133 [00:00, ?it/s]--------\n",
+ "step 1: Lao Jing lifted up his gun, squinted one of his two triangular eyes, and fired a shot with his gun. A hail of golden sparrows rained down from above, striking the willow branches with a loud clatter, making a sound.<|im_end|>\n",
+ "--------\n",
+ "step 2: Lao Jing lifted up his gun, squinted one of his two triangular eyes, and fired a shot with his gun. A hail of golden sparrows rained down from above, striking the willow branches with a loud clatter, making a sound.\n",
+ "--------\n",
+ "step 3: Lao Jing lifted up his gun, squinted one of his two triangular eyes, and fired a shot with his gun. A hail of golden sparrows rained down from above, striking the willow branches with a loud clatter, making a sound.\n",
+ "100%|█████████████████████████████████████| 1133/1133 [1:34:06<00:00, 4.98s/it]\n",
+ " chinese ... unsloth/Qwen2-1.5B-Instruct\n",
+ "0 老耿端起枪,眯缝起一只三角眼,一搂扳机响了枪,冰雹般的金麻雀劈哩啪啦往下落,铁砂子在柳枝间飞... ... Lao Jing lifted up his gun, squinted one of hi...\n",
+ "\n",
+ "[1 rows x 5 columns]\n",
+ "(3) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n",
+ "3.945 GB of memory reserved.\n",
+ "Unsloth 2024.6 patched 28 layers with 0 QKV layers, 28 O layers and 28 MLP layers.\n",
+ "/home/inflaton/miniconda3/envs/llm-fine-tune/lib/python3.11/site-packages/transformers/training_args.py:1965: FutureWarning: `--push_to_hub_token` is deprecated and will be removed in version 5 of 🤗 Transformers. Use `--hub_token` instead.\n",
+ " warnings.warn(\n",
+ "/home/inflaton/miniconda3/envs/llm-fine-tune/lib/python3.11/site-packages/trl/trainer/sft_trainer.py:269: UserWarning: You passed a `max_seq_length` argument to the SFTTrainer, the value you passed will override the one in the `SFTConfig`.\n",
+ " warnings.warn(\n",
+ "/home/inflaton/miniconda3/envs/llm-fine-tune/lib/python3.11/site-packages/trl/trainer/sft_trainer.py:283: UserWarning: You passed a `dataset_num_proc` argument to the SFTTrainer, the value you passed will override the one in the `SFTConfig`.\n",
+ " warnings.warn(\n",
+ "/home/inflaton/miniconda3/envs/llm-fine-tune/lib/python3.11/site-packages/trl/trainer/sft_trainer.py:307: UserWarning: You passed a `dataset_text_field` argument to the SFTTrainer, the value you passed will override the one in the `SFTConfig`.\n",
+ " warnings.warn(\n",
+ "Map (num_proc=2): 100%|████████████| 4528/4528 [00:02<00:00, 2177.21 examples/s]\n",
+ "(4) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n",
+ "3.945 GB of memory reserved.\n",
+ "==((====))== Unsloth - 2x faster free finetuning | Num GPUs = 1\n",
+ " \\\\ /| Num examples = 4,528 | Num Epochs = 10\n",
+ "O^O/ \\_/ \\ Batch size per device = 2 | Gradient Accumulation steps = 4\n",
+ "\\ / Total batch size = 8 | Total steps = 5,660\n",
+ " \"-____-\" Number of trainable parameters = 18,464,768\n",
+ "{'loss': 1.7414, 'grad_norm': 0.645324170589447, 'learning_rate': 0.00019664014146772768, 'epoch': 0.18}\n",
+ "{'loss': 1.5681, 'grad_norm': 0.6221398115158081, 'learning_rate': 0.0001931034482758621, 'epoch': 0.35}\n",
+ "{'loss': 1.5156, 'grad_norm': 0.6400603652000427, 'learning_rate': 0.00018956675508399648, 'epoch': 0.53}\n",
+ "{'loss': 1.5174, 'grad_norm': 0.5692432522773743, 'learning_rate': 0.00018603006189213086, 'epoch': 0.71}\n",
+ "{'loss': 1.496, 'grad_norm': 0.5720127820968628, 'learning_rate': 0.00018249336870026527, 'epoch': 0.88}\n",
+ "{'loss': 1.4181, 'grad_norm': 0.5995662808418274, 'learning_rate': 0.00017895667550839965, 'epoch': 1.06}\n",
+ "{'loss': 1.2976, 'grad_norm': 0.7103854417800903, 'learning_rate': 0.00017541998231653406, 'epoch': 1.24}\n",
+ "{'loss': 1.3392, 'grad_norm': 0.6867524981498718, 'learning_rate': 0.00017188328912466844, 'epoch': 1.41}\n",
+ "{'loss': 1.2788, 'grad_norm': 0.8870283961296082, 'learning_rate': 0.00016834659593280285, 'epoch': 1.59}\n",
+ "{'loss': 1.2799, 'grad_norm': 0.8677213788032532, 'learning_rate': 0.00016480990274093723, 'epoch': 1.77}\n",
+ "{'loss': 1.3311, 'grad_norm': 0.824891984462738, 'learning_rate': 0.00016127320954907164, 'epoch': 1.94}\n",
+ "{'loss': 1.1283, 'grad_norm': 0.9994134902954102, 'learning_rate': 0.000157736516357206, 'epoch': 2.12}\n",
+ "{'loss': 0.9958, 'grad_norm': 1.1032615900039673, 'learning_rate': 0.0001541998231653404, 'epoch': 2.3}\n",
+ "{'loss': 1.0305, 'grad_norm': 1.2964789867401123, 'learning_rate': 0.0001506631299734748, 'epoch': 2.47}\n",
+ "{'loss': 1.0345, 'grad_norm': 1.1391383409500122, 'learning_rate': 0.0001471264367816092, 'epoch': 2.65}\n",
+ "{'loss': 1.0352, 'grad_norm': 1.033879041671753, 'learning_rate': 0.0001435897435897436, 'epoch': 2.83}\n",
+ "{'loss': 1.0078, 'grad_norm': 0.893123209476471, 'learning_rate': 0.000140053050397878, 'epoch': 3.0}\n",
+ "{'loss': 0.6974, 'grad_norm': 1.3191255331039429, 'learning_rate': 0.0001365163572060124, 'epoch': 3.18}\n",
+ "{'loss': 0.7094, 'grad_norm': 1.3912413120269775, 'learning_rate': 0.00013297966401414678, 'epoch': 3.36}\n",
+ "{'loss': 0.7159, 'grad_norm': 1.262569785118103, 'learning_rate': 0.0001294429708222812, 'epoch': 3.53}\n",
+ "{'loss': 0.7052, 'grad_norm': 1.4299038648605347, 'learning_rate': 0.00012590627763041555, 'epoch': 3.71}\n",
+ "{'loss': 0.7441, 'grad_norm': 1.2525451183319092, 'learning_rate': 0.00012236958443854996, 'epoch': 3.89}\n",
+ "{'loss': 0.6426, 'grad_norm': 1.1614264249801636, 'learning_rate': 0.00011883289124668435, 'epoch': 4.06}\n",
+ "{'loss': 0.4613, 'grad_norm': 1.4634888172149658, 'learning_rate': 0.00011529619805481875, 'epoch': 4.24}\n",
+ "{'loss': 0.4463, 'grad_norm': 1.479321002960205, 'learning_rate': 0.00011175950486295315, 'epoch': 4.42}\n",
+ "{'loss': 0.4803, 'grad_norm': 1.524327278137207, 'learning_rate': 0.00010822281167108754, 'epoch': 4.59}\n",
+ "{'loss': 0.4775, 'grad_norm': 1.8287591934204102, 'learning_rate': 0.00010468611847922194, 'epoch': 4.77}\n",
+ "{'loss': 0.4858, 'grad_norm': 1.5343736410140991, 'learning_rate': 0.00010114942528735633, 'epoch': 4.95}\n",
+ "{'loss': 0.3607, 'grad_norm': 1.509474277496338, 'learning_rate': 9.761273209549072e-05, 'epoch': 5.12}\n",
+ "{'loss': 0.291, 'grad_norm': 0.9709746837615967, 'learning_rate': 9.407603890362513e-05, 'epoch': 5.3}\n",
+ "{'loss': 0.297, 'grad_norm': 1.1549153327941895, 'learning_rate': 9.053934571175951e-05, 'epoch': 5.48}\n",
+ "{'loss': 0.3137, 'grad_norm': 1.5136038064956665, 'learning_rate': 8.70026525198939e-05, 'epoch': 5.65}\n",
+ "{'loss': 0.3086, 'grad_norm': 1.5126714706420898, 'learning_rate': 8.34659593280283e-05, 'epoch': 5.83}\n",
+ "{'loss': 0.3054, 'grad_norm': 1.2357054948806763, 'learning_rate': 7.99292661361627e-05, 'epoch': 6.01}\n",
+ "{'loss': 0.1908, 'grad_norm': 0.998966634273529, 'learning_rate': 7.639257294429708e-05, 'epoch': 6.18}\n",
+ "{'loss': 0.2016, 'grad_norm': 1.4552719593048096, 'learning_rate': 7.285587975243147e-05, 'epoch': 6.36}\n",
+ "{'loss': 0.2063, 'grad_norm': 1.193115234375, 'learning_rate': 6.931918656056587e-05, 'epoch': 6.54}\n",
+ "{'loss': 0.2068, 'grad_norm': 1.1482857465744019, 'learning_rate': 6.578249336870027e-05, 'epoch': 6.71}\n",
+ "{'loss': 0.2145, 'grad_norm': 1.2252038717269897, 'learning_rate': 6.224580017683466e-05, 'epoch': 6.89}\n",
+ "{'loss': 0.194, 'grad_norm': 0.8770560026168823, 'learning_rate': 5.870910698496905e-05, 'epoch': 7.07}\n",
+ "{'loss': 0.1487, 'grad_norm': 0.7064879536628723, 'learning_rate': 5.517241379310345e-05, 'epoch': 7.24}\n",
+ "{'loss': 0.1571, 'grad_norm': 0.8916934132575989, 'learning_rate': 5.163572060123785e-05, 'epoch': 7.42}\n",
+ "{'loss': 0.1576, 'grad_norm': 0.910902738571167, 'learning_rate': 4.809902740937224e-05, 'epoch': 7.6}\n",
+ "{'loss': 0.1576, 'grad_norm': 1.3216708898544312, 'learning_rate': 4.4562334217506634e-05, 'epoch': 7.77}\n",
+ "{'loss': 0.1596, 'grad_norm': 0.9173678755760193, 'learning_rate': 4.1025641025641023e-05, 'epoch': 7.95}\n",
+ "{'loss': 0.1368, 'grad_norm': 0.7557782530784607, 'learning_rate': 3.7488947833775426e-05, 'epoch': 8.13}\n",
+ "{'loss': 0.1305, 'grad_norm': 1.3723442554473877, 'learning_rate': 3.3952254641909815e-05, 'epoch': 8.3}\n",
+ "{'loss': 0.1315, 'grad_norm': 0.4736746847629547, 'learning_rate': 3.041556145004421e-05, 'epoch': 8.48}\n",
+ "{'loss': 0.1328, 'grad_norm': 0.4113923907279968, 'learning_rate': 2.6878868258178604e-05, 'epoch': 8.66}\n",
+ "{'loss': 0.1343, 'grad_norm': 0.628764271736145, 'learning_rate': 2.3342175066313e-05, 'epoch': 8.83}\n",
+ "{'loss': 0.1329, 'grad_norm': 0.5089218020439148, 'learning_rate': 1.9805481874447392e-05, 'epoch': 9.01}\n",
+ "{'loss': 0.1179, 'grad_norm': 0.45263954997062683, 'learning_rate': 1.6268788682581788e-05, 'epoch': 9.19}\n",
+ "{'loss': 0.1185, 'grad_norm': 0.552614152431488, 'learning_rate': 1.273209549071618e-05, 'epoch': 9.36}\n",
+ "{'loss': 0.1193, 'grad_norm': 0.46947821974754333, 'learning_rate': 9.195402298850575e-06, 'epoch': 9.54}\n",
+ "{'loss': 0.1202, 'grad_norm': 0.5204765200614929, 'learning_rate': 5.658709106984969e-06, 'epoch': 9.72}\n",
+ "{'loss': 0.1232, 'grad_norm': 0.4254581928253174, 'learning_rate': 2.1220159151193635e-06, 'epoch': 9.89}\n",
+ "{'train_runtime': 16066.3343, 'train_samples_per_second': 2.818, 'train_steps_per_second': 0.352, 'train_loss': 0.5993303972082509, 'epoch': 10.0}\n",
+ "100%|█████████████████████████████████████| 5660/5660 [4:27:46<00:00, 2.84s/it]\n",
+ "(5) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n",
+ "16066.3343 seconds used for training.\n",
+ "267.77 minutes used for training.\n",
+ "Peak reserved memory = 3.945 GB.\n",
+ "Peak reserved memory for training = 0.0 GB.\n",
+ "Peak reserved memory % of max memory = 32.891 %.\n",
+ "Peak reserved memory for training % of max memory = 0.0 %.\n",
+ "Evaluating fine-tuned model: unsloth/Qwen2-1.5B-Instruct\n",
+ " 0%| | 0/1133 [00:00, ?it/s]--------\n",
+ "step 1: Old Geng raised the pistol to his eye, squeezed the trigger, and rainafterdowned birds landed: Golden sparrows, dead or alive, screeched and dropped from the treetops, sending sticks flying everywhere.<|im_end|>\n",
+ "--------\n",
+ "step 2: Old Geng raised the pistol to his eye, squeezed the trigger, and rainafterdowned birds landed: Golden sparrows, dead or alive, screeched and dropped from the treetops, sending sticks flying everywhere.\n",
+ "--------\n",
+ "step 3: Old Geng raised the pistol to his eye, squeezed the trigger, and rainafterdowned birds landed: Golden sparrows, dead or alive, screeched and dropped from the treetops, sending sticks flying everywhere.\n",
+ "100%|█████████████████████████████████████| 1133/1133 [2:11:15<00:00, 6.95s/it]\n",
+ " chinese ... unsloth/Qwen2-1.5B-Instruct(finetuned)\n",
+ "0 老耿端起枪,眯缝起一只三角眼,一搂扳机响了枪,冰雹般的金麻雀劈哩啪啦往下落,铁砂子在柳枝间飞... ... Old Geng raised the pistol to his eye, squeeze...\n",
+ "\n",
+ "[1 rows x 6 columns]\n",
+ "(6) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n",
+ "5.33 GB of memory reserved.\n",
+ "model.safetensors: 100%|███████████████████| 1.22G/1.22G [01:25<00:00, 14.3MB/s]\n",
+ "Saved model to https://huggingface.co/Qwen2-1.5B-Instruct-MAC-lora\n",
+ "README.md: 100%|███████████████████████████████| 599/599 [00:00<00:00, 3.80MB/s]\n",
+ "Unsloth: Saving tokenizer... Done.\n",
+ "Unsloth: Saving model... Done.\n",
+ "Unsloth: Saving LoRA adapters. Please wait...\n",
+ "401 Client Error: Unauthorized for url: https://huggingface.co/api/repos/create (Request ID: Root=1-667b7a4d-4dd9df470d5a01b733cc9060;fa501a9a-6bbd-4865-bbdc-ddcc5f272821)\n",
+ "\n",
+ "Invalid username or password.\n",
+ "CPU times: user 13min 32s, sys: 5min 37s, total: 19min 10s\n",
+ "Wall time: 10h 24min 39s\n"
+ ]
+ }
+ ],
+ "source": [
+ "%%time\n",
+ "\n",
+ "!./scripts/tune-small-2.sh"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Current Directory:\n",
+ "/home/inflaton/code/projects/courses/llm-finetuning\n",
+ "Tuning unsloth/Qwen2-0.5B-Instruct-bnb-4bit\n",
+ "loading env vars from: /home/inflaton/code/projects/courses/llm-finetuning/.env\n",
+ "Adding /home/inflaton/code/projects/courses/llm-finetuning to sys.path\n",
+ "🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.\n",
+ "[nltk_data] Downloading package wordnet to /home/inflaton/nltk_data...\n",
+ "[nltk_data] Package wordnet is already up-to-date!\n",
+ "[nltk_data] Downloading package punkt to /home/inflaton/nltk_data...\n",
+ "[nltk_data] Package punkt is already up-to-date!\n",
+ "[nltk_data] Downloading package omw-1.4 to /home/inflaton/nltk_data...\n",
+ "[nltk_data] Package omw-1.4 is already up-to-date!\n",
+ "loading /home/inflaton/code/projects/courses/llm-finetuning/llm_toolkit/translation_engine.py\n",
+ "unsloth/Qwen2-0.5B-Instruct-bnb-4bit True 2048 10 None datasets/mac/mac.tsv results/mac-results_py3.11.csv True True True\n",
+ "(1) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n",
+ "0.0 GB of memory reserved.\n",
+ "loading model: unsloth/Qwen2-0.5B-Instruct-bnb-4bit\n",
+ "==((====))== Unsloth: Fast Qwen2 patching release 2024.6\n",
+ " \\\\ /| GPU: NVIDIA GeForce RTX 4080 Laptop GPU. Max memory: 11.994 GB. Platform = Linux.\n",
+ "O^O/ \\_/ \\ Pytorch: 2.3.0+cu121. CUDA = 8.9. CUDA Toolkit = 12.1.\n",
+ "\\ / Bfloat16 = TRUE. Xformers = 0.0.26.post1. FA = False.\n",
+ " \"-____-\" Free Apache license: http://github.com/unslothai/unsloth\n",
+ "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n",
+ "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n",
+ "(2) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n",
+ "0.633 GB of memory reserved.\n",
+ "loading train/test data files\n",
+ "Map: 100%|████████████████████████| 4528/4528 [00:00<00:00, 12373.02 examples/s]\n",
+ "Map: 100%|█████████████████████████| 1133/1133 [00:00<00:00, 9100.92 examples/s]\n",
+ "DatasetDict({\n",
+ " train: Dataset({\n",
+ " features: ['chinese', 'english', 'text', 'prompt'],\n",
+ " num_rows: 4528\n",
+ " })\n",
+ " test: Dataset({\n",
+ " features: ['chinese', 'english', 'text', 'prompt'],\n",
+ " num_rows: 1133\n",
+ " })\n",
+ "})\n",
+ "Evaluating base model: unsloth/Qwen2-0.5B-Instruct-bnb-4bit\n",
+ " 0%| | 0/1133 [00:00, ?it/s]--------\n",
+ "step 1: Old Tang raises his gun, half-closed eyes, firing the trigger of the revolver. The bullet crackles as the hammer rings, a thunderous bang that echoes through the branches of the tree. The sand grains hit the ground with a thud, echoing like a drum.<|im_end|>\n",
+ "--------\n",
+ "step 2: Old Tang raises his gun, half-closed eyes, firing the trigger of the revolver. The bullet crackles as the hammer rings, a thunderous bang that echoes through the branches of the tree. The sand grains hit the ground with a thud, echoing like a drum.\n",
+ "--------\n",
+ "step 3: Old Tang raises his gun, half-closed eyes, firing the trigger of the revolver. The bullet crackles as the hammer rings, a thunderous bang that echoes through the branches of the tree. The sand grains hit the ground with a thud, echoing like a drum.\n",
+ "100%|█████████████████████████████████████| 1133/1133 [1:20:24<00:00, 4.26s/it]\n",
+ " chinese ... unsloth/Qwen2-0.5B-Instruct-bnb-4bit\n",
+ "0 老耿端起枪,眯缝起一只三角眼,一搂扳机响了枪,冰雹般的金麻雀劈哩啪啦往下落,铁砂子在柳枝间飞... ... Old Tang raises his gun, half-closed eyes, fir...\n",
+ "\n",
+ "[1 rows x 7 columns]\n",
+ "(3) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n",
+ "3.023 GB of memory reserved.\n",
+ "Unsloth 2024.6 patched 24 layers with 0 QKV layers, 24 O layers and 24 MLP layers.\n",
+ "/home/inflaton/miniconda3/envs/llm-fine-tune/lib/python3.11/site-packages/transformers/training_args.py:1965: FutureWarning: `--push_to_hub_token` is deprecated and will be removed in version 5 of 🤗 Transformers. Use `--hub_token` instead.\n",
+ " warnings.warn(\n",
+ "/home/inflaton/miniconda3/envs/llm-fine-tune/lib/python3.11/site-packages/trl/trainer/sft_trainer.py:269: UserWarning: You passed a `max_seq_length` argument to the SFTTrainer, the value you passed will override the one in the `SFTConfig`.\n",
+ " warnings.warn(\n",
+ "/home/inflaton/miniconda3/envs/llm-fine-tune/lib/python3.11/site-packages/trl/trainer/sft_trainer.py:283: UserWarning: You passed a `dataset_num_proc` argument to the SFTTrainer, the value you passed will override the one in the `SFTConfig`.\n",
+ " warnings.warn(\n",
+ "/home/inflaton/miniconda3/envs/llm-fine-tune/lib/python3.11/site-packages/trl/trainer/sft_trainer.py:307: UserWarning: You passed a `dataset_text_field` argument to the SFTTrainer, the value you passed will override the one in the `SFTConfig`.\n",
+ " warnings.warn(\n",
+ "Map (num_proc=2): 100%|████████████| 4528/4528 [00:02<00:00, 2107.23 examples/s]\n",
+ "(4) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n",
+ "3.023 GB of memory reserved.\n",
+ "==((====))== Unsloth - 2x faster free finetuning | Num GPUs = 1\n",
+ " \\\\ /| Num examples = 4,528 | Num Epochs = 10\n",
+ "O^O/ \\_/ \\ Batch size per device = 2 | Gradient Accumulation steps = 4\n",
+ "\\ / Total batch size = 8 | Total steps = 5,660\n",
+ " \"-____-\" Number of trainable parameters = 8,798,208\n",
+ "{'loss': 1.9402, 'grad_norm': 0.9608356952667236, 'learning_rate': 0.00019664014146772768, 'epoch': 0.18}\n",
+ "{'loss': 1.7759, 'grad_norm': 0.7993724346160889, 'learning_rate': 0.0001931034482758621, 'epoch': 0.35}\n",
+ "{'loss': 1.7139, 'grad_norm': 0.9072545766830444, 'learning_rate': 0.00018956675508399648, 'epoch': 0.53}\n",
+ "{'loss': 1.715, 'grad_norm': 0.7505761384963989, 'learning_rate': 0.00018603006189213086, 'epoch': 0.71}\n",
+ "{'loss': 1.6855, 'grad_norm': 0.819739580154419, 'learning_rate': 0.00018249336870026527, 'epoch': 0.88}\n",
+ "{'loss': 1.6071, 'grad_norm': 0.7929678559303284, 'learning_rate': 0.00017895667550839965, 'epoch': 1.06}\n",
+ "{'loss': 1.491, 'grad_norm': 0.8940563797950745, 'learning_rate': 0.00017541998231653406, 'epoch': 1.24}\n",
+ "{'loss': 1.5395, 'grad_norm': 0.9731950163841248, 'learning_rate': 0.00017188328912466844, 'epoch': 1.41}\n",
+ "{'loss': 1.4715, 'grad_norm': 1.0640537738800049, 'learning_rate': 0.00016834659593280285, 'epoch': 1.59}\n",
+ "{'loss': 1.4674, 'grad_norm': 0.9785354137420654, 'learning_rate': 0.00016480990274093723, 'epoch': 1.77}\n",
+ "{'loss': 1.5265, 'grad_norm': 1.007393479347229, 'learning_rate': 0.00016127320954907164, 'epoch': 1.94}\n",
+ "{'loss': 1.3483, 'grad_norm': 1.1656651496887207, 'learning_rate': 0.000157736516357206, 'epoch': 2.12}\n",
+ "{'loss': 1.2144, 'grad_norm': 1.1790250539779663, 'learning_rate': 0.0001541998231653404, 'epoch': 2.3}\n",
+ "{'loss': 1.2563, 'grad_norm': 1.3019884824752808, 'learning_rate': 0.0001506631299734748, 'epoch': 2.47}\n",
+ "{'loss': 1.2541, 'grad_norm': 1.2913496494293213, 'learning_rate': 0.0001471264367816092, 'epoch': 2.65}\n",
+ "{'loss': 1.2652, 'grad_norm': 1.15500009059906, 'learning_rate': 0.0001435897435897436, 'epoch': 2.83}\n",
+ "{'loss': 1.233, 'grad_norm': 1.1158246994018555, 'learning_rate': 0.000140053050397878, 'epoch': 3.0}\n",
+ "{'loss': 0.9784, 'grad_norm': 1.5188616514205933, 'learning_rate': 0.0001365163572060124, 'epoch': 3.18}\n",
+ "{'loss': 0.9898, 'grad_norm': 1.5216989517211914, 'learning_rate': 0.00013297966401414678, 'epoch': 3.36}\n",
+ "{'loss': 0.9973, 'grad_norm': 1.5246195793151855, 'learning_rate': 0.0001294429708222812, 'epoch': 3.53}\n",
+ "{'loss': 0.9778, 'grad_norm': 1.5367473363876343, 'learning_rate': 0.00012590627763041555, 'epoch': 3.71}\n",
+ "{'loss': 1.0212, 'grad_norm': 1.583097219467163, 'learning_rate': 0.00012236958443854996, 'epoch': 3.89}\n",
+ "{'loss': 0.9286, 'grad_norm': 1.4903535842895508, 'learning_rate': 0.00011883289124668435, 'epoch': 4.06}\n",
+ "{'loss': 0.7365, 'grad_norm': 1.7766040563583374, 'learning_rate': 0.00011529619805481875, 'epoch': 4.24}\n",
+ "{'loss': 0.7392, 'grad_norm': 1.7640079259872437, 'learning_rate': 0.00011175950486295315, 'epoch': 4.42}\n",
+ "{'loss': 0.772, 'grad_norm': 2.152620315551758, 'learning_rate': 0.00010822281167108754, 'epoch': 4.59}\n",
+ "{'loss': 0.7712, 'grad_norm': 2.200035333633423, 'learning_rate': 0.00010468611847922194, 'epoch': 4.77}\n",
+ "{'loss': 0.7922, 'grad_norm': 1.8203985691070557, 'learning_rate': 0.00010114942528735633, 'epoch': 4.95}\n",
+ "{'loss': 0.6284, 'grad_norm': 2.1231496334075928, 'learning_rate': 9.761273209549072e-05, 'epoch': 5.12}\n",
+ "{'loss': 0.5538, 'grad_norm': 1.633864402770996, 'learning_rate': 9.407603890362513e-05, 'epoch': 5.3}\n",
+ "{'loss': 0.5586, 'grad_norm': 1.8590402603149414, 'learning_rate': 9.053934571175951e-05, 'epoch': 5.48}\n",
+ "{'loss': 0.588, 'grad_norm': 1.8585463762283325, 'learning_rate': 8.70026525198939e-05, 'epoch': 5.65}\n",
+ "{'loss': 0.5816, 'grad_norm': 2.0347542762756348, 'learning_rate': 8.34659593280283e-05, 'epoch': 5.83}\n",
+ "{'loss': 0.5778, 'grad_norm': 1.9615962505340576, 'learning_rate': 7.99292661361627e-05, 'epoch': 6.01}\n",
+ "{'loss': 0.398, 'grad_norm': 1.861493468284607, 'learning_rate': 7.639257294429708e-05, 'epoch': 6.18}\n",
+ "{'loss': 0.4113, 'grad_norm': 1.8733363151550293, 'learning_rate': 7.285587975243147e-05, 'epoch': 6.36}\n",
+ "{'loss': 0.4295, 'grad_norm': 2.361309289932251, 'learning_rate': 6.931918656056587e-05, 'epoch': 6.54}\n",
+ "{'loss': 0.42, 'grad_norm': 1.871649980545044, 'learning_rate': 6.578249336870027e-05, 'epoch': 6.71}\n",
+ "{'loss': 0.4263, 'grad_norm': 1.567151427268982, 'learning_rate': 6.224580017683466e-05, 'epoch': 6.89}\n",
+ "{'loss': 0.3841, 'grad_norm': 1.7655881643295288, 'learning_rate': 5.870910698496905e-05, 'epoch': 7.07}\n",
+ "{'loss': 0.2916, 'grad_norm': 1.5300374031066895, 'learning_rate': 5.517241379310345e-05, 'epoch': 7.24}\n",
+ "{'loss': 0.3137, 'grad_norm': 1.399330973625183, 'learning_rate': 5.163572060123785e-05, 'epoch': 7.42}\n",
+ "{'loss': 0.3127, 'grad_norm': 2.009575605392456, 'learning_rate': 4.809902740937224e-05, 'epoch': 7.6}\n",
+ "{'loss': 0.3035, 'grad_norm': 1.74385404586792, 'learning_rate': 4.4562334217506634e-05, 'epoch': 7.77}\n",
+ "{'loss': 0.3187, 'grad_norm': 1.5835634469985962, 'learning_rate': 4.1025641025641023e-05, 'epoch': 7.95}\n",
+ "{'loss': 0.2469, 'grad_norm': 1.3062812089920044, 'learning_rate': 3.7488947833775426e-05, 'epoch': 8.13}\n",
+ "{'loss': 0.2274, 'grad_norm': 1.4867122173309326, 'learning_rate': 3.3952254641909815e-05, 'epoch': 8.3}\n",
+ "{'loss': 0.229, 'grad_norm': 1.0662896633148193, 'learning_rate': 3.041556145004421e-05, 'epoch': 8.48}\n",
+ "{'loss': 0.2378, 'grad_norm': 1.3190470933914185, 'learning_rate': 2.6878868258178604e-05, 'epoch': 8.66}\n",
+ "{'loss': 0.2362, 'grad_norm': 1.9712920188903809, 'learning_rate': 2.3342175066313e-05, 'epoch': 8.83}\n",
+ "{'loss': 0.238, 'grad_norm': 1.1038957834243774, 'learning_rate': 1.9805481874447392e-05, 'epoch': 9.01}\n",
+ "{'loss': 0.1807, 'grad_norm': 1.2336972951889038, 'learning_rate': 1.6268788682581788e-05, 'epoch': 9.19}\n",
+ "{'loss': 0.1896, 'grad_norm': 1.145556926727295, 'learning_rate': 1.273209549071618e-05, 'epoch': 9.36}\n",
+ "{'loss': 0.1878, 'grad_norm': 1.1361063718795776, 'learning_rate': 9.195402298850575e-06, 'epoch': 9.54}\n",
+ "{'loss': 0.189, 'grad_norm': 1.5798214673995972, 'learning_rate': 5.658709106984969e-06, 'epoch': 9.72}\n",
+ "{'loss': 0.19, 'grad_norm': 0.9943256974220276, 'learning_rate': 2.1220159151193635e-06, 'epoch': 9.89}\n",
+ "{'train_runtime': 13819.9366, 'train_samples_per_second': 3.276, 'train_steps_per_second': 0.41, 'train_loss': 0.7980813258949523, 'epoch': 10.0}\n",
+ "100%|█████████████████████████████████████| 5660/5660 [3:50:19<00:00, 2.44s/it]\n",
+ "(5) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n",
+ "13819.9366 seconds used for training.\n",
+ "230.33 minutes used for training.\n",
+ "Peak reserved memory = 3.023 GB.\n",
+ "Peak reserved memory for training = 0.0 GB.\n",
+ "Peak reserved memory % of max memory = 25.204 %.\n",
+ "Peak reserved memory for training % of max memory = 0.0 %.\n",
+ "Evaluating fine-tuned model: unsloth/Qwen2-0.5B-Instruct-bnb-4bit\n",
+ " 0%| | 0/1133 [00:00, ?it/s]--------\n",
+ "step 1: Old Geng raised his rifle, clenching one of its triangular sights, and fired. A hailstone blast tore through the field, scattering large icy crickets (the cricket-like sounds of the gun wasp were like the crackles of钢铁的碎块) that fell powerfully in the willows between them.<|im_end|>\n",
+ "--------\n",
+ "step 2: Old Geng raised his rifle, clenching one of its triangular sights, and fired. A hailstone blast tore through the field, scattering large icy crickets (the cricket-like sounds of the gun wasp were like the crackles of钢铁的碎块) that fell powerfully in the willows between them.\n",
+ "--------\n",
+ "step 3: Old Geng raised his rifle, clenching one of its triangular sights, and fired. A hailstone blast tore through the field, scattering large icy crickets (the cricket-like sounds of the gun wasp were like the crackles of钢铁的碎块) that fell powerfully in the willows between them.\n",
+ "100%|█████████████████████████████████████| 1133/1133 [1:48:52<00:00, 5.77s/it]\n",
+ " chinese ... unsloth/Qwen2-0.5B-Instruct-bnb-4bit(finetuned)\n",
+ "0 老耿端起枪,眯缝起一只三角眼,一搂扳机响了枪,冰雹般的金麻雀劈哩啪啦往下落,铁砂子在柳枝间飞... ... Old Geng raised his rifle, clenching one of it...\n",
+ "\n",
+ "[1 rows x 8 columns]\n",
+ "(6) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n",
+ "4.344 GB of memory reserved.\n",
+ "model.safetensors: 100%|█████████████████████| 493M/493M [01:44<00:00, 4.71MB/s]\n",
+ "Saved model to https://huggingface.co/Qwen2-0.5B-Instruct-bnb-4bit-MAC-lora\n",
+ "README.md: 100%|███████████████████████████████| 599/599 [00:00<00:00, 3.50MB/s]\n",
+ "Unsloth: Saving tokenizer... Done.\n",
+ "Unsloth: Saving model... Done.\n",
+ "Unsloth: Saving LoRA adapters. Please wait...\n",
+ "401 Client Error: Unauthorized for url: https://huggingface.co/api/repos/create (Request ID: Root=1-667bdd5e-773cdcf53c89ad5261301688;d08e235b-a133-4e46-b5db-3e4f74133461)\n",
+ "\n",
+ "Invalid username or password.\n",
+ "Tuning unsloth/Qwen2-1.5B-Instruct-bnb-4bit\n",
+ "loading env vars from: /home/inflaton/code/projects/courses/llm-finetuning/.env\n",
+ "Adding /home/inflaton/code/projects/courses/llm-finetuning to sys.path\n",
+ "🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.\n",
+ "[nltk_data] Downloading package wordnet to /home/inflaton/nltk_data...\n",
+ "[nltk_data] Package wordnet is already up-to-date!\n",
+ "[nltk_data] Downloading package punkt to /home/inflaton/nltk_data...\n",
+ "[nltk_data] Package punkt is already up-to-date!\n",
+ "[nltk_data] Downloading package omw-1.4 to /home/inflaton/nltk_data...\n",
+ "[nltk_data] Package omw-1.4 is already up-to-date!\n",
+ "loading /home/inflaton/code/projects/courses/llm-finetuning/llm_toolkit/translation_engine.py\n",
+ "unsloth/Qwen2-1.5B-Instruct-bnb-4bit True 2048 10 None datasets/mac/mac.tsv results/mac-results_py3.11.csv True True True\n",
+ "(1) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n",
+ "0.0 GB of memory reserved.\n",
+ "loading model: unsloth/Qwen2-1.5B-Instruct-bnb-4bit\n",
+ "==((====))== Unsloth: Fast Qwen2 patching release 2024.6\n",
+ " \\\\ /| GPU: NVIDIA GeForce RTX 4080 Laptop GPU. Max memory: 11.994 GB. Platform = Linux.\n",
+ "O^O/ \\_/ \\ Pytorch: 2.3.0+cu121. CUDA = 8.9. CUDA Toolkit = 12.1.\n",
+ "\\ / Bfloat16 = TRUE. Xformers = 0.0.26.post1. FA = False.\n",
+ " \"-____-\" Free Apache license: http://github.com/unslothai/unsloth\n",
+ "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n",
+ "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n",
+ "(2) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n",
+ "1.516 GB of memory reserved.\n",
+ "loading train/test data files\n",
+ "Map: 100%|████████████████████████| 4528/4528 [00:00<00:00, 10965.65 examples/s]\n",
+ "Map: 100%|█████████████████████████| 1133/1133 [00:00<00:00, 9010.28 examples/s]\n",
+ "DatasetDict({\n",
+ " train: Dataset({\n",
+ " features: ['chinese', 'english', 'text', 'prompt'],\n",
+ " num_rows: 4528\n",
+ " })\n",
+ " test: Dataset({\n",
+ " features: ['chinese', 'english', 'text', 'prompt'],\n",
+ " num_rows: 1133\n",
+ " })\n",
+ "})\n",
+ "Evaluating base model: unsloth/Qwen2-1.5B-Instruct-bnb-4bit\n",
+ " 0%| | 0/1133 [00:00, ?it/s]--------\n",
+ "step 1: Old耿拿起枪,眯缝起一只三角眼,一搂扳机响了枪,冰雹般的金麻雀劈哩啪啦往下落,铁砂子在柳枝间飞迸着,嚓嚓有声。<|im_end|>\n",
+ "--------\n",
+ "step 2: Old耿拿起枪,眯缝起一只三角眼,一搂扳机响了枪,冰雹般的金麻雀劈哩啪啦往下落,铁砂子在柳枝间飞迸着,嚓嚓有声。\n",
+ "--------\n",
+ "step 3: Old耿拿起枪,眯缝起一只三角眼,一搂扳机响了枪,冰雹般的金麻雀劈哩啪啦往下落,铁砂子在柳枝间飞迸着,嚓嚓有声。\n",
+ "100%|█████████████████████████████████████| 1133/1133 [1:09:28<00:00, 3.68s/it]\n",
+ " chinese ... unsloth/Qwen2-1.5B-Instruct-bnb-4bit\n",
+ "0 老耿端起枪,眯缝起一只三角眼,一搂扳机响了枪,冰雹般的金麻雀劈哩啪啦往下落,铁砂子在柳枝间飞... ... Old耿拿起枪,眯缝起一只三角眼,一搂扳机响了枪,冰雹般的金麻雀劈哩啪啦往下落,铁砂子在柳枝...\n",
+ "\n",
+ "[1 rows x 9 columns]\n",
+ "(3) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n",
+ "1.758 GB of memory reserved.\n",
+ "Unsloth 2024.6 patched 28 layers with 0 QKV layers, 28 O layers and 28 MLP layers.\n",
+ "/home/inflaton/miniconda3/envs/llm-fine-tune/lib/python3.11/site-packages/transformers/training_args.py:1965: FutureWarning: `--push_to_hub_token` is deprecated and will be removed in version 5 of 🤗 Transformers. Use `--hub_token` instead.\n",
+ " warnings.warn(\n",
+ "/home/inflaton/miniconda3/envs/llm-fine-tune/lib/python3.11/site-packages/trl/trainer/sft_trainer.py:269: UserWarning: You passed a `max_seq_length` argument to the SFTTrainer, the value you passed will override the one in the `SFTConfig`.\n",
+ " warnings.warn(\n",
+ "/home/inflaton/miniconda3/envs/llm-fine-tune/lib/python3.11/site-packages/trl/trainer/sft_trainer.py:283: UserWarning: You passed a `dataset_num_proc` argument to the SFTTrainer, the value you passed will override the one in the `SFTConfig`.\n",
+ " warnings.warn(\n",
+ "/home/inflaton/miniconda3/envs/llm-fine-tune/lib/python3.11/site-packages/trl/trainer/sft_trainer.py:307: UserWarning: You passed a `dataset_text_field` argument to the SFTTrainer, the value you passed will override the one in the `SFTConfig`.\n",
+ " warnings.warn(\n",
+ "Map (num_proc=2): 100%|████████████| 4528/4528 [00:02<00:00, 2245.59 examples/s]\n",
+ "(4) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n",
+ "1.758 GB of memory reserved.\n",
+ "==((====))== Unsloth - 2x faster free finetuning | Num GPUs = 1\n",
+ " \\\\ /| Num examples = 4,528 | Num Epochs = 10\n",
+ "O^O/ \\_/ \\ Batch size per device = 2 | Gradient Accumulation steps = 4\n",
+ "\\ / Total batch size = 8 | Total steps = 5,660\n",
+ " \"-____-\" Number of trainable parameters = 18,464,768\n",
+ "{'loss': 1.7415, 'grad_norm': 0.6513338088989258, 'learning_rate': 0.00019664014146772768, 'epoch': 0.18}\n",
+ "{'loss': 1.5683, 'grad_norm': 0.6299685835838318, 'learning_rate': 0.0001931034482758621, 'epoch': 0.35}\n",
+ "{'loss': 1.5159, 'grad_norm': 0.6519908905029297, 'learning_rate': 0.00018956675508399648, 'epoch': 0.53}\n",
+ "{'loss': 1.5172, 'grad_norm': 0.580685019493103, 'learning_rate': 0.00018603006189213086, 'epoch': 0.71}\n",
+ "{'loss': 1.496, 'grad_norm': 0.5678043365478516, 'learning_rate': 0.00018249336870026527, 'epoch': 0.88}\n",
+ "{'loss': 1.4184, 'grad_norm': 0.6064684987068176, 'learning_rate': 0.00017895667550839965, 'epoch': 1.06}\n",
+ "{'loss': 1.2976, 'grad_norm': 0.7216887474060059, 'learning_rate': 0.00017541998231653406, 'epoch': 1.24}\n",
+ "{'loss': 1.3393, 'grad_norm': 0.6848570108413696, 'learning_rate': 0.00017188328912466844, 'epoch': 1.41}\n",
+ "{'loss': 1.2784, 'grad_norm': 0.9106780886650085, 'learning_rate': 0.00016834659593280285, 'epoch': 1.59}\n",
+ "{'loss': 1.2801, 'grad_norm': 0.8649025559425354, 'learning_rate': 0.00016480990274093723, 'epoch': 1.77}\n",
+ "{'loss': 1.3313, 'grad_norm': 0.880467414855957, 'learning_rate': 0.00016127320954907164, 'epoch': 1.94}\n",
+ "{'loss': 1.1276, 'grad_norm': 1.0269683599472046, 'learning_rate': 0.000157736516357206, 'epoch': 2.12}\n",
+ "{'loss': 0.9956, 'grad_norm': 1.0964370965957642, 'learning_rate': 0.0001541998231653404, 'epoch': 2.3}\n",
+ "{'loss': 1.0299, 'grad_norm': 1.298215389251709, 'learning_rate': 0.0001506631299734748, 'epoch': 2.47}\n",
+ "{'loss': 1.0348, 'grad_norm': 1.1091656684875488, 'learning_rate': 0.0001471264367816092, 'epoch': 2.65}\n",
+ "{'loss': 1.0346, 'grad_norm': 1.0557124614715576, 'learning_rate': 0.0001435897435897436, 'epoch': 2.83}\n",
+ "{'loss': 1.0081, 'grad_norm': 0.8857123255729675, 'learning_rate': 0.000140053050397878, 'epoch': 3.0}\n",
+ "{'loss': 0.6977, 'grad_norm': 1.3270587921142578, 'learning_rate': 0.0001365163572060124, 'epoch': 3.18}\n",
+ "{'loss': 0.7104, 'grad_norm': 1.3964347839355469, 'learning_rate': 0.00013297966401414678, 'epoch': 3.36}\n",
+ "{'loss': 0.7135, 'grad_norm': 1.324993371963501, 'learning_rate': 0.0001294429708222812, 'epoch': 3.53}\n",
+ "{'loss': 0.7054, 'grad_norm': 1.4066098928451538, 'learning_rate': 0.00012590627763041555, 'epoch': 3.71}\n",
+ "{'loss': 0.7424, 'grad_norm': 1.287760615348816, 'learning_rate': 0.00012236958443854996, 'epoch': 3.89}\n",
+ "{'loss': 0.6421, 'grad_norm': 1.2553821802139282, 'learning_rate': 0.00011883289124668435, 'epoch': 4.06}\n",
+ "{'loss': 0.4585, 'grad_norm': 1.6327370405197144, 'learning_rate': 0.00011529619805481875, 'epoch': 4.24}\n",
+ "{'loss': 0.4444, 'grad_norm': 1.6593241691589355, 'learning_rate': 0.00011175950486295315, 'epoch': 4.42}\n",
+ "{'loss': 0.4802, 'grad_norm': 1.5733627080917358, 'learning_rate': 0.00010822281167108754, 'epoch': 4.59}\n",
+ "{'loss': 0.4779, 'grad_norm': 1.6328530311584473, 'learning_rate': 0.00010468611847922194, 'epoch': 4.77}\n",
+ "{'loss': 0.4873, 'grad_norm': 1.664158582687378, 'learning_rate': 0.00010114942528735633, 'epoch': 4.95}\n",
+ "{'loss': 0.3602, 'grad_norm': 1.923504114151001, 'learning_rate': 9.761273209549072e-05, 'epoch': 5.12}\n",
+ "{'loss': 0.2924, 'grad_norm': 1.2101478576660156, 'learning_rate': 9.407603890362513e-05, 'epoch': 5.3}\n",
+ "{'loss': 0.2975, 'grad_norm': 1.3352692127227783, 'learning_rate': 9.053934571175951e-05, 'epoch': 5.48}\n",
+ "{'loss': 0.3142, 'grad_norm': 1.528309941291809, 'learning_rate': 8.70026525198939e-05, 'epoch': 5.65}\n",
+ "{'loss': 0.306, 'grad_norm': 1.3299399614334106, 'learning_rate': 8.34659593280283e-05, 'epoch': 5.83}\n",
+ "{'loss': 0.3081, 'grad_norm': 0.5754266381263733, 'learning_rate': 7.99292661361627e-05, 'epoch': 6.01}\n",
+ "{'loss': 0.1935, 'grad_norm': 1.24480402469635, 'learning_rate': 7.639257294429708e-05, 'epoch': 6.18}\n",
+ "{'loss': 0.2045, 'grad_norm': 1.5091683864593506, 'learning_rate': 7.285587975243147e-05, 'epoch': 6.36}\n",
+ "{'loss': 0.2071, 'grad_norm': 0.9175063371658325, 'learning_rate': 6.931918656056587e-05, 'epoch': 6.54}\n",
+ "{'loss': 0.2055, 'grad_norm': 1.013519525527954, 'learning_rate': 6.578249336870027e-05, 'epoch': 6.71}\n",
+ "{'loss': 0.2133, 'grad_norm': 2.126910448074341, 'learning_rate': 6.224580017683466e-05, 'epoch': 6.89}\n",
+ "{'loss': 0.194, 'grad_norm': 0.8879603147506714, 'learning_rate': 5.870910698496905e-05, 'epoch': 7.07}\n",
+ "{'loss': 0.149, 'grad_norm': 0.7381364107131958, 'learning_rate': 5.517241379310345e-05, 'epoch': 7.24}\n",
+ "{'loss': 0.1564, 'grad_norm': 0.7718709111213684, 'learning_rate': 5.163572060123785e-05, 'epoch': 7.42}\n",
+ "{'loss': 0.1573, 'grad_norm': 0.6855681538581848, 'learning_rate': 4.809902740937224e-05, 'epoch': 7.6}\n",
+ "{'loss': 0.1578, 'grad_norm': 0.9972281455993652, 'learning_rate': 4.4562334217506634e-05, 'epoch': 7.77}\n",
+ "{'loss': 0.1588, 'grad_norm': 0.818317174911499, 'learning_rate': 4.1025641025641023e-05, 'epoch': 7.95}\n",
+ "{'loss': 0.1375, 'grad_norm': 0.46658992767333984, 'learning_rate': 3.7488947833775426e-05, 'epoch': 8.13}\n",
+ "{'loss': 0.1304, 'grad_norm': 0.5777143836021423, 'learning_rate': 3.3952254641909815e-05, 'epoch': 8.3}\n",
+ "{'loss': 0.1315, 'grad_norm': 0.4499460756778717, 'learning_rate': 3.041556145004421e-05, 'epoch': 8.48}\n",
+ "{'loss': 0.1327, 'grad_norm': 0.4142455756664276, 'learning_rate': 2.6878868258178604e-05, 'epoch': 8.66}\n",
+ "{'loss': 0.1337, 'grad_norm': 0.6300601959228516, 'learning_rate': 2.3342175066313e-05, 'epoch': 8.83}\n",
+ "{'loss': 0.1327, 'grad_norm': 0.4785163998603821, 'learning_rate': 1.9805481874447392e-05, 'epoch': 9.01}\n",
+ "{'loss': 0.1178, 'grad_norm': 0.4512840211391449, 'learning_rate': 1.6268788682581788e-05, 'epoch': 9.19}\n",
+ "{'loss': 0.1186, 'grad_norm': 0.5308012366294861, 'learning_rate': 1.273209549071618e-05, 'epoch': 9.36}\n",
+ "{'loss': 0.1195, 'grad_norm': 0.4567421078681946, 'learning_rate': 9.195402298850575e-06, 'epoch': 9.54}\n",
+ "{'loss': 0.1203, 'grad_norm': 0.68560391664505, 'learning_rate': 5.658709106984969e-06, 'epoch': 9.72}\n",
+ "{'loss': 0.1231, 'grad_norm': 0.41683992743492126, 'learning_rate': 2.1220159151193635e-06, 'epoch': 9.89}\n",
+ "{'train_runtime': 16113.2955, 'train_samples_per_second': 2.81, 'train_steps_per_second': 0.351, 'train_loss': 0.5992827712857681, 'epoch': 10.0}\n",
+ "100%|█████████████████████████████████████| 5660/5660 [4:28:33<00:00, 2.85s/it]\n",
+ "(5) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n",
+ "16113.2955 seconds used for training.\n",
+ "268.55 minutes used for training.\n",
+ "Peak reserved memory = 2.367 GB.\n",
+ "Peak reserved memory for training = 0.609 GB.\n",
+ "Peak reserved memory % of max memory = 19.735 %.\n",
+ "Peak reserved memory for training % of max memory = 5.078 %.\n",
+ "Evaluating fine-tuned model: unsloth/Qwen2-1.5B-Instruct-bnb-4bit\n",
+ " 0%| | 0/1133 [00:00, ?it/s]--------\n",
+ "step 1: Old Geng raised his pistol, cocked it, and fired – a string of pocky-gun shots sent the shooting birds skittering to the ground.<|im_end|>\n",
+ "--------\n",
+ "step 2: Old Geng raised his pistol, cocked it, and fired – a string of pocky-gun shots sent the shooting birds skittering to the ground.\n",
+ "--------\n",
+ "step 3: Old Geng raised his pistol, cocked it, and fired – a string of pocky-gun shots sent the shooting birds skittering to the ground.\n",
+ "100%|█████████████████████████████████████| 1133/1133 [1:44:47<00:00, 5.55s/it]\n",
+ " chinese ... unsloth/Qwen2-1.5B-Instruct-bnb-4bit(finetuned)\n",
+ "0 老耿端起枪,眯缝起一只三角眼,一搂扳机响了枪,冰雹般的金麻雀劈哩啪啦往下落,铁砂子在柳枝间飞... ... Old Geng raised his pistol, cocked it, and fir...\n",
+ "\n",
+ "[1 rows x 10 columns]\n",
+ "(6) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n",
+ "2.367 GB of memory reserved.\n",
+ "model.safetensors: 100%|███████████████████| 1.22G/1.22G [04:55<00:00, 4.12MB/s]\n",
+ "Saved model to https://huggingface.co/Qwen2-1.5B-Instruct-bnb-4bit-MAC-lora\n",
+ "README.md: 100%|███████████████████████████████| 599/599 [00:00<00:00, 2.91MB/s]\n",
+ "Unsloth: Saving tokenizer... Done.\n",
+ "Unsloth: Saving model... Done.\n",
+ "Unsloth: Saving LoRA adapters. Please wait...\n",
+ "401 Client Error: Unauthorized for url: https://huggingface.co/api/repos/create (Request ID: Root=1-667c46cc-179d7b0b46deeb6521c1ce66;676583ba-2144-4b1c-928b-aafd3708580e)\n",
+ "\n",
+ "Invalid username or password.\n",
+ "CPU times: user 20min 59s, sys: 8min 13s, total: 29min 13s\n",
+ "Wall time: 14h 32min 29s\n"
+ ]
+ }
+ ],
+ "source": [
+ "%%time\n",
+ "\n",
+ "!./scripts/tune-small.sh"
+ ]
+ }
+ ],
+ "metadata": {
+ "accelerator": "GPU",
+ "application/vnd.databricks.v1+notebook": {
+ "dashboards": [],
+ "environmentMetadata": null,
+ "language": "python",
+ "notebookMetadata": {
+ "pythonIndentUnit": 4
+ },
+ "notebookName": "07_MAC_+_Qwen2-7B-Instructi_Unsloth_train",
+ "widgets": {}
+ },
+ "colab": {
+ "gpuType": "T4",
+ "provenance": []
+ },
+ "kernelspec": {
+ "display_name": "Python 3",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.11.9"
+ },
+ "widgets": {
+ "application/vnd.jupyter.widget-state+json": {
+ "036fc5746f43416db18c19ad8fd36677": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "06e806c82c7b4cbea31c5358dd9c3434": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "087b76a8b7514269b1f0ab29b062e444": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_a069d2ab23824f29aa320ac256e2cfe9",
+ "placeholder": "",
+ "style": "IPY_MODEL_06e806c82c7b4cbea31c5358dd9c3434",
+ "value": "Map (num_proc=2): 100%"
+ }
+ },
+ "09b76013aa9e45efb6deb23a7a0d0925": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_dea41c5260884aa6879b5e1d1697b14f",
+ "placeholder": "",
+ "style": "IPY_MODEL_89965917796a4f81b899fdc7685f33df",
+ "value": "config.json: 100%"
+ }
+ },
+ "0a92c56bfa134ef583220d7ef0b13e17": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "0c34be936c8145d3ab41282f30a70713": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "0f8b6bfe16894500838793f2491d403f": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "177c78fce95d4b4ab33057c5a048d693": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "1f44c9ce1adf470cbb19784493ed209f": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_0c34be936c8145d3ab41282f30a70713",
+ "placeholder": "",
+ "style": "IPY_MODEL_0a92c56bfa134ef583220d7ef0b13e17",
+ "value": "model.safetensors: 100%"
+ }
+ },
+ "201b59ccd9f845e197029b57e424aefc": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "2157f01726d748f8a9ae4a00664430da": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "21db8a77b00d4a4e82fdfa608657531f": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "26e4202cca81496a90d15a0dd4ca9cf1": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_ba90fdb8822d47dab7ba203bee297f37",
+ "IPY_MODEL_61560ff6a36b44f4a9dfdae5c52791d4",
+ "IPY_MODEL_95fbe66647904c06a20f640630d6dc0e"
+ ],
+ "layout": "IPY_MODEL_57182a263d324a3dbf1471c74290a0d5"
+ }
+ },
+ "27155728b6b84cb199c91c940095d0a8": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_6b91feeed5464877991ac2c207aebe7c",
+ "IPY_MODEL_cca8113c54c0495daedce1327bf9c68b",
+ "IPY_MODEL_2e63a29e2f7247bba5beede9a568c99f"
+ ],
+ "layout": "IPY_MODEL_5c9d781c28944f3eb86e2a6d44efdf18"
+ }
+ },
+ "271ddaa553a042d09b6db7b450643d8f": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "2a58d04b428c46f4b3dbadd3bc6cd529": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "2d18ddf6482c4d97829ac0e5a7b9868f": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_9f679ad3ec7f4fe8ad0510ffb57bc2ab",
+ "IPY_MODEL_f2df530d22c74977b249dd9fb5f4829b",
+ "IPY_MODEL_89b2ef0dbfea47ab8e6f8d659e3351d1"
+ ],
+ "layout": "IPY_MODEL_3056b148aa9f4e6e8aa3b61d26886255"
+ }
+ },
+ "2e5087c76f98437cb5dc729230358cba": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "2e63a29e2f7247bba5beede9a568c99f": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_b993eaec6b224440bf80c0958c6fb536",
+ "placeholder": "",
+ "style": "IPY_MODEL_de868e26e7154f62aa86223a539ad421",
+ "value": " 464/464 [00:00<00:00, 27.1kB/s]"
+ }
+ },
+ "2f6c70dd266c4816bfad3fd3d192929a": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "30307300bc4e4baf96560e30969a82b6": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_e36a3f9eff0e4cf68834d66b0213ae96",
+ "placeholder": "",
+ "style": "IPY_MODEL_a0037bdccf254159becde630bee3d1db",
+ "value": "generation_config.json: 100%"
+ }
+ },
+ "3056b148aa9f4e6e8aa3b61d26886255": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "30cdc32298134cb0be4d41615b9e5774": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "3572201bd4d74a58b7a665f9bdfdcdba": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "35b0e8c26d6640e9bd0ed7b242a423d8": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_2e5087c76f98437cb5dc729230358cba",
+ "max": 51760,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_036fc5746f43416db18c19ad8fd36677",
+ "value": 51760
+ }
+ },
+ "36166c7bcb854b34aca1f41a5d6ea50b": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "370692d819df41828b48c4ad446f977b": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "39b29a75374b45c0a22506010be2b84e": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_30cdc32298134cb0be4d41615b9e5774",
+ "max": 1179,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_47928317548c454bba6358ab132e8dee",
+ "value": 1179
+ }
+ },
+ "3cf2dd993b5e4d3daecf61e4bab5a404": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_087b76a8b7514269b1f0ab29b062e444",
+ "IPY_MODEL_35b0e8c26d6640e9bd0ed7b242a423d8",
+ "IPY_MODEL_54ad89e05fd74576b9b8b5b5a10eaf8d"
+ ],
+ "layout": "IPY_MODEL_a41dc44766444a998bec2d777f249d23"
+ }
+ },
+ "43dec2ede91341f5af60eb522e18e984": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "4463edd481c1467f914c7dcd6c6e6ffc": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "47928317548c454bba6358ab132e8dee": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "49277aeeac16434a865a4d12308b1abc": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "4ae7e449e4ea4c729b5f34607c18ebae": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "4b2061b8a73c43ffb0c2f83daf0d0183": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "4c4c88d4c701450692fa0f6b0c5764b0": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "4c666f4ace3943f8b80ecd20e7503236": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "4ccedf0d93094e63b57a0f8a434fba06": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_4463edd481c1467f914c7dcd6c6e6ffc",
+ "max": 44307561,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_6d3b9a05db0b4dadb638c686faa0c40a",
+ "value": 44307561
+ }
+ },
+ "4dcf6ff672d24983a1877a8431709aa9": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_5807d5fb827d490fb3bc698f801ffff5",
+ "placeholder": "",
+ "style": "IPY_MODEL_c4f2b06a82fd4987b8b659524a7b503b",
+ "value": "Generating train split: 100%"
+ }
+ },
+ "4ea63adfce694725bdba878aef709dd3": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "5234566b1bfc4655b8d582ea5b46ed9f": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "54ad89e05fd74576b9b8b5b5a10eaf8d": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_fdb1941405ed4e4aa06019933892deb3",
+ "placeholder": "",
+ "style": "IPY_MODEL_668d5377ca56426a99753867e6e24862",
+ "value": " 51760/51760 [01:02<00:00, 1131.51 examples/s]"
+ }
+ },
+ "56aee4853b7740e6a977254f5d1fa66d": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "57182a263d324a3dbf1471c74290a0d5": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "5807d5fb827d490fb3bc698f801ffff5": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "5c9d781c28944f3eb86e2a6d44efdf18": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "5f40db8173dd4d76b6ef5ed6d9ec8b6e": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "61560ff6a36b44f4a9dfdae5c52791d4": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_db19fc8d37db4e45a5790a876836d8c4",
+ "max": 11610,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_36166c7bcb854b34aca1f41a5d6ea50b",
+ "value": 11610
+ }
+ },
+ "6578fd7acdb54c4c93528ea431fd0144": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_370692d819df41828b48c4ad446f977b",
+ "placeholder": "",
+ "style": "IPY_MODEL_a0bf9160eb2647409b3200270914b90f",
+ "value": " 50.6k/50.6k [00:00<00:00, 2.71MB/s]"
+ }
+ },
+ "668d5377ca56426a99753867e6e24862": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "697f027529b54ee9956bae78a11e0611": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "69ac12aec0714318bf2c83d4f4e745f5": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "6b2012c3f88547af8884a9ea90e3164b": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_938f45f1b3e24118b815d96ae34ba86a",
+ "placeholder": "",
+ "style": "IPY_MODEL_9367047a800747f79c6b225d92397846",
+ "value": " 44.3M/44.3M [00:01<00:00, 31.0MB/s]"
+ }
+ },
+ "6b91feeed5464877991ac2c207aebe7c": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_4b2061b8a73c43ffb0c2f83daf0d0183",
+ "placeholder": "",
+ "style": "IPY_MODEL_69ac12aec0714318bf2c83d4f4e745f5",
+ "value": "special_tokens_map.json: 100%"
+ }
+ },
+ "6d3b9a05db0b4dadb638c686faa0c40a": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "6dbbedeca9314e66ae50e44ffa31a414": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "6e34619b45934040b6092e6fb01ea7fe": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "71ce208e20d6483abb9ed923510c86d7": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_d69dc491b3ab44d7852b21873ed7bb7f",
+ "placeholder": "",
+ "style": "IPY_MODEL_f401d53bf28e44eb906bce6c05412662",
+ "value": " 51760/51760 [00:01<00:00, 45512.81 examples/s]"
+ }
+ },
+ "7358cdad832342c983e31efb8754ab78": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "73e352a3404f4c7dad0737f57d29e92f": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_988a0e8c1f89446086858da0a891a79c",
+ "IPY_MODEL_4ccedf0d93094e63b57a0f8a434fba06",
+ "IPY_MODEL_6b2012c3f88547af8884a9ea90e3164b"
+ ],
+ "layout": "IPY_MODEL_7e29cb8dd4df4d5b94407cd8fd3f2011"
+ }
+ },
+ "74501720ac7e4dbb911a4a99b3633bc6": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "78e5400bff924a92a4cc61c4ff18b182": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_b9b313fd861948f5aba25b24b1518d30",
+ "placeholder": "",
+ "style": "IPY_MODEL_4c666f4ace3943f8b80ecd20e7503236",
+ "value": " 1.18k/1.18k [00:00<00:00, 31.3kB/s]"
+ }
+ },
+ "7975adbc2ec5489ea7fa0167e620d85c": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_6e34619b45934040b6092e6fb01ea7fe",
+ "max": 51760,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_271ddaa553a042d09b6db7b450643d8f",
+ "value": 51760
+ }
+ },
+ "7e29cb8dd4df4d5b94407cd8fd3f2011": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "810ff6c0e17d4fa09a30fef27eacff90": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "89965917796a4f81b899fdc7685f33df": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "89b2ef0dbfea47ab8e6f8d659e3351d1": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_b8908fa0df3743ecb9d12983a739104f",
+ "placeholder": "",
+ "style": "IPY_MODEL_177c78fce95d4b4ab33057c5a048d693",
+ "value": " 9.09M/9.09M [00:00<00:00, 32.6MB/s]"
+ }
+ },
+ "8b3505352a5a42bf910428c40ce40465": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_49277aeeac16434a865a4d12308b1abc",
+ "placeholder": "",
+ "style": "IPY_MODEL_2157f01726d748f8a9ae4a00664430da",
+ "value": " 5.70G/5.70G [01:02<00:00, 30.1MB/s]"
+ }
+ },
+ "8fc142b628fb40568730234de1cafde2": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_4ae7e449e4ea4c729b5f34607c18ebae",
+ "max": 172,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_3572201bd4d74a58b7a665f9bdfdcdba",
+ "value": 172
+ }
+ },
+ "9367047a800747f79c6b225d92397846": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "938f45f1b3e24118b815d96ae34ba86a": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "95fbe66647904c06a20f640630d6dc0e": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_b0a370dc20654b279b9680692e34418e",
+ "placeholder": "",
+ "style": "IPY_MODEL_cfeb365ddf7548d58b2557f22737fcf5",
+ "value": " 11.6k/11.6k [00:00<00:00, 716kB/s]"
+ }
+ },
+ "988a0e8c1f89446086858da0a891a79c": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_ad2be500fc164c0f86f33e914ef8e6a0",
+ "placeholder": "",
+ "style": "IPY_MODEL_5234566b1bfc4655b8d582ea5b46ed9f",
+ "value": "Downloading data: 100%"
+ }
+ },
+ "98c58f23f4d549518832cb2d18f796e8": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_09b76013aa9e45efb6deb23a7a0d0925",
+ "IPY_MODEL_39b29a75374b45c0a22506010be2b84e",
+ "IPY_MODEL_78e5400bff924a92a4cc61c4ff18b182"
+ ],
+ "layout": "IPY_MODEL_2a58d04b428c46f4b3dbadd3bc6cd529"
+ }
+ },
+ "99fdbb0300c14c139d1937c646f0cfe7": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_7358cdad832342c983e31efb8754ab78",
+ "placeholder": "",
+ "style": "IPY_MODEL_e9adf418296e436fb48bb9f78885598b",
+ "value": " 51760/51760 [00:01<00:00, 38665.95 examples/s]"
+ }
+ },
+ "9f679ad3ec7f4fe8ad0510ffb57bc2ab": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_4ea63adfce694725bdba878aef709dd3",
+ "placeholder": "",
+ "style": "IPY_MODEL_74501720ac7e4dbb911a4a99b3633bc6",
+ "value": "tokenizer.json: 100%"
+ }
+ },
+ "a0037bdccf254159becde630bee3d1db": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "a069d2ab23824f29aa320ac256e2cfe9": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "a0bf9160eb2647409b3200270914b90f": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "a41dc44766444a998bec2d777f249d23": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "a8464a4c711e4e00aafdfc919b60d07e": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_fb995c740590427b882572c81d4e848c",
+ "placeholder": "",
+ "style": "IPY_MODEL_201b59ccd9f845e197029b57e424aefc",
+ "value": " 172/172 [00:00<00:00, 12.0kB/s]"
+ }
+ },
+ "a9f0cc51fc3d4d7b874c32dcf1c5bdf2": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "ad2be500fc164c0f86f33e914ef8e6a0": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "b0240cd9a4554b29ae11f8051984a1c6": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_edaf890370314a218f138015faa0b05d",
+ "placeholder": "",
+ "style": "IPY_MODEL_697f027529b54ee9956bae78a11e0611",
+ "value": "Map: 100%"
+ }
+ },
+ "b0a370dc20654b279b9680692e34418e": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "b518dcee69074b87be73957cd810e7ed": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_d891f8d0b1fc462f8008d02bb2a15692",
+ "placeholder": "",
+ "style": "IPY_MODEL_cced8fd7e998472794f3f3e3018956a5",
+ "value": "tokenizer_config.json: 100%"
+ }
+ },
+ "b8908fa0df3743ecb9d12983a739104f": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "b993eaec6b224440bf80c0958c6fb536": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "b9b313fd861948f5aba25b24b1518d30": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "ba90fdb8822d47dab7ba203bee297f37": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_0f8b6bfe16894500838793f2491d403f",
+ "placeholder": "",
+ "style": "IPY_MODEL_bb19f6c747754682a514373a3a0535ba",
+ "value": "Downloading readme: 100%"
+ }
+ },
+ "bb19f6c747754682a514373a3a0535ba": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "bc883d4cf13e4f8b8a4fe5f410cb6efd": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_e9159e03e61f4f56978ece9c3bca49b2",
+ "max": 51760,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_810ff6c0e17d4fa09a30fef27eacff90",
+ "value": 51760
+ }
+ },
+ "c161d94df0f04feba9542237e0856c22": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "c22f71b1f85843209d7e5321506b9cb9": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_1f44c9ce1adf470cbb19784493ed209f",
+ "IPY_MODEL_f1addc4479d849879e743cf9089e6540",
+ "IPY_MODEL_8b3505352a5a42bf910428c40ce40465"
+ ],
+ "layout": "IPY_MODEL_4c4c88d4c701450692fa0f6b0c5764b0"
+ }
+ },
+ "c4f2b06a82fd4987b8b659524a7b503b": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "cca8113c54c0495daedce1327bf9c68b": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_e02f9b7849c64531835eb77b860d1c93",
+ "max": 464,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_56aee4853b7740e6a977254f5d1fa66d",
+ "value": 464
+ }
+ },
+ "cced8fd7e998472794f3f3e3018956a5": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "cf245afeb1c04f29a24d291608c3d157": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_b518dcee69074b87be73957cd810e7ed",
+ "IPY_MODEL_e29104486d594b2992d7285e0ef77371",
+ "IPY_MODEL_6578fd7acdb54c4c93528ea431fd0144"
+ ],
+ "layout": "IPY_MODEL_d35db8148a354c56aaac56dbae22536f"
+ }
+ },
+ "cfe8cae0e22b495bafa221a63d13b283": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "cfeb365ddf7548d58b2557f22737fcf5": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "d1b47d39450d4019ae85c9b2f943eeaf": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_4dcf6ff672d24983a1877a8431709aa9",
+ "IPY_MODEL_7975adbc2ec5489ea7fa0167e620d85c",
+ "IPY_MODEL_71ce208e20d6483abb9ed923510c86d7"
+ ],
+ "layout": "IPY_MODEL_cfe8cae0e22b495bafa221a63d13b283"
+ }
+ },
+ "d35db8148a354c56aaac56dbae22536f": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "d69dc491b3ab44d7852b21873ed7bb7f": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "d891f8d0b1fc462f8008d02bb2a15692": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "d8e5318cead340c4adbeaccc05d39225": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "daf4cd890b35422683d22fd30bc71e83": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_b0240cd9a4554b29ae11f8051984a1c6",
+ "IPY_MODEL_bc883d4cf13e4f8b8a4fe5f410cb6efd",
+ "IPY_MODEL_99fdbb0300c14c139d1937c646f0cfe7"
+ ],
+ "layout": "IPY_MODEL_c161d94df0f04feba9542237e0856c22"
+ }
+ },
+ "db19fc8d37db4e45a5790a876836d8c4": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "de868e26e7154f62aa86223a539ad421": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "dea41c5260884aa6879b5e1d1697b14f": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "e02f9b7849c64531835eb77b860d1c93": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "e29104486d594b2992d7285e0ef77371": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_a9f0cc51fc3d4d7b874c32dcf1c5bdf2",
+ "max": 50641,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_2f6c70dd266c4816bfad3fd3d192929a",
+ "value": 50641
+ }
+ },
+ "e36a3f9eff0e4cf68834d66b0213ae96": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "e9159e03e61f4f56978ece9c3bca49b2": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "e9adf418296e436fb48bb9f78885598b": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "edaf890370314a218f138015faa0b05d": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "f1addc4479d849879e743cf9089e6540": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_43dec2ede91341f5af60eb522e18e984",
+ "max": 5702746405,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_d8e5318cead340c4adbeaccc05d39225",
+ "value": 5702746405
+ }
+ },
+ "f2df530d22c74977b249dd9fb5f4829b": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_21db8a77b00d4a4e82fdfa608657531f",
+ "max": 9085698,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_6dbbedeca9314e66ae50e44ffa31a414",
+ "value": 9085698
+ }
+ },
+ "f401d53bf28e44eb906bce6c05412662": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "fb995c740590427b882572c81d4e848c": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "fce7a61c25ec4390af43d92b7c473a45": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_30307300bc4e4baf96560e30969a82b6",
+ "IPY_MODEL_8fc142b628fb40568730234de1cafde2",
+ "IPY_MODEL_a8464a4c711e4e00aafdfc919b60d07e"
+ ],
+ "layout": "IPY_MODEL_5f40db8173dd4d76b6ef5ed6d9ec8b6e"
+ }
+ },
+ "fdb1941405ed4e4aa06019933892deb3": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ }
+ }
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
+}
diff --git a/notebooks/07_tune-lf-py3.11.ipynb b/notebooks/07_tune-lf-py3.11.ipynb
new file mode 100644
index 0000000000000000000000000000000000000000..57ffa0c4a882e10d986f5b1189156de26e3837e2
--- /dev/null
+++ b/notebooks/07_tune-lf-py3.11.ipynb
@@ -0,0 +1,7022 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {
+ "application/vnd.databricks.v1+cell": {
+ "cellMetadata": {},
+ "inputWidgets": {},
+ "nuid": "0ea8b46b-839b-445b-8043-ccdf4e920ace",
+ "showTitle": false,
+ "title": ""
+ }
+ },
+ "outputs": [],
+ "source": [
+ "%load_ext autoreload\n",
+ "%autoreload 2"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {
+ "application/vnd.databricks.v1+cell": {
+ "cellMetadata": {},
+ "inputWidgets": {},
+ "nuid": "6d394937-6c99-4a7c-9d32-7600a280032f",
+ "showTitle": false,
+ "title": ""
+ }
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "workding dir: /home/inflaton/code/projects/courses/llm-finetuning\n"
+ ]
+ }
+ ],
+ "source": [
+ "import os\n",
+ "import sys\n",
+ "from pathlib import Path\n",
+ "\n",
+ "workding_dir = str(Path.cwd().parent)\n",
+ "os.chdir(workding_dir)\n",
+ "sys.path.append(workding_dir)\n",
+ "print(\"workding dir:\", workding_dir)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {
+ "application/vnd.databricks.v1+cell": {
+ "cellMetadata": {},
+ "inputWidgets": {},
+ "nuid": "9f67ec60-2f24-411c-84eb-0dd664b44775",
+ "showTitle": false,
+ "title": ""
+ }
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "loading env vars from: /home/inflaton/code/projects/courses/llm-finetuning/.env\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "True"
+ ]
+ },
+ "execution_count": 6,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "from dotenv import find_dotenv, load_dotenv\n",
+ "\n",
+ "found_dotenv = find_dotenv(\".env\")\n",
+ "\n",
+ "if len(found_dotenv) == 0:\n",
+ " found_dotenv = find_dotenv(\".env.example\")\n",
+ "print(f\"loading env vars from: {found_dotenv}\")\n",
+ "load_dotenv(found_dotenv, override=True)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {
+ "application/vnd.databricks.v1+cell": {
+ "cellMetadata": {},
+ "inputWidgets": {},
+ "nuid": "f1597656-8042-4878-9d3b-9ebfb8dd86dc",
+ "showTitle": false,
+ "title": ""
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "('unsloth/Qwen2-0.5B-Instruct-bnb-4bit',\n",
+ " True,\n",
+ " None,\n",
+ " None,\n",
+ " 2048,\n",
+ " 10,\n",
+ " None,\n",
+ " 'datasets/mac/mac.tsv',\n",
+ " 'results/mac-results_lf.csv')"
+ ]
+ },
+ "execution_count": 7,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "import os\n",
+ "\n",
+ "model_name = os.getenv(\"MODEL_NAME\")\n",
+ "token = os.getenv(\"HF_TOKEN\") or None\n",
+ "load_in_4bit = os.getenv(\"LOAD_IN_4BIT\") == \"true\"\n",
+ "local_model = os.getenv(\"LOCAL_MODEL\")\n",
+ "hub_model = os.getenv(\"HUB_MODEL\")\n",
+ "num_train_epochs = int(os.getenv(\"NUM_TRAIN_EPOCHS\") or 0)\n",
+ "data_path = os.getenv(\"DATA_PATH\")\n",
+ "results_path = os.getenv(\"RESULTS_PATH\")\n",
+ "\n",
+ "max_seq_length = 2048 # Choose any! We auto support RoPE Scaling internally!\n",
+ "dtype = (\n",
+ " None # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+\n",
+ ")\n",
+ "\n",
+ "model_name, load_in_4bit, local_model, hub_model, max_seq_length, num_train_epochs, dtype, data_path, results_path"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Sat Jun 29 17:26:00 2024 \n",
+ "+---------------------------------------------------------------------------------------+\n",
+ "| NVIDIA-SMI 545.23.07 Driver Version: 546.12 CUDA Version: 12.3 |\n",
+ "|-----------------------------------------+----------------------+----------------------+\n",
+ "| GPU Name Persistence-M | Bus-Id Disp.A | Volatile Uncorr. ECC |\n",
+ "| Fan Temp Perf Pwr:Usage/Cap | Memory-Usage | GPU-Util Compute M. |\n",
+ "| | | MIG M. |\n",
+ "|=========================================+======================+======================|\n",
+ "| 0 NVIDIA GeForce RTX 4080 ... On | 00000000:01:00.0 Off | N/A |\n",
+ "| N/A 50C P8 4W / 150W | 129MiB / 12282MiB | 0% Default |\n",
+ "| | | N/A |\n",
+ "+-----------------------------------------+----------------------+----------------------+\n",
+ " \n",
+ "+---------------------------------------------------------------------------------------+\n",
+ "| Processes: |\n",
+ "| GPU GI CI PID Type Process name GPU Memory |\n",
+ "| ID ID Usage |\n",
+ "|=======================================================================================|\n",
+ "| No running processes found |\n",
+ "+---------------------------------------------------------------------------------------+\n"
+ ]
+ }
+ ],
+ "source": [
+ "!nvidia-smi"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "[nltk_data] Downloading package wordnet to /home/inflaton/nltk_data...\n",
+ "[nltk_data] Package wordnet is already up-to-date!\n",
+ "[nltk_data] Downloading package punkt to /home/inflaton/nltk_data...\n",
+ "[nltk_data] Package punkt is already up-to-date!\n",
+ "[nltk_data] Downloading package omw-1.4 to /home/inflaton/nltk_data...\n",
+ "[nltk_data] Package omw-1.4 is already up-to-date!\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "loading /home/inflaton/code/projects/courses/llm-finetuning/llm_toolkit/translation_engine.py\n",
+ "loading train/test data files\n"
+ ]
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "fabc731ff8e5499a9c842ef6833f3e98",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "Generating train split: 0 examples [00:00, ? examples/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "2e186baa65dc4dd1956fa2db0d83b4a1",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "Generating test split: 0 examples [00:00, ? examples/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "DatasetDict({\n",
+ " train: Dataset({\n",
+ " features: ['chinese', 'english'],\n",
+ " num_rows: 4528\n",
+ " })\n",
+ " test: Dataset({\n",
+ " features: ['chinese', 'english'],\n",
+ " num_rows: 1133\n",
+ " })\n",
+ "})\n"
+ ]
+ }
+ ],
+ "source": [
+ "from llm_toolkit.translation_engine import load_translation_dataset\n",
+ "\n",
+ "dataset = load_translation_dataset(data_path)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df = dataset[\"train\"].to_pandas()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import pandas as pd\n",
+ "\n",
+ "df_alpaca = pd.DataFrame({\"instruction\": [\"Please translate the following Chinese text into English and provide only the translated content, nothing else.\"]*len(df)})"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 16,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " instruction \n",
+ " input \n",
+ " output \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 \n",
+ " Please translate the following Chinese text in... \n",
+ " 全仗着狐仙搭救。 \n",
+ " Because I was protected by a fox fairy. \n",
+ " \n",
+ " \n",
+ " 1 \n",
+ " Please translate the following Chinese text in... \n",
+ " 过后,表哥告诉她俩,这人是导演,在外国留过学的,还会编剧,今天拍的这戏,就是他自编自导的。 \n",
+ " He was the director, the cousin later told the... \n",
+ " \n",
+ " \n",
+ " 2 \n",
+ " Please translate the following Chinese text in... \n",
+ " 这凤姐忽然想起一件事来,便向窗外叫:“蓉儿回来!” \n",
+ " Xi-feng suddenly seemed to remember something,... \n",
+ " \n",
+ " \n",
+ " 3 \n",
+ " Please translate the following Chinese text in... \n",
+ " 三个老红卫兵走到叶文洁面前,面对着她站成了一排——当年,她们也是这样面对叶哲泰的——试图再现... \n",
+ " The three old Red Guards stood in front of Ye ... \n",
+ " \n",
+ " \n",
+ " 4 \n",
+ " Please translate the following Chinese text in... \n",
+ " 程先生照单全收,都是一个“谢”字,然后问王琦瑶有什么话说。 \n",
+ " Mr. Cheng accepted their toast with equanimity... \n",
+ " \n",
+ " \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " \n",
+ " \n",
+ " 4523 \n",
+ " Please translate the following Chinese text in... \n",
+ " 外边有两张腿歪面裂的八仙桌子,桌旁胡乱搡着几条狭窄的木凳。 \n",
+ " Two rickety tables with scarred tops and a few... \n",
+ " \n",
+ " \n",
+ " 4524 \n",
+ " Please translate the following Chinese text in... \n",
+ " 贾瑞听了,喜的抓耳挠腮。 \n",
+ " At this last remark Jia Rui positively scratch... \n",
+ " \n",
+ " \n",
+ " 4525 \n",
+ " Please translate the following Chinese text in... \n",
+ " 听了这样的评价,我们心情激动,和大家一起振臂高呼:打倒王二! \n",
+ " Hearing comments like this, our emotions were ... \n",
+ " \n",
+ " \n",
+ " 4526 \n",
+ " Please translate the following Chinese text in... \n",
+ " 海老公道:“记住了吗?” \n",
+ " 'Can you remember that?' \n",
+ " \n",
+ " \n",
+ " 4527 \n",
+ " Please translate the following Chinese text in... \n",
+ " 上面说,这样写缺少细节。 \n",
+ " This time the opinions from above said it need... \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
4528 rows × 3 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " instruction \\\n",
+ "0 Please translate the following Chinese text in... \n",
+ "1 Please translate the following Chinese text in... \n",
+ "2 Please translate the following Chinese text in... \n",
+ "3 Please translate the following Chinese text in... \n",
+ "4 Please translate the following Chinese text in... \n",
+ "... ... \n",
+ "4523 Please translate the following Chinese text in... \n",
+ "4524 Please translate the following Chinese text in... \n",
+ "4525 Please translate the following Chinese text in... \n",
+ "4526 Please translate the following Chinese text in... \n",
+ "4527 Please translate the following Chinese text in... \n",
+ "\n",
+ " input \\\n",
+ "0 全仗着狐仙搭救。 \n",
+ "1 过后,表哥告诉她俩,这人是导演,在外国留过学的,还会编剧,今天拍的这戏,就是他自编自导的。 \n",
+ "2 这凤姐忽然想起一件事来,便向窗外叫:“蓉儿回来!” \n",
+ "3 三个老红卫兵走到叶文洁面前,面对着她站成了一排——当年,她们也是这样面对叶哲泰的——试图再现... \n",
+ "4 程先生照单全收,都是一个“谢”字,然后问王琦瑶有什么话说。 \n",
+ "... ... \n",
+ "4523 外边有两张腿歪面裂的八仙桌子,桌旁胡乱搡着几条狭窄的木凳。 \n",
+ "4524 贾瑞听了,喜的抓耳挠腮。 \n",
+ "4525 听了这样的评价,我们心情激动,和大家一起振臂高呼:打倒王二! \n",
+ "4526 海老公道:“记住了吗?” \n",
+ "4527 上面说,这样写缺少细节。 \n",
+ "\n",
+ " output \n",
+ "0 Because I was protected by a fox fairy. \n",
+ "1 He was the director, the cousin later told the... \n",
+ "2 Xi-feng suddenly seemed to remember something,... \n",
+ "3 The three old Red Guards stood in front of Ye ... \n",
+ "4 Mr. Cheng accepted their toast with equanimity... \n",
+ "... ... \n",
+ "4523 Two rickety tables with scarred tops and a few... \n",
+ "4524 At this last remark Jia Rui positively scratch... \n",
+ "4525 Hearing comments like this, our emotions were ... \n",
+ "4526 'Can you remember that?' \n",
+ "4527 This time the opinions from above said it need... \n",
+ "\n",
+ "[4528 rows x 3 columns]"
+ ]
+ },
+ "execution_count": 16,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df_alpaca[\"input\"] = df[\"chinese\"]\n",
+ "df_alpaca[\"output\"] = df[\"english\"]\n",
+ "df_alpaca"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 27,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df_alpaca.to_json(\n",
+ " \"llama-factory/data/alpaca_mac.json\", orient=\"records\", lines=False, indent=2\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 33,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df = pd.read_json(\"llama-factory/data/alpaca_mac.json\", orient=\"records\", lines=False)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 34,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " instruction \n",
+ " input \n",
+ " output \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 \n",
+ " Please translate the following Chinese text in... \n",
+ " 全仗着狐仙搭救。 \n",
+ " Because I was protected by a fox fairy. \n",
+ " \n",
+ " \n",
+ " 1 \n",
+ " Please translate the following Chinese text in... \n",
+ " 过后,表哥告诉她俩,这人是导演,在外国留过学的,还会编剧,今天拍的这戏,就是他自编自导的。 \n",
+ " He was the director, the cousin later told the... \n",
+ " \n",
+ " \n",
+ " 2 \n",
+ " Please translate the following Chinese text in... \n",
+ " 这凤姐忽然想起一件事来,便向窗外叫:“蓉儿回来!” \n",
+ " Xi-feng suddenly seemed to remember something,... \n",
+ " \n",
+ " \n",
+ " 3 \n",
+ " Please translate the following Chinese text in... \n",
+ " 三个老红卫兵走到叶文洁面前,面对着她站成了一排——当年,她们也是这样面对叶哲泰的——试图再现... \n",
+ " The three old Red Guards stood in front of Ye ... \n",
+ " \n",
+ " \n",
+ " 4 \n",
+ " Please translate the following Chinese text in... \n",
+ " 程先生照单全收,都是一个“谢”字,然后问王琦瑶有什么话说。 \n",
+ " Mr. Cheng accepted their toast with equanimity... \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " instruction \\\n",
+ "0 Please translate the following Chinese text in... \n",
+ "1 Please translate the following Chinese text in... \n",
+ "2 Please translate the following Chinese text in... \n",
+ "3 Please translate the following Chinese text in... \n",
+ "4 Please translate the following Chinese text in... \n",
+ "\n",
+ " input \\\n",
+ "0 全仗着狐仙搭救。 \n",
+ "1 过后,表哥告诉她俩,这人是导演,在外国留过学的,还会编剧,今天拍的这戏,就是他自编自导的。 \n",
+ "2 这凤姐忽然想起一件事来,便向窗外叫:“蓉儿回来!” \n",
+ "3 三个老红卫兵走到叶文洁面前,面对着她站成了一排——当年,她们也是这样面对叶哲泰的——试图再现... \n",
+ "4 程先生照单全收,都是一个“谢”字,然后问王琦瑶有什么话说。 \n",
+ "\n",
+ " output \n",
+ "0 Because I was protected by a fox fairy. \n",
+ "1 He was the director, the cousin later told the... \n",
+ "2 Xi-feng suddenly seemed to remember something,... \n",
+ "3 The three old Red Guards stood in front of Ye ... \n",
+ "4 Mr. Cheng accepted their toast with equanimity... "
+ ]
+ },
+ "execution_count": 34,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 30,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Python 3.11.9\n",
+ "\u001b[33mWARNING: Package(s) not found: flash-attn\u001b[0m\u001b[33m\n",
+ "\u001b[0mCPU times: user 23.2 ms, sys: 3.38 ms, total: 26.6 ms\n",
+ "Wall time: 518 ms\n"
+ ]
+ }
+ ],
+ "source": [
+ "%%time\n",
+ "!python --version\n",
+ "!pip show flash-attn"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 38,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Current Directory:\n",
+ "/home/inflaton/code/projects/courses/llm-finetuning/llama-factory\n",
+ "06/29/2024 21:58:18 - INFO - llamafactory.hparams.parser - Process rank: 0, device: cuda:0, n_gpu: 1, distributed training: False, compute dtype: torch.bfloat16\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-06-29 21:58:18,444 >> loading file vocab.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/vocab.json\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-06-29 21:58:18,444 >> loading file merges.txt from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/merges.txt\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-06-29 21:58:18,444 >> loading file tokenizer.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/tokenizer.json\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-06-29 21:58:18,444 >> loading file added_tokens.json from cache at None\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-06-29 21:58:18,444 >> loading file special_tokens_map.json from cache at None\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-06-29 21:58:18,444 >> loading file tokenizer_config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/tokenizer_config.json\n",
+ "[WARNING|logging.py:313] 2024-06-29 21:58:18,572 >> Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n",
+ "06/29/2024 21:58:18 - INFO - llamafactory.data.template - Replace eos token: <|im_end|>\n",
+ "06/29/2024 21:58:18 - INFO - llamafactory.data.template - Add <|im_start|> to stop words.\n",
+ "06/29/2024 21:58:18 - INFO - llamafactory.data.loader - Loading dataset alpaca_mac.json...\n",
+ "Converting format of dataset (num_proc=16): 100%|█| 4528/4528 [00:00<00:00, 1613\n",
+ "Running tokenizer on dataset (num_proc=16): 100%|█| 4528/4528 [00:01<00:00, 3159\n",
+ "input_ids:\n",
+ "[151644, 872, 198, 5501, 14683, 279, 2701, 8453, 1467, 1119, 6364, 323, 3410, 1172, 279, 24531, 2213, 11, 4302, 770, 624, 35987, 102895, 99164, 100324, 100717, 100095, 99509, 1773, 151645, 198, 151644, 77091, 198, 17949, 358, 572, 2617, 553, 264, 38835, 44486, 13, 151645]\n",
+ "inputs:\n",
+ "<|im_start|>user\n",
+ "Please translate the following Chinese text into English and provide only the translated content, nothing else.\n",
+ "全仗着狐仙搭救。<|im_end|>\n",
+ "<|im_start|>assistant\n",
+ "Because I was protected by a fox fairy.<|im_end|>\n",
+ "label_ids:\n",
+ "[-100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, 17949, 358, 572, 2617, 553, 264, 38835, 44486, 13, 151645]\n",
+ "labels:\n",
+ "Because I was protected by a fox fairy.<|im_end|>\n",
+ "[INFO|configuration_utils.py:733] 2024-06-29 21:58:21,872 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/config.json\n",
+ "[INFO|configuration_utils.py:800] 2024-06-29 21:58:21,873 >> Model config Qwen2Config {\n",
+ " \"_name_or_path\": \"Qwen/Qwen2-0.5B-Instruct\",\n",
+ " \"architectures\": [\n",
+ " \"Qwen2ForCausalLM\"\n",
+ " ],\n",
+ " \"attention_dropout\": 0.0,\n",
+ " \"bos_token_id\": 151643,\n",
+ " \"eos_token_id\": 151645,\n",
+ " \"hidden_act\": \"silu\",\n",
+ " \"hidden_size\": 896,\n",
+ " \"initializer_range\": 0.02,\n",
+ " \"intermediate_size\": 4864,\n",
+ " \"max_position_embeddings\": 32768,\n",
+ " \"max_window_layers\": 24,\n",
+ " \"model_type\": \"qwen2\",\n",
+ " \"num_attention_heads\": 14,\n",
+ " \"num_hidden_layers\": 24,\n",
+ " \"num_key_value_heads\": 2,\n",
+ " \"rms_norm_eps\": 1e-06,\n",
+ " \"rope_theta\": 1000000.0,\n",
+ " \"sliding_window\": 32768,\n",
+ " \"tie_word_embeddings\": true,\n",
+ " \"torch_dtype\": \"bfloat16\",\n",
+ " \"transformers_version\": \"4.42.3\",\n",
+ " \"use_cache\": true,\n",
+ " \"use_sliding_window\": false,\n",
+ " \"vocab_size\": 151936\n",
+ "}\n",
+ "\n",
+ "[INFO|modeling_utils.py:3556] 2024-06-29 21:58:21,942 >> loading weights file model.safetensors from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/model.safetensors\n",
+ "[INFO|modeling_utils.py:1531] 2024-06-29 21:58:24,477 >> Instantiating Qwen2ForCausalLM model under default dtype torch.bfloat16.\n",
+ "[INFO|configuration_utils.py:1000] 2024-06-29 21:58:24,480 >> Generate config GenerationConfig {\n",
+ " \"bos_token_id\": 151643,\n",
+ " \"eos_token_id\": 151645\n",
+ "}\n",
+ "\n",
+ "[INFO|modeling_utils.py:4364] 2024-06-29 21:58:59,030 >> All model checkpoint weights were used when initializing Qwen2ForCausalLM.\n",
+ "\n",
+ "[INFO|modeling_utils.py:4372] 2024-06-29 21:58:59,030 >> All the weights of Qwen2ForCausalLM were initialized from the model checkpoint at Qwen/Qwen2-0.5B-Instruct.\n",
+ "If your task is similar to the task the model of the checkpoint was trained on, you can already use Qwen2ForCausalLM for predictions without further training.\n",
+ "[INFO|configuration_utils.py:955] 2024-06-29 21:58:59,317 >> loading configuration file generation_config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/generation_config.json\n",
+ "[INFO|configuration_utils.py:1000] 2024-06-29 21:58:59,317 >> Generate config GenerationConfig {\n",
+ " \"bos_token_id\": 151643,\n",
+ " \"do_sample\": true,\n",
+ " \"eos_token_id\": [\n",
+ " 151645,\n",
+ " 151643\n",
+ " ],\n",
+ " \"pad_token_id\": 151643,\n",
+ " \"repetition_penalty\": 1.1,\n",
+ " \"temperature\": 0.7,\n",
+ " \"top_k\": 20,\n",
+ " \"top_p\": 0.8\n",
+ "}\n",
+ "\n",
+ "06/29/2024 21:58:59 - INFO - llamafactory.model.model_utils.checkpointing - Gradient checkpointing enabled.\n",
+ "06/29/2024 21:58:59 - INFO - llamafactory.model.model_utils.attention - Using torch SDPA for faster training and inference.\n",
+ "06/29/2024 21:58:59 - INFO - llamafactory.model.adapter - Upcasting trainable params to float32.\n",
+ "06/29/2024 21:58:59 - INFO - llamafactory.model.adapter - Fine-tuning method: LoRA\n",
+ "06/29/2024 21:58:59 - INFO - llamafactory.model.model_utils.misc - Found linear modules: q_proj,up_proj,k_proj,v_proj,gate_proj,down_proj,o_proj\n",
+ "06/29/2024 21:58:59 - INFO - llamafactory.model.loader - trainable params: 4,399,104 || all params: 498,431,872 || trainable%: 0.8826\n",
+ "[INFO|trainer.py:642] 2024-06-29 21:58:59,830 >> Using auto half precision backend\n",
+ "06/29/2024 21:58:59 - WARNING - llamafactory.train.callbacks - Previous trainer log in this folder will be deleted.\n",
+ "[INFO|trainer.py:2128] 2024-06-29 21:58:59,963 >> ***** Running training *****\n",
+ "[INFO|trainer.py:2129] 2024-06-29 21:58:59,963 >> Num examples = 4,482\n",
+ "[INFO|trainer.py:2130] 2024-06-29 21:58:59,963 >> Num Epochs = 10\n",
+ "[INFO|trainer.py:2131] 2024-06-29 21:58:59,963 >> Instantaneous batch size per device = 1\n",
+ "[INFO|trainer.py:2134] 2024-06-29 21:58:59,963 >> Total train batch size (w. parallel, distributed & accumulation) = 8\n",
+ "[INFO|trainer.py:2135] 2024-06-29 21:58:59,963 >> Gradient Accumulation steps = 8\n",
+ "[INFO|trainer.py:2136] 2024-06-29 21:58:59,963 >> Total optimization steps = 5,600\n",
+ "[INFO|trainer.py:2137] 2024-06-29 21:58:59,964 >> Number of trainable parameters = 4,399,104\n",
+ "{'loss': 2.5824, 'grad_norm': 3.00181245803833, 'learning_rate': 1.7857142857142857e-06, 'epoch': 0.02}\n",
+ "{'loss': 2.7043, 'grad_norm': 3.7918665409088135, 'learning_rate': 3.5714285714285714e-06, 'epoch': 0.04}\n",
+ "{'loss': 2.5845, 'grad_norm': 2.4548499584198, 'learning_rate': 5.357142857142857e-06, 'epoch': 0.05}\n",
+ "{'loss': 2.5238, 'grad_norm': 5.136275291442871, 'learning_rate': 7.142857142857143e-06, 'epoch': 0.07}\n",
+ "{'loss': 2.7407, 'grad_norm': 2.911478281021118, 'learning_rate': 8.92857142857143e-06, 'epoch': 0.09}\n",
+ "{'loss': 2.4438, 'grad_norm': 2.7009449005126953, 'learning_rate': 1.0714285714285714e-05, 'epoch': 0.11}\n",
+ "{'loss': 2.619, 'grad_norm': 2.6438188552856445, 'learning_rate': 1.25e-05, 'epoch': 0.12}\n",
+ "{'loss': 2.3602, 'grad_norm': 2.3748607635498047, 'learning_rate': 1.4285714285714285e-05, 'epoch': 0.14}\n",
+ "{'loss': 2.5023, 'grad_norm': 2.8664743900299072, 'learning_rate': 1.6071428571428572e-05, 'epoch': 0.16}\n",
+ "{'loss': 2.3225, 'grad_norm': 2.3505067825317383, 'learning_rate': 1.785714285714286e-05, 'epoch': 0.18}\n",
+ "{'loss': 2.3869, 'grad_norm': 3.261944532394409, 'learning_rate': 1.9642857142857145e-05, 'epoch': 0.2}\n",
+ "{'loss': 2.3922, 'grad_norm': 2.6836485862731934, 'learning_rate': 2.1428571428571428e-05, 'epoch': 0.21}\n",
+ "{'loss': 2.3024, 'grad_norm': 2.848069667816162, 'learning_rate': 2.3214285714285715e-05, 'epoch': 0.23}\n",
+ "{'loss': 2.3501, 'grad_norm': 3.22798752784729, 'learning_rate': 2.5e-05, 'epoch': 0.25}\n",
+ "{'loss': 2.2154, 'grad_norm': 2.441416025161743, 'learning_rate': 2.6785714285714288e-05, 'epoch': 0.27}\n",
+ "{'loss': 2.2651, 'grad_norm': 2.3891408443450928, 'learning_rate': 2.857142857142857e-05, 'epoch': 0.29}\n",
+ "{'loss': 2.333, 'grad_norm': 2.3359410762786865, 'learning_rate': 3.0357142857142857e-05, 'epoch': 0.3}\n",
+ "{'loss': 2.1135, 'grad_norm': 2.6461141109466553, 'learning_rate': 3.2142857142857144e-05, 'epoch': 0.32}\n",
+ "{'loss': 2.2379, 'grad_norm': 3.4454798698425293, 'learning_rate': 3.392857142857143e-05, 'epoch': 0.34}\n",
+ "{'loss': 2.4006, 'grad_norm': 2.9662983417510986, 'learning_rate': 3.571428571428572e-05, 'epoch': 0.36}\n",
+ "{'loss': 2.3065, 'grad_norm': 2.796970844268799, 'learning_rate': 3.7500000000000003e-05, 'epoch': 0.37}\n",
+ "{'loss': 2.2302, 'grad_norm': 3.6208152770996094, 'learning_rate': 3.928571428571429e-05, 'epoch': 0.39}\n",
+ "{'loss': 2.1966, 'grad_norm': 3.335953950881958, 'learning_rate': 4.107142857142857e-05, 'epoch': 0.41}\n",
+ "{'loss': 2.3829, 'grad_norm': 4.235249042510986, 'learning_rate': 4.2857142857142856e-05, 'epoch': 0.43}\n",
+ "{'loss': 2.2592, 'grad_norm': 3.228585720062256, 'learning_rate': 4.464285714285715e-05, 'epoch': 0.45}\n",
+ "{'loss': 2.2236, 'grad_norm': 3.2165491580963135, 'learning_rate': 4.642857142857143e-05, 'epoch': 0.46}\n",
+ "{'loss': 2.2113, 'grad_norm': 4.193121433258057, 'learning_rate': 4.8214285714285716e-05, 'epoch': 0.48}\n",
+ "{'loss': 2.3292, 'grad_norm': 4.554675579071045, 'learning_rate': 5e-05, 'epoch': 0.5}\n",
+ "{'loss': 2.1239, 'grad_norm': 2.7911994457244873, 'learning_rate': 5.1785714285714296e-05, 'epoch': 0.52}\n",
+ "{'loss': 2.2483, 'grad_norm': 3.6781301498413086, 'learning_rate': 5.3571428571428575e-05, 'epoch': 0.54}\n",
+ "{'loss': 2.2574, 'grad_norm': 4.210690021514893, 'learning_rate': 5.535714285714286e-05, 'epoch': 0.55}\n",
+ "{'loss': 2.0374, 'grad_norm': 6.651491165161133, 'learning_rate': 5.714285714285714e-05, 'epoch': 0.57}\n",
+ "{'loss': 2.1021, 'grad_norm': 5.034158706665039, 'learning_rate': 5.8928571428571435e-05, 'epoch': 0.59}\n",
+ "{'loss': 2.1575, 'grad_norm': 4.4245381355285645, 'learning_rate': 6.0714285714285715e-05, 'epoch': 0.61}\n",
+ "{'loss': 2.1584, 'grad_norm': 4.884017467498779, 'learning_rate': 6.25e-05, 'epoch': 0.62}\n",
+ "{'loss': 2.0592, 'grad_norm': 3.4757015705108643, 'learning_rate': 6.428571428571429e-05, 'epoch': 0.64}\n",
+ "{'loss': 2.2959, 'grad_norm': 4.756143093109131, 'learning_rate': 6.607142857142857e-05, 'epoch': 0.66}\n",
+ "{'loss': 2.2236, 'grad_norm': 3.61995005607605, 'learning_rate': 6.785714285714286e-05, 'epoch': 0.68}\n",
+ "{'loss': 1.9521, 'grad_norm': 3.775660991668701, 'learning_rate': 6.964285714285715e-05, 'epoch': 0.7}\n",
+ "{'loss': 2.1048, 'grad_norm': 3.84194016456604, 'learning_rate': 7.142857142857143e-05, 'epoch': 0.71}\n",
+ "{'loss': 2.2049, 'grad_norm': 3.697145462036133, 'learning_rate': 7.321428571428571e-05, 'epoch': 0.73}\n",
+ "{'loss': 2.2091, 'grad_norm': 3.071280002593994, 'learning_rate': 7.500000000000001e-05, 'epoch': 0.75}\n",
+ "{'loss': 2.1879, 'grad_norm': 3.8867111206054688, 'learning_rate': 7.67857142857143e-05, 'epoch': 0.77}\n",
+ "{'loss': 2.0959, 'grad_norm': 4.871102333068848, 'learning_rate': 7.857142857142858e-05, 'epoch': 0.79}\n",
+ "{'loss': 2.0237, 'grad_norm': 2.9602854251861572, 'learning_rate': 8.035714285714287e-05, 'epoch': 0.8}\n",
+ "{'loss': 2.12, 'grad_norm': 3.3257362842559814, 'learning_rate': 8.214285714285714e-05, 'epoch': 0.82}\n",
+ "{'loss': 2.1227, 'grad_norm': 5.4583024978637695, 'learning_rate': 8.392857142857144e-05, 'epoch': 0.84}\n",
+ "{'loss': 2.1448, 'grad_norm': 3.455509901046753, 'learning_rate': 8.571428571428571e-05, 'epoch': 0.86}\n",
+ "{'loss': 2.138, 'grad_norm': 2.953312397003174, 'learning_rate': 8.75e-05, 'epoch': 0.87}\n",
+ "{'loss': 2.3248, 'grad_norm': 3.1288394927978516, 'learning_rate': 8.92857142857143e-05, 'epoch': 0.89}\n",
+ "{'loss': 2.2541, 'grad_norm': 3.630788803100586, 'learning_rate': 9.107142857142857e-05, 'epoch': 0.91}\n",
+ "{'loss': 2.1579, 'grad_norm': 4.1369805335998535, 'learning_rate': 9.285714285714286e-05, 'epoch': 0.93}\n",
+ "{'loss': 2.1881, 'grad_norm': 3.945438861846924, 'learning_rate': 9.464285714285715e-05, 'epoch': 0.95}\n",
+ "{'loss': 2.1433, 'grad_norm': 3.308486223220825, 'learning_rate': 9.642857142857143e-05, 'epoch': 0.96}\n",
+ "{'loss': 2.1414, 'grad_norm': 3.59633207321167, 'learning_rate': 9.821428571428572e-05, 'epoch': 0.98}\n",
+ "{'loss': 2.1674, 'grad_norm': 3.1946074962615967, 'learning_rate': 0.0001, 'epoch': 1.0}\n",
+ " 10%|███▊ | 560/5600 [11:54<1:46:21, 1.27s/it][INFO|trainer.py:3788] 2024-06-29 22:10:54,528 >> \n",
+ "***** Running Evaluation *****\n",
+ "[INFO|trainer.py:3790] 2024-06-29 22:10:54,528 >> Num examples = 46\n",
+ "[INFO|trainer.py:3793] 2024-06-29 22:10:54,528 >> Batch size = 1\n",
+ "\n",
+ " 0%| | 0/46 [00:00, ?it/s]\u001b[A\n",
+ " 9%|███▊ | 4/46 [00:00<00:01, 36.47it/s]\u001b[A\n",
+ " 17%|███████▋ | 8/46 [00:00<00:01, 29.70it/s]\u001b[A\n",
+ " 26%|███████████▏ | 12/46 [00:00<00:01, 27.30it/s]\u001b[A\n",
+ " 33%|██████████████ | 15/46 [00:00<00:01, 26.35it/s]\u001b[A\n",
+ " 39%|████████████████▊ | 18/46 [00:00<00:01, 25.73it/s]\u001b[A\n",
+ " 46%|███████████████████▋ | 21/46 [00:00<00:00, 25.56it/s]\u001b[A\n",
+ " 52%|██████████████████████▍ | 24/46 [00:00<00:00, 23.84it/s]\u001b[A\n",
+ " 59%|█████████████████████████▏ | 27/46 [00:01<00:00, 23.72it/s]\u001b[A\n",
+ " 65%|████████████████████████████ | 30/46 [00:01<00:00, 23.16it/s]\u001b[A\n",
+ " 72%|██████████████████████████████▊ | 33/46 [00:01<00:00, 22.87it/s]\u001b[A\n",
+ " 78%|█████████████████████████████████▋ | 36/46 [00:01<00:00, 23.02it/s]\u001b[A\n",
+ " 85%|████████████████████████████████████▍ | 39/46 [00:01<00:00, 23.66it/s]\u001b[A\n",
+ " 91%|███████████████████████████████████████▎ | 42/46 [00:01<00:00, 24.46it/s]\u001b[A\n",
+ " \u001b[A\n",
+ "\u001b[A{'eval_loss': 2.084639310836792, 'eval_runtime': 1.9003, 'eval_samples_per_second': 24.206, 'eval_steps_per_second': 24.206, 'epoch': 1.0}\n",
+ " 10%|███▊ | 560/5600 [11:56<1:46:21, 1.27s/it]\n",
+ "100%|███████████████████████████████████████████| 46/46 [00:01<00:00, 24.20it/s]\u001b[A\n",
+ " \u001b[A[INFO|trainer.py:3478] 2024-06-29 22:10:56,429 >> Saving model checkpoint to saves/qwen2-0.5b/lora/sft/checkpoint-560\n",
+ "[INFO|configuration_utils.py:733] 2024-06-29 22:10:57,646 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/config.json\n",
+ "[INFO|configuration_utils.py:800] 2024-06-29 22:10:57,646 >> Model config Qwen2Config {\n",
+ " \"architectures\": [\n",
+ " \"Qwen2ForCausalLM\"\n",
+ " ],\n",
+ " \"attention_dropout\": 0.0,\n",
+ " \"bos_token_id\": 151643,\n",
+ " \"eos_token_id\": 151645,\n",
+ " \"hidden_act\": \"silu\",\n",
+ " \"hidden_size\": 896,\n",
+ " \"initializer_range\": 0.02,\n",
+ " \"intermediate_size\": 4864,\n",
+ " \"max_position_embeddings\": 32768,\n",
+ " \"max_window_layers\": 24,\n",
+ " \"model_type\": \"qwen2\",\n",
+ " \"num_attention_heads\": 14,\n",
+ " \"num_hidden_layers\": 24,\n",
+ " \"num_key_value_heads\": 2,\n",
+ " \"rms_norm_eps\": 1e-06,\n",
+ " \"rope_theta\": 1000000.0,\n",
+ " \"sliding_window\": 32768,\n",
+ " \"tie_word_embeddings\": true,\n",
+ " \"torch_dtype\": \"bfloat16\",\n",
+ " \"transformers_version\": \"4.42.3\",\n",
+ " \"use_cache\": true,\n",
+ " \"use_sliding_window\": false,\n",
+ " \"vocab_size\": 151936\n",
+ "}\n",
+ "\n",
+ "[INFO|tokenization_utils_base.py:2574] 2024-06-29 22:10:57,680 >> tokenizer config file saved in saves/qwen2-0.5b/lora/sft/checkpoint-560/tokenizer_config.json\n",
+ "[INFO|tokenization_utils_base.py:2583] 2024-06-29 22:10:57,680 >> Special tokens file saved in saves/qwen2-0.5b/lora/sft/checkpoint-560/special_tokens_map.json\n",
+ "{'loss': 2.0604, 'grad_norm': 3.1620354652404785, 'learning_rate': 9.999902864657691e-05, 'epoch': 1.02}\n",
+ "{'loss': 1.8643, 'grad_norm': 3.8117380142211914, 'learning_rate': 9.999611462404875e-05, 'epoch': 1.04}\n",
+ "{'loss': 2.0455, 'grad_norm': 3.2619926929473877, 'learning_rate': 9.999125804563732e-05, 'epoch': 1.05}\n",
+ "{'loss': 1.9864, 'grad_norm': 4.930575370788574, 'learning_rate': 9.998445910004082e-05, 'epoch': 1.07}\n",
+ "{'loss': 1.9508, 'grad_norm': 3.7913410663604736, 'learning_rate': 9.997571805142639e-05, 'epoch': 1.09}\n",
+ "{'loss': 1.9803, 'grad_norm': 4.443136215209961, 'learning_rate': 9.996503523941994e-05, 'epoch': 1.11}\n",
+ "{'loss': 1.9275, 'grad_norm': 3.6109349727630615, 'learning_rate': 9.99524110790929e-05, 'epoch': 1.12}\n",
+ "{'loss': 1.9954, 'grad_norm': 5.655592918395996, 'learning_rate': 9.993784606094612e-05, 'epoch': 1.14}\n",
+ "{'loss': 1.9877, 'grad_norm': 3.884321928024292, 'learning_rate': 9.992134075089084e-05, 'epoch': 1.16}\n",
+ "{'loss': 1.8812, 'grad_norm': 3.8242244720458984, 'learning_rate': 9.99028957902266e-05, 'epoch': 1.18}\n",
+ "{'loss': 1.8684, 'grad_norm': 2.90846586227417, 'learning_rate': 9.988251189561645e-05, 'epoch': 1.2}\n",
+ "{'loss': 1.993, 'grad_norm': 3.7888333797454834, 'learning_rate': 9.986018985905901e-05, 'epoch': 1.21}\n",
+ "{'loss': 1.8779, 'grad_norm': 4.632900714874268, 'learning_rate': 9.983593054785776e-05, 'epoch': 1.23}\n",
+ "{'loss': 1.999, 'grad_norm': 4.890506267547607, 'learning_rate': 9.980973490458728e-05, 'epoch': 1.25}\n",
+ "{'loss': 1.9202, 'grad_norm': 4.923672676086426, 'learning_rate': 9.978160394705668e-05, 'epoch': 1.27}\n",
+ "{'loss': 2.1275, 'grad_norm': 4.535311222076416, 'learning_rate': 9.975153876827008e-05, 'epoch': 1.29}\n",
+ "{'loss': 2.0239, 'grad_norm': 3.3138980865478516, 'learning_rate': 9.971954053638399e-05, 'epoch': 1.3}\n",
+ "{'loss': 2.0188, 'grad_norm': 3.4345853328704834, 'learning_rate': 9.968561049466214e-05, 'epoch': 1.32}\n",
+ "{'loss': 2.0142, 'grad_norm': 8.660140991210938, 'learning_rate': 9.964974996142698e-05, 'epoch': 1.34}\n",
+ "{'loss': 1.7658, 'grad_norm': 4.349238872528076, 'learning_rate': 9.961196033000861e-05, 'epoch': 1.36}\n",
+ "{'loss': 1.9781, 'grad_norm': 4.784688949584961, 'learning_rate': 9.957224306869053e-05, 'epoch': 1.37}\n",
+ "{'loss': 1.8478, 'grad_norm': 5.488619327545166, 'learning_rate': 9.953059972065265e-05, 'epoch': 1.39}\n",
+ "{'loss': 1.719, 'grad_norm': 3.712329387664795, 'learning_rate': 9.948703190391131e-05, 'epoch': 1.41}\n",
+ "{'loss': 2.0146, 'grad_norm': 4.957381248474121, 'learning_rate': 9.944154131125642e-05, 'epoch': 1.43}\n",
+ "{'loss': 1.8996, 'grad_norm': 4.802273273468018, 'learning_rate': 9.939412971018574e-05, 'epoch': 1.45}\n",
+ "{'loss': 1.9122, 'grad_norm': 3.6675291061401367, 'learning_rate': 9.934479894283606e-05, 'epoch': 1.46}\n",
+ "{'loss': 2.0716, 'grad_norm': 3.885627031326294, 'learning_rate': 9.92935509259118e-05, 'epoch': 1.48}\n",
+ "{'loss': 1.8866, 'grad_norm': 5.027438640594482, 'learning_rate': 9.924038765061042e-05, 'epoch': 1.5}\n",
+ "{'loss': 1.8188, 'grad_norm': 3.39078426361084, 'learning_rate': 9.918531118254507e-05, 'epoch': 1.52}\n",
+ "{'loss': 1.948, 'grad_norm': 4.390409469604492, 'learning_rate': 9.912832366166442e-05, 'epoch': 1.54}\n",
+ "{'loss': 1.9499, 'grad_norm': 5.019458770751953, 'learning_rate': 9.906942730216939e-05, 'epoch': 1.55}\n",
+ "{'loss': 1.8564, 'grad_norm': 3.9593818187713623, 'learning_rate': 9.900862439242719e-05, 'epoch': 1.57}\n",
+ "{'loss': 1.9739, 'grad_norm': 4.117242336273193, 'learning_rate': 9.894591729488242e-05, 'epoch': 1.59}\n",
+ "{'loss': 2.0614, 'grad_norm': 3.597482204437256, 'learning_rate': 9.888130844596524e-05, 'epoch': 1.61}\n",
+ "{'loss': 1.8292, 'grad_norm': 3.4714455604553223, 'learning_rate': 9.881480035599667e-05, 'epoch': 1.62}\n",
+ "{'loss': 1.8707, 'grad_norm': 3.4483628273010254, 'learning_rate': 9.874639560909117e-05, 'epoch': 1.64}\n",
+ "{'loss': 1.8787, 'grad_norm': 3.199208974838257, 'learning_rate': 9.867609686305617e-05, 'epoch': 1.66}\n",
+ "{'loss': 1.8856, 'grad_norm': 3.4779880046844482, 'learning_rate': 9.860390684928873e-05, 'epoch': 1.68}\n",
+ "{'loss': 1.8618, 'grad_norm': 5.559018135070801, 'learning_rate': 9.852982837266955e-05, 'epoch': 1.7}\n",
+ "{'loss': 1.864, 'grad_norm': 4.512182235717773, 'learning_rate': 9.84538643114539e-05, 'epoch': 1.71}\n",
+ "{'loss': 1.9054, 'grad_norm': 3.1477646827697754, 'learning_rate': 9.837601761715983e-05, 'epoch': 1.73}\n",
+ "{'loss': 2.0045, 'grad_norm': 3.805159091949463, 'learning_rate': 9.829629131445342e-05, 'epoch': 1.75}\n",
+ "{'loss': 1.9549, 'grad_norm': 3.356356143951416, 'learning_rate': 9.82146885010314e-05, 'epoch': 1.77}\n",
+ "{'loss': 1.8738, 'grad_norm': 4.890620231628418, 'learning_rate': 9.81312123475006e-05, 'epoch': 1.78}\n",
+ "{'loss': 1.906, 'grad_norm': 3.6688284873962402, 'learning_rate': 9.804586609725499e-05, 'epoch': 1.8}\n",
+ "{'loss': 1.8104, 'grad_norm': 3.987600564956665, 'learning_rate': 9.79586530663494e-05, 'epoch': 1.82}\n",
+ "{'loss': 1.7931, 'grad_norm': 3.517052173614502, 'learning_rate': 9.78695766433709e-05, 'epoch': 1.84}\n",
+ "{'loss': 1.984, 'grad_norm': 3.507730722427368, 'learning_rate': 9.777864028930705e-05, 'epoch': 1.86}\n",
+ "{'loss': 1.8427, 'grad_norm': 4.782810211181641, 'learning_rate': 9.768584753741134e-05, 'epoch': 1.87}\n",
+ "{'loss': 1.8765, 'grad_norm': 4.302423000335693, 'learning_rate': 9.759120199306613e-05, 'epoch': 1.89}\n",
+ "{'loss': 2.0702, 'grad_norm': 4.296674728393555, 'learning_rate': 9.74947073336423e-05, 'epoch': 1.91}\n",
+ "{'loss': 2.0158, 'grad_norm': 4.246646881103516, 'learning_rate': 9.73963673083566e-05, 'epoch': 1.93}\n",
+ "{'loss': 1.9104, 'grad_norm': 3.5928955078125, 'learning_rate': 9.72961857381258e-05, 'epoch': 1.95}\n",
+ "{'loss': 1.7656, 'grad_norm': 3.674893379211426, 'learning_rate': 9.719416651541839e-05, 'epoch': 1.96}\n",
+ "{'loss': 1.8906, 'grad_norm': 3.089376211166382, 'learning_rate': 9.709031360410318e-05, 'epoch': 1.98}\n",
+ "{'loss': 1.9109, 'grad_norm': 4.134565830230713, 'learning_rate': 9.698463103929542e-05, 'epoch': 2.0}\n",
+ " 20%|███████▍ | 1120/5600 [23:38<1:33:12, 1.25s/it][INFO|trainer.py:3788] 2024-06-29 22:22:38,708 >> \n",
+ "***** Running Evaluation *****\n",
+ "[INFO|trainer.py:3790] 2024-06-29 22:22:38,709 >> Num examples = 46\n",
+ "[INFO|trainer.py:3793] 2024-06-29 22:22:38,709 >> Batch size = 1\n",
+ "\n",
+ " 0%| | 0/46 [00:00, ?it/s]\u001b[A\n",
+ " 9%|███▊ | 4/46 [00:00<00:01, 36.96it/s]\u001b[A\n",
+ " 17%|███████▋ | 8/46 [00:00<00:01, 29.81it/s]\u001b[A\n",
+ " 26%|███████████▏ | 12/46 [00:00<00:01, 27.33it/s]\u001b[A\n",
+ " 33%|██████████████ | 15/46 [00:00<00:01, 27.15it/s]\u001b[A\n",
+ " 39%|████████████████▊ | 18/46 [00:00<00:01, 27.40it/s]\u001b[A\n",
+ " 46%|███████████████████▋ | 21/46 [00:00<00:00, 27.60it/s]\u001b[A\n",
+ " 52%|██████████████████████▍ | 24/46 [00:00<00:00, 26.59it/s]\u001b[A\n",
+ " 59%|█████████████████████████▏ | 27/46 [00:00<00:00, 25.78it/s]\u001b[A\n",
+ " 65%|████████████████████████████ | 30/46 [00:01<00:00, 25.93it/s]\u001b[A\n",
+ " 72%|██████████████████████████████▊ | 33/46 [00:01<00:00, 26.01it/s]\u001b[A\n",
+ " 78%|█████████████████████████████████▋ | 36/46 [00:01<00:00, 26.28it/s]\u001b[A\n",
+ " 85%|████████████████████████████████████▍ | 39/46 [00:01<00:00, 25.90it/s]\u001b[A\n",
+ " 91%|███████████████████████████████████████▎ | 42/46 [00:01<00:00, 25.86it/s]\u001b[A\n",
+ " \u001b[A\n",
+ "\u001b[A{'eval_loss': 2.014204502105713, 'eval_runtime': 1.7748, 'eval_samples_per_second': 25.919, 'eval_steps_per_second': 25.919, 'epoch': 2.0}\n",
+ " 20%|███████▍ | 1120/5600 [23:40<1:33:12, 1.25s/it]\n",
+ "100%|███████████████████████████████████████████| 46/46 [00:01<00:00, 25.25it/s]\u001b[A\n",
+ " \u001b[A[INFO|trainer.py:3478] 2024-06-29 22:22:40,485 >> Saving model checkpoint to saves/qwen2-0.5b/lora/sft/checkpoint-1120\n",
+ "[INFO|configuration_utils.py:733] 2024-06-29 22:22:41,066 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/config.json\n",
+ "[INFO|configuration_utils.py:800] 2024-06-29 22:22:41,066 >> Model config Qwen2Config {\n",
+ " \"architectures\": [\n",
+ " \"Qwen2ForCausalLM\"\n",
+ " ],\n",
+ " \"attention_dropout\": 0.0,\n",
+ " \"bos_token_id\": 151643,\n",
+ " \"eos_token_id\": 151645,\n",
+ " \"hidden_act\": \"silu\",\n",
+ " \"hidden_size\": 896,\n",
+ " \"initializer_range\": 0.02,\n",
+ " \"intermediate_size\": 4864,\n",
+ " \"max_position_embeddings\": 32768,\n",
+ " \"max_window_layers\": 24,\n",
+ " \"model_type\": \"qwen2\",\n",
+ " \"num_attention_heads\": 14,\n",
+ " \"num_hidden_layers\": 24,\n",
+ " \"num_key_value_heads\": 2,\n",
+ " \"rms_norm_eps\": 1e-06,\n",
+ " \"rope_theta\": 1000000.0,\n",
+ " \"sliding_window\": 32768,\n",
+ " \"tie_word_embeddings\": true,\n",
+ " \"torch_dtype\": \"bfloat16\",\n",
+ " \"transformers_version\": \"4.42.3\",\n",
+ " \"use_cache\": true,\n",
+ " \"use_sliding_window\": false,\n",
+ " \"vocab_size\": 151936\n",
+ "}\n",
+ "\n",
+ "[INFO|tokenization_utils_base.py:2574] 2024-06-29 22:22:41,093 >> tokenizer config file saved in saves/qwen2-0.5b/lora/sft/checkpoint-1120/tokenizer_config.json\n",
+ "[INFO|tokenization_utils_base.py:2583] 2024-06-29 22:22:41,093 >> Special tokens file saved in saves/qwen2-0.5b/lora/sft/checkpoint-1120/special_tokens_map.json\n",
+ "{'loss': 1.6353, 'grad_norm': 4.750072479248047, 'learning_rate': 9.687712292719997e-05, 'epoch': 2.02}\n",
+ "{'loss': 1.4907, 'grad_norm': 3.403005838394165, 'learning_rate': 9.67677934449517e-05, 'epoch': 2.03}\n",
+ "{'loss': 1.5814, 'grad_norm': 4.471570014953613, 'learning_rate': 9.665664684045333e-05, 'epoch': 2.05}\n",
+ "{'loss': 1.5284, 'grad_norm': 5.069768905639648, 'learning_rate': 9.654368743221022e-05, 'epoch': 2.07}\n",
+ "{'loss': 1.424, 'grad_norm': 3.740079641342163, 'learning_rate': 9.642891960916268e-05, 'epoch': 2.09}\n",
+ "{'loss': 1.5123, 'grad_norm': 3.6874232292175293, 'learning_rate': 9.631234783051544e-05, 'epoch': 2.11}\n",
+ "{'loss': 1.7386, 'grad_norm': 4.291646480560303, 'learning_rate': 9.619397662556435e-05, 'epoch': 2.12}\n",
+ "{'loss': 1.5025, 'grad_norm': 4.665364742279053, 'learning_rate': 9.607381059352038e-05, 'epoch': 2.14}\n",
+ "{'loss': 1.5494, 'grad_norm': 4.6409173011779785, 'learning_rate': 9.595185440333103e-05, 'epoch': 2.16}\n",
+ "{'loss': 1.596, 'grad_norm': 5.967792987823486, 'learning_rate': 9.582811279349882e-05, 'epoch': 2.18}\n",
+ "{'loss': 1.5333, 'grad_norm': 3.9247050285339355, 'learning_rate': 9.570259057189717e-05, 'epoch': 2.2}\n",
+ "{'loss': 1.5773, 'grad_norm': 5.318151950836182, 'learning_rate': 9.557529261558367e-05, 'epoch': 2.21}\n",
+ "{'loss': 1.6159, 'grad_norm': 5.290398120880127, 'learning_rate': 9.544622387061055e-05, 'epoch': 2.23}\n",
+ "{'loss': 1.6438, 'grad_norm': 4.89390230178833, 'learning_rate': 9.53153893518325e-05, 'epoch': 2.25}\n",
+ "{'loss': 1.486, 'grad_norm': 4.651273250579834, 'learning_rate': 9.518279414271183e-05, 'epoch': 2.27}\n",
+ "{'loss': 1.531, 'grad_norm': 5.672192573547363, 'learning_rate': 9.504844339512095e-05, 'epoch': 2.28}\n",
+ "{'loss': 1.5734, 'grad_norm': 3.6605958938598633, 'learning_rate': 9.491234232914221e-05, 'epoch': 2.3}\n",
+ "{'loss': 1.6449, 'grad_norm': 4.812197685241699, 'learning_rate': 9.477449623286505e-05, 'epoch': 2.32}\n",
+ "{'loss': 1.659, 'grad_norm': 4.542179584503174, 'learning_rate': 9.463491046218058e-05, 'epoch': 2.34}\n",
+ "{'loss': 1.6723, 'grad_norm': 4.588232517242432, 'learning_rate': 9.449359044057345e-05, 'epoch': 2.36}\n",
+ "{'loss': 1.619, 'grad_norm': 6.938955783843994, 'learning_rate': 9.435054165891109e-05, 'epoch': 2.37}\n",
+ "{'loss': 1.7966, 'grad_norm': 4.723308563232422, 'learning_rate': 9.420576967523049e-05, 'epoch': 2.39}\n",
+ "{'loss': 1.6444, 'grad_norm': 4.64656925201416, 'learning_rate': 9.405928011452211e-05, 'epoch': 2.41}\n",
+ "{'loss': 1.5322, 'grad_norm': 5.396662712097168, 'learning_rate': 9.391107866851143e-05, 'epoch': 2.43}\n",
+ "{'loss': 1.6074, 'grad_norm': 4.109992027282715, 'learning_rate': 9.376117109543769e-05, 'epoch': 2.45}\n",
+ "{'loss': 1.5129, 'grad_norm': 4.073942184448242, 'learning_rate': 9.360956321983028e-05, 'epoch': 2.46}\n",
+ "{'loss': 1.5839, 'grad_norm': 5.0658698081970215, 'learning_rate': 9.345626093228233e-05, 'epoch': 2.48}\n",
+ "{'loss': 1.726, 'grad_norm': 4.494250297546387, 'learning_rate': 9.330127018922194e-05, 'epoch': 2.5}\n",
+ "{'loss': 1.6129, 'grad_norm': 5.197183609008789, 'learning_rate': 9.314459701268065e-05, 'epoch': 2.52}\n",
+ "{'loss': 1.5691, 'grad_norm': 4.414649486541748, 'learning_rate': 9.298624749005951e-05, 'epoch': 2.53}\n",
+ "{'loss': 1.6516, 'grad_norm': 6.023291110992432, 'learning_rate': 9.282622777389258e-05, 'epoch': 2.55}\n",
+ "{'loss': 1.4793, 'grad_norm': 5.750635147094727, 'learning_rate': 9.266454408160779e-05, 'epoch': 2.57}\n",
+ "{'loss': 1.761, 'grad_norm': 6.335220813751221, 'learning_rate': 9.250120269528546e-05, 'epoch': 2.59}\n",
+ "{'loss': 1.5627, 'grad_norm': 6.77303409576416, 'learning_rate': 9.233620996141421e-05, 'epoch': 2.61}\n",
+ "{'loss': 1.656, 'grad_norm': 3.9022696018218994, 'learning_rate': 9.21695722906443e-05, 'epoch': 2.62}\n",
+ "{'loss': 1.5537, 'grad_norm': 3.297802209854126, 'learning_rate': 9.200129615753859e-05, 'epoch': 2.64}\n",
+ "{'loss': 1.5451, 'grad_norm': 4.561464309692383, 'learning_rate': 9.183138810032099e-05, 'epoch': 2.66}\n",
+ "{'loss': 1.7119, 'grad_norm': 5.242650508880615, 'learning_rate': 9.165985472062246e-05, 'epoch': 2.68}\n",
+ "{'loss': 1.499, 'grad_norm': 5.535559177398682, 'learning_rate': 9.148670268322438e-05, 'epoch': 2.7}\n",
+ "{'loss': 1.4735, 'grad_norm': 5.1633100509643555, 'learning_rate': 9.131193871579975e-05, 'epoch': 2.71}\n",
+ "{'loss': 1.7502, 'grad_norm': 5.2197184562683105, 'learning_rate': 9.113556960865167e-05, 'epoch': 2.73}\n",
+ "{'loss': 1.6312, 'grad_norm': 4.655239105224609, 'learning_rate': 9.09576022144496e-05, 'epoch': 2.75}\n",
+ "{'loss': 1.6455, 'grad_norm': 4.8979644775390625, 'learning_rate': 9.077804344796302e-05, 'epoch': 2.77}\n",
+ "{'loss': 1.6535, 'grad_norm': 4.097564220428467, 'learning_rate': 9.059690028579284e-05, 'epoch': 2.78}\n",
+ "{'loss': 1.5067, 'grad_norm': 3.5800154209136963, 'learning_rate': 9.041417976610027e-05, 'epoch': 2.8}\n",
+ "{'loss': 1.5709, 'grad_norm': 4.4109787940979, 'learning_rate': 9.022988898833342e-05, 'epoch': 2.82}\n",
+ "{'loss': 1.5517, 'grad_norm': 4.352450370788574, 'learning_rate': 9.004403511295141e-05, 'epoch': 2.84}\n",
+ "{'loss': 1.5259, 'grad_norm': 6.1658525466918945, 'learning_rate': 8.985662536114613e-05, 'epoch': 2.86}\n",
+ "{'loss': 1.7768, 'grad_norm': 4.468559265136719, 'learning_rate': 8.966766701456177e-05, 'epoch': 2.87}\n",
+ "{'loss': 1.5683, 'grad_norm': 4.119050979614258, 'learning_rate': 8.947716741501177e-05, 'epoch': 2.89}\n",
+ "{'loss': 1.5665, 'grad_norm': 5.216476917266846, 'learning_rate': 8.928513396419368e-05, 'epoch': 2.91}\n",
+ "{'loss': 1.524, 'grad_norm': 4.42177152633667, 'learning_rate': 8.90915741234015e-05, 'epoch': 2.93}\n",
+ "{'loss': 1.7169, 'grad_norm': 4.006609916687012, 'learning_rate': 8.889649541323574e-05, 'epoch': 2.95}\n",
+ "{'loss': 1.7248, 'grad_norm': 4.3928914070129395, 'learning_rate': 8.869990541331138e-05, 'epoch': 2.96}\n",
+ "{'loss': 1.5582, 'grad_norm': 5.347744464874268, 'learning_rate': 8.850181176196315e-05, 'epoch': 2.98}\n",
+ "{'loss': 1.6926, 'grad_norm': 4.4702019691467285, 'learning_rate': 8.83022221559489e-05, 'epoch': 3.0}\n",
+ " 30%|███████████ | 1680/5600 [35:12<1:20:28, 1.23s/it][INFO|trainer.py:3788] 2024-06-29 22:34:12,572 >> \n",
+ "***** Running Evaluation *****\n",
+ "[INFO|trainer.py:3790] 2024-06-29 22:34:12,573 >> Num examples = 46\n",
+ "[INFO|trainer.py:3793] 2024-06-29 22:34:12,573 >> Batch size = 1\n",
+ "\n",
+ " 0%| | 0/46 [00:00, ?it/s]\u001b[A\n",
+ " 9%|███▊ | 4/46 [00:00<00:01, 31.72it/s]\u001b[A\n",
+ " 17%|███████▋ | 8/46 [00:00<00:01, 28.02it/s]\u001b[A\n",
+ " 24%|██████████▎ | 11/46 [00:00<00:01, 27.68it/s]\u001b[A\n",
+ " 30%|█████████████ | 14/46 [00:00<00:01, 24.83it/s]\u001b[A\n",
+ " 37%|███████████████▉ | 17/46 [00:00<00:01, 25.62it/s]\u001b[A\n",
+ " 43%|██████████████████▋ | 20/46 [00:00<00:00, 26.00it/s]\u001b[A\n",
+ " 50%|█████████████████████▌ | 23/46 [00:00<00:00, 24.73it/s]\u001b[A\n",
+ " 57%|████████████████████████▎ | 26/46 [00:01<00:00, 25.05it/s]\u001b[A\n",
+ " 63%|███████████████████████████ | 29/46 [00:01<00:00, 24.42it/s]\u001b[A\n",
+ " 70%|█████████████████████████████▉ | 32/46 [00:01<00:00, 25.46it/s]\u001b[A\n",
+ " 76%|████████████████████████████████▋ | 35/46 [00:01<00:00, 25.59it/s]\u001b[A\n",
+ " 83%|███████████████████████████████████▌ | 38/46 [00:01<00:00, 25.79it/s]\u001b[A\n",
+ " 89%|██████████████████████████████████████▎ | 41/46 [00:01<00:00, 25.49it/s]\u001b[A\n",
+ " \u001b[A\n",
+ "\u001b[A{'eval_loss': 2.074129343032837, 'eval_runtime': 1.8246, 'eval_samples_per_second': 25.211, 'eval_steps_per_second': 25.211, 'epoch': 3.0}\n",
+ " 30%|███████████ | 1680/5600 [35:14<1:20:28, 1.23s/it]\n",
+ "100%|███████████████████████████████████████████| 46/46 [00:01<00:00, 25.23it/s]\u001b[A\n",
+ " \u001b[A[INFO|trainer.py:3478] 2024-06-29 22:34:14,397 >> Saving model checkpoint to saves/qwen2-0.5b/lora/sft/checkpoint-1680\n",
+ "[INFO|configuration_utils.py:733] 2024-06-29 22:34:15,015 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/config.json\n",
+ "[INFO|configuration_utils.py:800] 2024-06-29 22:34:15,015 >> Model config Qwen2Config {\n",
+ " \"architectures\": [\n",
+ " \"Qwen2ForCausalLM\"\n",
+ " ],\n",
+ " \"attention_dropout\": 0.0,\n",
+ " \"bos_token_id\": 151643,\n",
+ " \"eos_token_id\": 151645,\n",
+ " \"hidden_act\": \"silu\",\n",
+ " \"hidden_size\": 896,\n",
+ " \"initializer_range\": 0.02,\n",
+ " \"intermediate_size\": 4864,\n",
+ " \"max_position_embeddings\": 32768,\n",
+ " \"max_window_layers\": 24,\n",
+ " \"model_type\": \"qwen2\",\n",
+ " \"num_attention_heads\": 14,\n",
+ " \"num_hidden_layers\": 24,\n",
+ " \"num_key_value_heads\": 2,\n",
+ " \"rms_norm_eps\": 1e-06,\n",
+ " \"rope_theta\": 1000000.0,\n",
+ " \"sliding_window\": 32768,\n",
+ " \"tie_word_embeddings\": true,\n",
+ " \"torch_dtype\": \"bfloat16\",\n",
+ " \"transformers_version\": \"4.42.3\",\n",
+ " \"use_cache\": true,\n",
+ " \"use_sliding_window\": false,\n",
+ " \"vocab_size\": 151936\n",
+ "}\n",
+ "\n",
+ "[INFO|tokenization_utils_base.py:2574] 2024-06-29 22:34:15,046 >> tokenizer config file saved in saves/qwen2-0.5b/lora/sft/checkpoint-1680/tokenizer_config.json\n",
+ "[INFO|tokenization_utils_base.py:2583] 2024-06-29 22:34:15,047 >> Special tokens file saved in saves/qwen2-0.5b/lora/sft/checkpoint-1680/special_tokens_map.json\n",
+ "{'loss': 1.4704, 'grad_norm': 4.0504021644592285, 'learning_rate': 8.810114435015054e-05, 'epoch': 3.02}\n",
+ "{'loss': 1.1325, 'grad_norm': 4.55043363571167, 'learning_rate': 8.789858615727265e-05, 'epoch': 3.03}\n",
+ "{'loss': 1.3183, 'grad_norm': 5.89686393737793, 'learning_rate': 8.7694555447539e-05, 'epoch': 3.05}\n",
+ "{'loss': 1.2266, 'grad_norm': 6.354063510894775, 'learning_rate': 8.748906014838672e-05, 'epoch': 3.07}\n",
+ "{'loss': 1.2011, 'grad_norm': 5.328189849853516, 'learning_rate': 8.728210824415827e-05, 'epoch': 3.09}\n",
+ "{'loss': 1.3191, 'grad_norm': 5.733210563659668, 'learning_rate': 8.707370777579133e-05, 'epoch': 3.11}\n",
+ "{'loss': 1.242, 'grad_norm': 4.455051422119141, 'learning_rate': 8.68638668405062e-05, 'epoch': 3.12}\n",
+ "{'loss': 1.3565, 'grad_norm': 5.194347381591797, 'learning_rate': 8.665259359149132e-05, 'epoch': 3.14}\n",
+ "{'loss': 1.2447, 'grad_norm': 5.317370414733887, 'learning_rate': 8.643989623758643e-05, 'epoch': 3.16}\n",
+ "{'loss': 1.3055, 'grad_norm': 6.149751663208008, 'learning_rate': 8.622578304296364e-05, 'epoch': 3.18}\n",
+ "{'loss': 1.2915, 'grad_norm': 5.196756839752197, 'learning_rate': 8.601026232680634e-05, 'epoch': 3.2}\n",
+ "{'loss': 1.2714, 'grad_norm': 6.269302845001221, 'learning_rate': 8.579334246298593e-05, 'epoch': 3.21}\n",
+ "{'loss': 1.4645, 'grad_norm': 4.260131359100342, 'learning_rate': 8.557503187973651e-05, 'epoch': 3.23}\n",
+ "{'loss': 1.266, 'grad_norm': 4.698756217956543, 'learning_rate': 8.535533905932738e-05, 'epoch': 3.25}\n",
+ "{'loss': 1.242, 'grad_norm': 5.149835109710693, 'learning_rate': 8.513427253773346e-05, 'epoch': 3.27}\n",
+ "{'loss': 1.2025, 'grad_norm': 5.154805660247803, 'learning_rate': 8.491184090430364e-05, 'epoch': 3.28}\n",
+ "{'loss': 1.3668, 'grad_norm': 6.301427364349365, 'learning_rate': 8.468805280142709e-05, 'epoch': 3.3}\n",
+ "{'loss': 1.3626, 'grad_norm': 4.72573709487915, 'learning_rate': 8.446291692419736e-05, 'epoch': 3.32}\n",
+ "{'loss': 1.3326, 'grad_norm': 4.458547592163086, 'learning_rate': 8.423644202007467e-05, 'epoch': 3.34}\n",
+ "{'loss': 1.324, 'grad_norm': 4.596677303314209, 'learning_rate': 8.400863688854597e-05, 'epoch': 3.36}\n",
+ "{'loss': 1.3217, 'grad_norm': 5.230796813964844, 'learning_rate': 8.377951038078302e-05, 'epoch': 3.37}\n",
+ "{'loss': 1.2399, 'grad_norm': 4.330605983734131, 'learning_rate': 8.354907139929851e-05, 'epoch': 3.39}\n",
+ "{'loss': 1.2958, 'grad_norm': 5.9636945724487305, 'learning_rate': 8.33173288976002e-05, 'epoch': 3.41}\n",
+ "{'loss': 1.3935, 'grad_norm': 4.622984409332275, 'learning_rate': 8.308429187984297e-05, 'epoch': 3.43}\n",
+ "{'loss': 1.3421, 'grad_norm': 4.806463718414307, 'learning_rate': 8.284996940047903e-05, 'epoch': 3.44}\n",
+ "{'loss': 1.3621, 'grad_norm': 4.162802219390869, 'learning_rate': 8.261437056390606e-05, 'epoch': 3.46}\n",
+ "{'loss': 1.2849, 'grad_norm': 5.431687355041504, 'learning_rate': 8.237750452411353e-05, 'epoch': 3.48}\n",
+ "{'loss': 1.2827, 'grad_norm': 6.106764316558838, 'learning_rate': 8.213938048432697e-05, 'epoch': 3.5}\n",
+ "{'loss': 1.2381, 'grad_norm': 5.98523473739624, 'learning_rate': 8.190000769665044e-05, 'epoch': 3.52}\n",
+ "{'loss': 1.3037, 'grad_norm': 4.923933029174805, 'learning_rate': 8.1659395461707e-05, 'epoch': 3.53}\n",
+ "{'loss': 1.2708, 'grad_norm': 6.869691371917725, 'learning_rate': 8.141755312827736e-05, 'epoch': 3.55}\n",
+ "{'loss': 1.414, 'grad_norm': 4.601339340209961, 'learning_rate': 8.117449009293668e-05, 'epoch': 3.57}\n",
+ "{'loss': 1.1949, 'grad_norm': 4.767725944519043, 'learning_rate': 8.093021579968941e-05, 'epoch': 3.59}\n",
+ "{'loss': 1.2801, 'grad_norm': 4.9436211585998535, 'learning_rate': 8.068473973960238e-05, 'epoch': 3.61}\n",
+ "{'loss': 1.3493, 'grad_norm': 5.783080577850342, 'learning_rate': 8.043807145043604e-05, 'epoch': 3.62}\n",
+ "{'loss': 1.3132, 'grad_norm': 4.968575477600098, 'learning_rate': 8.019022051627388e-05, 'epoch': 3.64}\n",
+ "{'loss': 1.2486, 'grad_norm': 5.723098278045654, 'learning_rate': 7.994119656715002e-05, 'epoch': 3.66}\n",
+ "{'loss': 1.4033, 'grad_norm': 7.168787956237793, 'learning_rate': 7.969100927867507e-05, 'epoch': 3.68}\n",
+ "{'loss': 1.3969, 'grad_norm': 5.891693592071533, 'learning_rate': 7.943966837166023e-05, 'epoch': 3.69}\n",
+ "{'loss': 1.4086, 'grad_norm': 4.852097511291504, 'learning_rate': 7.91871836117395e-05, 'epoch': 3.71}\n",
+ "{'loss': 1.208, 'grad_norm': 5.643867015838623, 'learning_rate': 7.89335648089903e-05, 'epoch': 3.73}\n",
+ "{'loss': 1.3448, 'grad_norm': 5.375209808349609, 'learning_rate': 7.86788218175523e-05, 'epoch': 3.75}\n",
+ "{'loss': 1.316, 'grad_norm': 5.470929145812988, 'learning_rate': 7.842296453524463e-05, 'epoch': 3.77}\n",
+ "{'loss': 1.2369, 'grad_norm': 4.993719577789307, 'learning_rate': 7.81660029031811e-05, 'epoch': 3.78}\n",
+ "{'loss': 1.3265, 'grad_norm': 5.081270217895508, 'learning_rate': 7.79079469053842e-05, 'epoch': 3.8}\n",
+ "{'loss': 1.3366, 'grad_norm': 5.608216285705566, 'learning_rate': 7.764880656839696e-05, 'epoch': 3.82}\n",
+ "{'loss': 1.2876, 'grad_norm': 5.217581272125244, 'learning_rate': 7.738859196089358e-05, 'epoch': 3.84}\n",
+ "{'loss': 1.1134, 'grad_norm': 5.468497276306152, 'learning_rate': 7.712731319328798e-05, 'epoch': 3.86}\n",
+ "{'loss': 1.2106, 'grad_norm': 5.239170074462891, 'learning_rate': 7.68649804173412e-05, 'epoch': 3.87}\n",
+ "{'loss': 1.3169, 'grad_norm': 4.908669471740723, 'learning_rate': 7.660160382576683e-05, 'epoch': 3.89}\n",
+ "{'loss': 1.3074, 'grad_norm': 6.217924118041992, 'learning_rate': 7.633719365183504e-05, 'epoch': 3.91}\n",
+ "{'loss': 1.2273, 'grad_norm': 5.6632513999938965, 'learning_rate': 7.60717601689749e-05, 'epoch': 3.93}\n",
+ "{'loss': 1.0764, 'grad_norm': 4.552252769470215, 'learning_rate': 7.580531369037533e-05, 'epoch': 3.94}\n",
+ "{'loss': 1.5051, 'grad_norm': 5.1463823318481445, 'learning_rate': 7.553786456858429e-05, 'epoch': 3.96}\n",
+ "{'loss': 1.3593, 'grad_norm': 4.828197956085205, 'learning_rate': 7.526942319510655e-05, 'epoch': 3.98}\n",
+ "{'loss': 1.1506, 'grad_norm': 3.7453665733337402, 'learning_rate': 7.500000000000001e-05, 'epoch': 4.0}\n",
+ " 40%|██████████████▊ | 2240/5600 [46:40<1:08:55, 1.23s/it][INFO|trainer.py:3788] 2024-06-29 22:45:40,494 >> \n",
+ "***** Running Evaluation *****\n",
+ "[INFO|trainer.py:3790] 2024-06-29 22:45:40,495 >> Num examples = 46\n",
+ "[INFO|trainer.py:3793] 2024-06-29 22:45:40,495 >> Batch size = 1\n",
+ "\n",
+ " 0%| | 0/46 [00:00, ?it/s]\u001b[A\n",
+ " 9%|███▊ | 4/46 [00:00<00:01, 32.23it/s]\u001b[A\n",
+ " 17%|███████▋ | 8/46 [00:00<00:01, 26.02it/s]\u001b[A\n",
+ " 24%|██████████▎ | 11/46 [00:00<00:01, 25.56it/s]\u001b[A\n",
+ " 30%|█████████████ | 14/46 [00:00<00:01, 26.22it/s]\u001b[A\n",
+ " 37%|███████████████▉ | 17/46 [00:00<00:01, 24.55it/s]\u001b[A\n",
+ " 43%|██████████████████▋ | 20/46 [00:00<00:01, 24.60it/s]\u001b[A\n",
+ " 50%|█████████████████████▌ | 23/46 [00:00<00:00, 25.74it/s]\u001b[A\n",
+ " 57%|████████████████████████▎ | 26/46 [00:01<00:00, 25.16it/s]\u001b[A\n",
+ " 63%|███████████████████████████ | 29/46 [00:01<00:00, 23.90it/s]\u001b[A\n",
+ " 70%|█████████████████████████████▉ | 32/46 [00:01<00:00, 24.77it/s]\u001b[A\n",
+ " 76%|████████████████████████████████▋ | 35/46 [00:01<00:00, 24.88it/s]\u001b[A\n",
+ " 83%|███████████████████████████████████▌ | 38/46 [00:01<00:00, 25.18it/s]\u001b[A\n",
+ " 89%|██████████████████████████████████████▎ | 41/46 [00:01<00:00, 24.56it/s]\u001b[A\n",
+ " \u001b[A\n",
+ "\u001b[A{'eval_loss': 2.1811766624450684, 'eval_runtime': 1.8622, 'eval_samples_per_second': 24.702, 'eval_steps_per_second': 24.702, 'epoch': 4.0}\n",
+ " 40%|██████████████▊ | 2240/5600 [46:42<1:08:55, 1.23s/it]\n",
+ "100%|███████████████████████████████████████████| 46/46 [00:01<00:00, 25.50it/s]\u001b[A\n",
+ " \u001b[A[INFO|trainer.py:3478] 2024-06-29 22:45:42,357 >> Saving model checkpoint to saves/qwen2-0.5b/lora/sft/checkpoint-2240\n",
+ "[INFO|configuration_utils.py:733] 2024-06-29 22:45:42,919 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/config.json\n",
+ "[INFO|configuration_utils.py:800] 2024-06-29 22:45:42,919 >> Model config Qwen2Config {\n",
+ " \"architectures\": [\n",
+ " \"Qwen2ForCausalLM\"\n",
+ " ],\n",
+ " \"attention_dropout\": 0.0,\n",
+ " \"bos_token_id\": 151643,\n",
+ " \"eos_token_id\": 151645,\n",
+ " \"hidden_act\": \"silu\",\n",
+ " \"hidden_size\": 896,\n",
+ " \"initializer_range\": 0.02,\n",
+ " \"intermediate_size\": 4864,\n",
+ " \"max_position_embeddings\": 32768,\n",
+ " \"max_window_layers\": 24,\n",
+ " \"model_type\": \"qwen2\",\n",
+ " \"num_attention_heads\": 14,\n",
+ " \"num_hidden_layers\": 24,\n",
+ " \"num_key_value_heads\": 2,\n",
+ " \"rms_norm_eps\": 1e-06,\n",
+ " \"rope_theta\": 1000000.0,\n",
+ " \"sliding_window\": 32768,\n",
+ " \"tie_word_embeddings\": true,\n",
+ " \"torch_dtype\": \"bfloat16\",\n",
+ " \"transformers_version\": \"4.42.3\",\n",
+ " \"use_cache\": true,\n",
+ " \"use_sliding_window\": false,\n",
+ " \"vocab_size\": 151936\n",
+ "}\n",
+ "\n",
+ "[INFO|tokenization_utils_base.py:2574] 2024-06-29 22:45:42,944 >> tokenizer config file saved in saves/qwen2-0.5b/lora/sft/checkpoint-2240/tokenizer_config.json\n",
+ "[INFO|tokenization_utils_base.py:2583] 2024-06-29 22:45:42,944 >> Special tokens file saved in saves/qwen2-0.5b/lora/sft/checkpoint-2240/special_tokens_map.json\n",
+ "{'loss': 1.0956, 'grad_norm': 3.8211286067962646, 'learning_rate': 7.472960545147038e-05, 'epoch': 4.02}\n",
+ "{'loss': 0.9704, 'grad_norm': 4.844677448272705, 'learning_rate': 7.445825005546448e-05, 'epoch': 4.03}\n",
+ "{'loss': 1.0473, 'grad_norm': 5.256546497344971, 'learning_rate': 7.4185944355262e-05, 'epoch': 4.05}\n",
+ "{'loss': 0.9475, 'grad_norm': 5.321354389190674, 'learning_rate': 7.391269893106592e-05, 'epoch': 4.07}\n",
+ "{'loss': 0.963, 'grad_norm': 5.462662220001221, 'learning_rate': 7.363852439959135e-05, 'epoch': 4.09}\n",
+ "{'loss': 1.1535, 'grad_norm': 4.1576056480407715, 'learning_rate': 7.33634314136531e-05, 'epoch': 4.11}\n",
+ "{'loss': 0.868, 'grad_norm': 5.811347484588623, 'learning_rate': 7.308743066175172e-05, 'epoch': 4.12}\n",
+ "{'loss': 0.9822, 'grad_norm': 4.920297145843506, 'learning_rate': 7.281053286765815e-05, 'epoch': 4.14}\n",
+ "{'loss': 1.0763, 'grad_norm': 4.794600963592529, 'learning_rate': 7.253274878999727e-05, 'epoch': 4.16}\n",
+ "{'loss': 0.9458, 'grad_norm': 5.9534010887146, 'learning_rate': 7.225408922182961e-05, 'epoch': 4.18}\n",
+ "{'loss': 0.9607, 'grad_norm': 5.4286298751831055, 'learning_rate': 7.197456499023225e-05, 'epoch': 4.19}\n",
+ "{'loss': 1.0148, 'grad_norm': 5.160229206085205, 'learning_rate': 7.169418695587791e-05, 'epoch': 4.21}\n",
+ "{'loss': 0.9381, 'grad_norm': 5.876075267791748, 'learning_rate': 7.141296601261314e-05, 'epoch': 4.23}\n",
+ "{'loss': 1.0924, 'grad_norm': 4.944575786590576, 'learning_rate': 7.113091308703498e-05, 'epoch': 4.25}\n",
+ "{'loss': 0.9265, 'grad_norm': 5.3727545738220215, 'learning_rate': 7.084803913806641e-05, 'epoch': 4.27}\n",
+ "{'loss': 0.9944, 'grad_norm': 7.851329803466797, 'learning_rate': 7.056435515653059e-05, 'epoch': 4.28}\n",
+ "{'loss': 1.0168, 'grad_norm': 4.633089542388916, 'learning_rate': 7.027987216472377e-05, 'epoch': 4.3}\n",
+ "{'loss': 0.8804, 'grad_norm': 5.784977912902832, 'learning_rate': 6.999460121598704e-05, 'epoch': 4.32}\n",
+ "{'loss': 1.0768, 'grad_norm': 8.059889793395996, 'learning_rate': 6.970855339427698e-05, 'epoch': 4.34}\n",
+ "{'loss': 0.9443, 'grad_norm': 6.03590726852417, 'learning_rate': 6.942173981373474e-05, 'epoch': 4.36}\n",
+ "{'loss': 0.9489, 'grad_norm': 12.74445915222168, 'learning_rate': 6.91341716182545e-05, 'epoch': 4.37}\n",
+ "{'loss': 1.049, 'grad_norm': 5.12004280090332, 'learning_rate': 6.884585998105026e-05, 'epoch': 4.39}\n",
+ "{'loss': 1.0488, 'grad_norm': 10.477398872375488, 'learning_rate': 6.855681610422189e-05, 'epoch': 4.41}\n",
+ "{'loss': 1.1279, 'grad_norm': 5.085097789764404, 'learning_rate': 6.826705121831976e-05, 'epoch': 4.43}\n",
+ "{'loss': 1.088, 'grad_norm': 5.412323474884033, 'learning_rate': 6.797657658190839e-05, 'epoch': 4.44}\n",
+ "{'loss': 0.9124, 'grad_norm': 11.975234985351562, 'learning_rate': 6.768540348112907e-05, 'epoch': 4.46}\n",
+ "{'loss': 1.1052, 'grad_norm': 4.589102745056152, 'learning_rate': 6.739354322926136e-05, 'epoch': 4.48}\n",
+ "{'loss': 1.0427, 'grad_norm': 5.322690963745117, 'learning_rate': 6.710100716628344e-05, 'epoch': 4.5}\n",
+ "{'loss': 1.0325, 'grad_norm': 5.0710577964782715, 'learning_rate': 6.680780665843155e-05, 'epoch': 4.52}\n",
+ "{'loss': 1.059, 'grad_norm': 5.193735122680664, 'learning_rate': 6.651395309775837e-05, 'epoch': 4.53}\n",
+ "{'loss': 1.0252, 'grad_norm': 6.584225177764893, 'learning_rate': 6.621945790169036e-05, 'epoch': 4.55}\n",
+ "{'loss': 0.9911, 'grad_norm': 6.5015435218811035, 'learning_rate': 6.592433251258423e-05, 'epoch': 4.57}\n",
+ "{'loss': 1.0211, 'grad_norm': 5.898025035858154, 'learning_rate': 6.562858839728223e-05, 'epoch': 4.59}\n",
+ "{'loss': 0.9131, 'grad_norm': 5.380829811096191, 'learning_rate': 6.533223704666672e-05, 'epoch': 4.61}\n",
+ "{'loss': 1.0094, 'grad_norm': 5.253726959228516, 'learning_rate': 6.503528997521366e-05, 'epoch': 4.62}\n",
+ "{'loss': 1.0349, 'grad_norm': 4.567104339599609, 'learning_rate': 6.473775872054521e-05, 'epoch': 4.64}\n",
+ "{'loss': 1.0492, 'grad_norm': 5.842156410217285, 'learning_rate': 6.44396548429815e-05, 'epoch': 4.66}\n",
+ "{'loss': 1.0739, 'grad_norm': 5.842441082000732, 'learning_rate': 6.414098992509138e-05, 'epoch': 4.68}\n",
+ "{'loss': 0.9745, 'grad_norm': 5.929434299468994, 'learning_rate': 6.384177557124247e-05, 'epoch': 4.69}\n",
+ "{'loss': 0.9974, 'grad_norm': 6.804376125335693, 'learning_rate': 6.354202340715026e-05, 'epoch': 4.71}\n",
+ "{'loss': 0.9628, 'grad_norm': 4.4478020668029785, 'learning_rate': 6.324174507942637e-05, 'epoch': 4.73}\n",
+ "{'loss': 0.9861, 'grad_norm': 4.888654708862305, 'learning_rate': 6.294095225512603e-05, 'epoch': 4.75}\n",
+ "{'loss': 1.0063, 'grad_norm': 5.920362949371338, 'learning_rate': 6.263965662129487e-05, 'epoch': 4.77}\n",
+ "{'loss': 1.0548, 'grad_norm': 5.524910926818848, 'learning_rate': 6.233786988451468e-05, 'epoch': 4.78}\n",
+ "{'loss': 1.0929, 'grad_norm': 4.255885601043701, 'learning_rate': 6.203560377044866e-05, 'epoch': 4.8}\n",
+ "{'loss': 0.9848, 'grad_norm': 6.053053379058838, 'learning_rate': 6.173287002338577e-05, 'epoch': 4.82}\n",
+ "{'loss': 0.9432, 'grad_norm': 5.0641655921936035, 'learning_rate': 6.142968040578449e-05, 'epoch': 4.84}\n",
+ "{'loss': 1.0374, 'grad_norm': 7.123205661773682, 'learning_rate': 6.112604669781572e-05, 'epoch': 4.85}\n",
+ "{'loss': 1.0961, 'grad_norm': 5.436131954193115, 'learning_rate': 6.0821980696905146e-05, 'epoch': 4.87}\n",
+ "{'loss': 1.0741, 'grad_norm': 5.909348964691162, 'learning_rate': 6.0517494217274794e-05, 'epoch': 4.89}\n",
+ "{'loss': 1.1491, 'grad_norm': 5.223842144012451, 'learning_rate': 6.021259908948402e-05, 'epoch': 4.91}\n",
+ "{'loss': 1.0061, 'grad_norm': 5.396011829376221, 'learning_rate': 5.9907307159969884e-05, 'epoch': 4.93}\n",
+ "{'loss': 1.1121, 'grad_norm': 5.92130184173584, 'learning_rate': 5.960163029058682e-05, 'epoch': 4.94}\n",
+ "{'loss': 1.1207, 'grad_norm': 8.12635326385498, 'learning_rate': 5.9295580358145744e-05, 'epoch': 4.96}\n",
+ "{'loss': 1.141, 'grad_norm': 6.187139511108398, 'learning_rate': 5.898916925395264e-05, 'epoch': 4.98}\n",
+ "{'loss': 1.0886, 'grad_norm': 5.036999702453613, 'learning_rate': 5.868240888334653e-05, 'epoch': 5.0}\n",
+ " 50%|██████████████████▌ | 2800/5600 [59:01<1:00:22, 1.29s/it][INFO|trainer.py:3788] 2024-06-29 22:58:01,067 >> \n",
+ "***** Running Evaluation *****\n",
+ "[INFO|trainer.py:3790] 2024-06-29 22:58:01,067 >> Num examples = 46\n",
+ "[INFO|trainer.py:3793] 2024-06-29 22:58:01,067 >> Batch size = 1\n",
+ "\n",
+ " 0%| | 0/46 [00:00, ?it/s]\u001b[A\n",
+ " 9%|███▊ | 4/46 [00:00<00:01, 34.43it/s]\u001b[A\n",
+ " 17%|███████▋ | 8/46 [00:00<00:01, 29.26it/s]\u001b[A\n",
+ " 24%|██████████▎ | 11/46 [00:00<00:01, 26.19it/s]\u001b[A\n",
+ " 30%|█████████████ | 14/46 [00:00<00:01, 24.64it/s]\u001b[A\n",
+ " 37%|███████████████▉ | 17/46 [00:00<00:01, 24.02it/s]\u001b[A\n",
+ " 43%|██████████████████▋ | 20/46 [00:00<00:01, 24.72it/s]\u001b[A\n",
+ " 50%|█████████████████████▌ | 23/46 [00:00<00:00, 24.49it/s]\u001b[A\n",
+ " 57%|████████████████████████▎ | 26/46 [00:01<00:00, 24.79it/s]\u001b[A\n",
+ " 63%|███████████████████████████ | 29/46 [00:01<00:00, 25.03it/s]\u001b[A\n",
+ " 70%|█████████████████████████████▉ | 32/46 [00:01<00:00, 23.74it/s]\u001b[A\n",
+ " 76%|████████████████████████████████▋ | 35/46 [00:01<00:00, 24.89it/s]\u001b[A\n",
+ " 83%|███████████████████████████████████▌ | 38/46 [00:01<00:00, 25.06it/s]\u001b[A\n",
+ " 89%|██████████████████████████████████████▎ | 41/46 [00:01<00:00, 25.82it/s]\u001b[A\n",
+ " \u001b[A\n",
+ "\u001b[A{'eval_loss': 2.340395927429199, 'eval_runtime': 1.8444, 'eval_samples_per_second': 24.94, 'eval_steps_per_second': 24.94, 'epoch': 5.0}\n",
+ " 50%|██████████████████▌ | 2800/5600 [59:02<1:00:22, 1.29s/it]\n",
+ "100%|███████████████████████████████████████████| 46/46 [00:01<00:00, 26.16it/s]\u001b[A\n",
+ " \u001b[A[INFO|trainer.py:3478] 2024-06-29 22:58:02,913 >> Saving model checkpoint to saves/qwen2-0.5b/lora/sft/checkpoint-2800\n",
+ "[INFO|configuration_utils.py:733] 2024-06-29 22:58:03,475 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/config.json\n",
+ "[INFO|configuration_utils.py:800] 2024-06-29 22:58:03,475 >> Model config Qwen2Config {\n",
+ " \"architectures\": [\n",
+ " \"Qwen2ForCausalLM\"\n",
+ " ],\n",
+ " \"attention_dropout\": 0.0,\n",
+ " \"bos_token_id\": 151643,\n",
+ " \"eos_token_id\": 151645,\n",
+ " \"hidden_act\": \"silu\",\n",
+ " \"hidden_size\": 896,\n",
+ " \"initializer_range\": 0.02,\n",
+ " \"intermediate_size\": 4864,\n",
+ " \"max_position_embeddings\": 32768,\n",
+ " \"max_window_layers\": 24,\n",
+ " \"model_type\": \"qwen2\",\n",
+ " \"num_attention_heads\": 14,\n",
+ " \"num_hidden_layers\": 24,\n",
+ " \"num_key_value_heads\": 2,\n",
+ " \"rms_norm_eps\": 1e-06,\n",
+ " \"rope_theta\": 1000000.0,\n",
+ " \"sliding_window\": 32768,\n",
+ " \"tie_word_embeddings\": true,\n",
+ " \"torch_dtype\": \"bfloat16\",\n",
+ " \"transformers_version\": \"4.42.3\",\n",
+ " \"use_cache\": true,\n",
+ " \"use_sliding_window\": false,\n",
+ " \"vocab_size\": 151936\n",
+ "}\n",
+ "\n",
+ "[INFO|tokenization_utils_base.py:2574] 2024-06-29 22:58:03,501 >> tokenizer config file saved in saves/qwen2-0.5b/lora/sft/checkpoint-2800/tokenizer_config.json\n",
+ "[INFO|tokenization_utils_base.py:2583] 2024-06-29 22:58:03,501 >> Special tokens file saved in saves/qwen2-0.5b/lora/sft/checkpoint-2800/special_tokens_map.json\n",
+ "{'loss': 0.7949, 'grad_norm': 5.331892490386963, 'learning_rate': 5.837531116523682e-05, 'epoch': 5.02}\n",
+ "{'loss': 0.7411, 'grad_norm': 6.530398368835449, 'learning_rate': 5.806788803164034e-05, 'epoch': 5.03}\n",
+ "{'loss': 0.7875, 'grad_norm': 5.302547454833984, 'learning_rate': 5.7760151427217576e-05, 'epoch': 5.05}\n",
+ "{'loss': 0.7917, 'grad_norm': 5.69765567779541, 'learning_rate': 5.745211330880872e-05, 'epoch': 5.07}\n",
+ "{'loss': 0.8076, 'grad_norm': 4.74371862411499, 'learning_rate': 5.714378564496901e-05, 'epoch': 5.09}\n",
+ "{'loss': 0.9093, 'grad_norm': 5.205414772033691, 'learning_rate': 5.683518041550368e-05, 'epoch': 5.1}\n",
+ "{'loss': 0.6875, 'grad_norm': 5.478267192840576, 'learning_rate': 5.6526309611002594e-05, 'epoch': 5.12}\n",
+ "{'loss': 0.8528, 'grad_norm': 6.004168510437012, 'learning_rate': 5.621718523237427e-05, 'epoch': 5.14}\n",
+ "{'loss': 0.7398, 'grad_norm': 6.278602600097656, 'learning_rate': 5.590781929037965e-05, 'epoch': 5.16}\n",
+ "{'loss': 0.6957, 'grad_norm': 4.800085544586182, 'learning_rate': 5.559822380516539e-05, 'epoch': 5.18}\n",
+ "{'loss': 0.8193, 'grad_norm': 5.28222131729126, 'learning_rate': 5.5288410805796895e-05, 'epoch': 5.19}\n",
+ "{'loss': 0.825, 'grad_norm': 5.969717502593994, 'learning_rate': 5.497839232979084e-05, 'epoch': 5.21}\n",
+ "{'loss': 0.8286, 'grad_norm': 7.2066216468811035, 'learning_rate': 5.466818042264753e-05, 'epoch': 5.23}\n",
+ "{'loss': 0.8905, 'grad_norm': 5.272522449493408, 'learning_rate': 5.435778713738292e-05, 'epoch': 5.25}\n",
+ "{'loss': 0.7673, 'grad_norm': 4.872743606567383, 'learning_rate': 5.404722453406017e-05, 'epoch': 5.27}\n",
+ "{'loss': 0.7346, 'grad_norm': 7.293342113494873, 'learning_rate': 5.373650467932122e-05, 'epoch': 5.28}\n",
+ "{'loss': 0.737, 'grad_norm': 5.229869365692139, 'learning_rate': 5.3425639645917834e-05, 'epoch': 5.3}\n",
+ "{'loss': 0.7656, 'grad_norm': 4.550146579742432, 'learning_rate': 5.311464151224261e-05, 'epoch': 5.32}\n",
+ "{'loss': 0.7256, 'grad_norm': 4.5456223487854, 'learning_rate': 5.2803522361859594e-05, 'epoch': 5.34}\n",
+ "{'loss': 0.7886, 'grad_norm': 5.301971912384033, 'learning_rate': 5.249229428303486e-05, 'epoch': 5.35}\n",
+ "{'loss': 0.7801, 'grad_norm': 7.148138523101807, 'learning_rate': 5.218096936826681e-05, 'epoch': 5.37}\n",
+ "{'loss': 0.8076, 'grad_norm': 6.13567590713501, 'learning_rate': 5.18695597138163e-05, 'epoch': 5.39}\n",
+ "{'loss': 0.6323, 'grad_norm': 5.02504825592041, 'learning_rate': 5.155807741923666e-05, 'epoch': 5.41}\n",
+ "{'loss': 0.707, 'grad_norm': 6.629558563232422, 'learning_rate': 5.124653458690365e-05, 'epoch': 5.43}\n",
+ "{'loss': 0.7033, 'grad_norm': 6.333116054534912, 'learning_rate': 5.0934943321545115e-05, 'epoch': 5.44}\n",
+ "{'loss': 0.797, 'grad_norm': 10.160740852355957, 'learning_rate': 5.062331572977076e-05, 'epoch': 5.46}\n",
+ "{'loss': 0.9649, 'grad_norm': 5.644074440002441, 'learning_rate': 5.031166391960168e-05, 'epoch': 5.48}\n",
+ "{'loss': 0.7452, 'grad_norm': 5.57203483581543, 'learning_rate': 5e-05, 'epoch': 5.5}\n",
+ "{'loss': 0.7909, 'grad_norm': 6.231649875640869, 'learning_rate': 4.968833608039832e-05, 'epoch': 5.52}\n",
+ "{'loss': 0.8125, 'grad_norm': 6.369471073150635, 'learning_rate': 4.9376684270229254e-05, 'epoch': 5.53}\n",
+ "{'loss': 0.7572, 'grad_norm': 14.980217933654785, 'learning_rate': 4.9065056678454904e-05, 'epoch': 5.55}\n",
+ "{'loss': 0.9001, 'grad_norm': 6.943779468536377, 'learning_rate': 4.875346541309637e-05, 'epoch': 5.57}\n",
+ "{'loss': 0.8092, 'grad_norm': 6.565555572509766, 'learning_rate': 4.844192258076336e-05, 'epoch': 5.59}\n",
+ "{'loss': 0.9073, 'grad_norm': 8.527596473693848, 'learning_rate': 4.813044028618373e-05, 'epoch': 5.6}\n",
+ "{'loss': 0.8484, 'grad_norm': 5.995067119598389, 'learning_rate': 4.781903063173321e-05, 'epoch': 5.62}\n",
+ "{'loss': 0.7387, 'grad_norm': 16.719541549682617, 'learning_rate': 4.750770571696514e-05, 'epoch': 5.64}\n",
+ "{'loss': 0.8061, 'grad_norm': 5.842343807220459, 'learning_rate': 4.7196477638140404e-05, 'epoch': 5.66}\n",
+ "{'loss': 0.7958, 'grad_norm': 7.201180458068848, 'learning_rate': 4.68853584877574e-05, 'epoch': 5.68}\n",
+ "{'loss': 0.8338, 'grad_norm': 6.153838634490967, 'learning_rate': 4.657436035408217e-05, 'epoch': 5.69}\n",
+ "{'loss': 0.7411, 'grad_norm': 5.899301528930664, 'learning_rate': 4.626349532067879e-05, 'epoch': 5.71}\n",
+ "{'loss': 0.8199, 'grad_norm': 5.865950107574463, 'learning_rate': 4.595277546593984e-05, 'epoch': 5.73}\n",
+ "{'loss': 0.7367, 'grad_norm': 4.905264377593994, 'learning_rate': 4.564221286261709e-05, 'epoch': 5.75}\n",
+ "{'loss': 0.9049, 'grad_norm': 6.099426746368408, 'learning_rate': 4.5331819577352474e-05, 'epoch': 5.77}\n",
+ "{'loss': 0.7343, 'grad_norm': 7.31098747253418, 'learning_rate': 4.502160767020918e-05, 'epoch': 5.78}\n",
+ "{'loss': 0.7381, 'grad_norm': 5.501935958862305, 'learning_rate': 4.471158919420312e-05, 'epoch': 5.8}\n",
+ "{'loss': 0.7133, 'grad_norm': 7.434685707092285, 'learning_rate': 4.4401776194834613e-05, 'epoch': 5.82}\n",
+ "{'loss': 0.7352, 'grad_norm': 9.345376968383789, 'learning_rate': 4.409218070962036e-05, 'epoch': 5.84}\n",
+ "{'loss': 0.8973, 'grad_norm': 6.876387119293213, 'learning_rate': 4.378281476762576e-05, 'epoch': 5.85}\n",
+ "{'loss': 0.6906, 'grad_norm': 14.176045417785645, 'learning_rate': 4.347369038899744e-05, 'epoch': 5.87}\n",
+ "{'loss': 0.9098, 'grad_norm': 4.8011040687561035, 'learning_rate': 4.316481958449634e-05, 'epoch': 5.89}\n",
+ "{'loss': 0.7331, 'grad_norm': 5.1314697265625, 'learning_rate': 4.285621435503101e-05, 'epoch': 5.91}\n",
+ "{'loss': 0.8579, 'grad_norm': 7.106369495391846, 'learning_rate': 4.254788669119127e-05, 'epoch': 5.93}\n",
+ "{'loss': 0.748, 'grad_norm': 4.865246295928955, 'learning_rate': 4.223984857278242e-05, 'epoch': 5.94}\n",
+ "{'loss': 0.8392, 'grad_norm': 5.906892776489258, 'learning_rate': 4.1932111968359664e-05, 'epoch': 5.96}\n",
+ "{'loss': 0.8015, 'grad_norm': 5.705036163330078, 'learning_rate': 4.162468883476319e-05, 'epoch': 5.98}\n",
+ "{'loss': 0.8302, 'grad_norm': 5.603642463684082, 'learning_rate': 4.131759111665349e-05, 'epoch': 6.0}\n",
+ " 60%|██████████████████████▏ | 3360/5600 [1:11:19<47:06, 1.26s/it][INFO|trainer.py:3788] 2024-06-29 23:10:19,379 >> \n",
+ "***** Running Evaluation *****\n",
+ "[INFO|trainer.py:3790] 2024-06-29 23:10:19,379 >> Num examples = 46\n",
+ "[INFO|trainer.py:3793] 2024-06-29 23:10:19,379 >> Batch size = 1\n",
+ "\n",
+ " 0%| | 0/46 [00:00, ?it/s]\u001b[A\n",
+ " 9%|███▊ | 4/46 [00:00<00:01, 33.50it/s]\u001b[A\n",
+ " 17%|███████▋ | 8/46 [00:00<00:01, 29.86it/s]\u001b[A\n",
+ " 26%|███████████▏ | 12/46 [00:00<00:01, 24.66it/s]\u001b[A\n",
+ " 33%|██████████████ | 15/46 [00:00<00:01, 24.52it/s]\u001b[A\n",
+ " 39%|████████████████▊ | 18/46 [00:00<00:01, 24.31it/s]\u001b[A\n",
+ " 46%|███████████████████▋ | 21/46 [00:00<00:01, 24.94it/s]\u001b[A\n",
+ " 52%|██████████████████████▍ | 24/46 [00:00<00:00, 24.74it/s]\u001b[A\n",
+ " 59%|█████████████████████████▏ | 27/46 [00:01<00:00, 25.63it/s]\u001b[A\n",
+ " 65%|████████████████████████████ | 30/46 [00:01<00:00, 25.39it/s]\u001b[A\n",
+ " 72%|██████████████████████████████▊ | 33/46 [00:01<00:00, 25.47it/s]\u001b[A\n",
+ " 78%|█████████████████████████████████▋ | 36/46 [00:01<00:00, 25.85it/s]\u001b[A\n",
+ " 85%|████████████████████████████████████▍ | 39/46 [00:01<00:00, 25.45it/s]\u001b[A\n",
+ " 91%|███████████████████████████████████████▎ | 42/46 [00:01<00:00, 26.26it/s]\u001b[A\n",
+ " \u001b[A\n",
+ "\u001b[A{'eval_loss': 2.5864341259002686, 'eval_runtime': 1.8502, 'eval_samples_per_second': 24.863, 'eval_steps_per_second': 24.863, 'epoch': 6.0}\n",
+ " 60%|██████████████████████▏ | 3360/5600 [1:11:21<47:06, 1.26s/it]\n",
+ "100%|███████████████████████████████████████████| 46/46 [00:01<00:00, 24.74it/s]\u001b[A\n",
+ " \u001b[A[INFO|trainer.py:3478] 2024-06-29 23:10:21,229 >> Saving model checkpoint to saves/qwen2-0.5b/lora/sft/checkpoint-3360\n",
+ "[INFO|configuration_utils.py:733] 2024-06-29 23:10:21,875 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/config.json\n",
+ "[INFO|configuration_utils.py:800] 2024-06-29 23:10:21,876 >> Model config Qwen2Config {\n",
+ " \"architectures\": [\n",
+ " \"Qwen2ForCausalLM\"\n",
+ " ],\n",
+ " \"attention_dropout\": 0.0,\n",
+ " \"bos_token_id\": 151643,\n",
+ " \"eos_token_id\": 151645,\n",
+ " \"hidden_act\": \"silu\",\n",
+ " \"hidden_size\": 896,\n",
+ " \"initializer_range\": 0.02,\n",
+ " \"intermediate_size\": 4864,\n",
+ " \"max_position_embeddings\": 32768,\n",
+ " \"max_window_layers\": 24,\n",
+ " \"model_type\": \"qwen2\",\n",
+ " \"num_attention_heads\": 14,\n",
+ " \"num_hidden_layers\": 24,\n",
+ " \"num_key_value_heads\": 2,\n",
+ " \"rms_norm_eps\": 1e-06,\n",
+ " \"rope_theta\": 1000000.0,\n",
+ " \"sliding_window\": 32768,\n",
+ " \"tie_word_embeddings\": true,\n",
+ " \"torch_dtype\": \"bfloat16\",\n",
+ " \"transformers_version\": \"4.42.3\",\n",
+ " \"use_cache\": true,\n",
+ " \"use_sliding_window\": false,\n",
+ " \"vocab_size\": 151936\n",
+ "}\n",
+ "\n",
+ "[INFO|tokenization_utils_base.py:2574] 2024-06-29 23:10:21,913 >> tokenizer config file saved in saves/qwen2-0.5b/lora/sft/checkpoint-3360/tokenizer_config.json\n",
+ "[INFO|tokenization_utils_base.py:2583] 2024-06-29 23:10:21,914 >> Special tokens file saved in saves/qwen2-0.5b/lora/sft/checkpoint-3360/special_tokens_map.json\n",
+ "{'loss': 0.6066, 'grad_norm': 5.037189960479736, 'learning_rate': 4.101083074604737e-05, 'epoch': 6.02}\n",
+ "{'loss': 0.5565, 'grad_norm': 15.377517700195312, 'learning_rate': 4.0704419641854274e-05, 'epoch': 6.03}\n",
+ "{'loss': 0.5423, 'grad_norm': 5.809545516967773, 'learning_rate': 4.03983697094132e-05, 'epoch': 6.05}\n",
+ "{'loss': 0.6718, 'grad_norm': 6.553403377532959, 'learning_rate': 4.0092692840030134e-05, 'epoch': 6.07}\n",
+ "{'loss': 0.6391, 'grad_norm': 5.6180901527404785, 'learning_rate': 3.978740091051599e-05, 'epoch': 6.09}\n",
+ "{'loss': 0.6823, 'grad_norm': 5.028639793395996, 'learning_rate': 3.9482505782725224e-05, 'epoch': 6.1}\n",
+ "{'loss': 0.5271, 'grad_norm': 5.877006530761719, 'learning_rate': 3.917801930309486e-05, 'epoch': 6.12}\n",
+ "{'loss': 0.6207, 'grad_norm': 5.783533096313477, 'learning_rate': 3.887395330218429e-05, 'epoch': 6.14}\n",
+ "{'loss': 0.6231, 'grad_norm': 7.259427547454834, 'learning_rate': 3.857031959421553e-05, 'epoch': 6.16}\n",
+ "{'loss': 0.6113, 'grad_norm': 5.142063140869141, 'learning_rate': 3.8267129976614254e-05, 'epoch': 6.18}\n",
+ "{'loss': 0.6245, 'grad_norm': 6.205945014953613, 'learning_rate': 3.7964396229551364e-05, 'epoch': 6.19}\n",
+ "{'loss': 0.6885, 'grad_norm': 5.446501731872559, 'learning_rate': 3.7662130115485314e-05, 'epoch': 6.21}\n",
+ "{'loss': 0.6127, 'grad_norm': 5.06493616104126, 'learning_rate': 3.7360343378705124e-05, 'epoch': 6.23}\n",
+ "{'loss': 0.6773, 'grad_norm': 6.783751964569092, 'learning_rate': 3.705904774487396e-05, 'epoch': 6.25}\n",
+ "{'loss': 0.5281, 'grad_norm': 9.579876899719238, 'learning_rate': 3.675825492057364e-05, 'epoch': 6.27}\n",
+ "{'loss': 0.642, 'grad_norm': 5.472534656524658, 'learning_rate': 3.6457976592849754e-05, 'epoch': 6.28}\n",
+ "{'loss': 0.5956, 'grad_norm': 4.942157745361328, 'learning_rate': 3.6158224428757535e-05, 'epoch': 6.3}\n",
+ "{'loss': 0.6226, 'grad_norm': 4.671095848083496, 'learning_rate': 3.585901007490863e-05, 'epoch': 6.32}\n",
+ "{'loss': 0.6398, 'grad_norm': 7.523804664611816, 'learning_rate': 3.556034515701852e-05, 'epoch': 6.34}\n",
+ "{'loss': 0.5765, 'grad_norm': 5.049912929534912, 'learning_rate': 3.5262241279454785e-05, 'epoch': 6.35}\n",
+ "{'loss': 0.5686, 'grad_norm': 4.921656608581543, 'learning_rate': 3.4964710024786354e-05, 'epoch': 6.37}\n",
+ "{'loss': 0.5776, 'grad_norm': 4.165037155151367, 'learning_rate': 3.4667762953333295e-05, 'epoch': 6.39}\n",
+ "{'loss': 0.5287, 'grad_norm': 14.588775634765625, 'learning_rate': 3.4371411602717784e-05, 'epoch': 6.41}\n",
+ "{'loss': 0.6236, 'grad_norm': 4.320646286010742, 'learning_rate': 3.4075667487415785e-05, 'epoch': 6.43}\n",
+ "{'loss': 0.6174, 'grad_norm': 4.169257164001465, 'learning_rate': 3.3780542098309654e-05, 'epoch': 6.44}\n",
+ "{'loss': 0.6235, 'grad_norm': 3.5882270336151123, 'learning_rate': 3.3486046902241664e-05, 'epoch': 6.46}\n",
+ "{'loss': 0.6929, 'grad_norm': 5.139246940612793, 'learning_rate': 3.319219334156847e-05, 'epoch': 6.48}\n",
+ "{'loss': 0.6181, 'grad_norm': 6.403084754943848, 'learning_rate': 3.289899283371657e-05, 'epoch': 6.5}\n",
+ "{'loss': 0.702, 'grad_norm': 5.330471038818359, 'learning_rate': 3.2606456770738636e-05, 'epoch': 6.51}\n",
+ "{'loss': 0.558, 'grad_norm': 6.444238662719727, 'learning_rate': 3.231459651887093e-05, 'epoch': 6.53}\n",
+ "{'loss': 0.6338, 'grad_norm': 5.4946417808532715, 'learning_rate': 3.2023423418091626e-05, 'epoch': 6.55}\n",
+ "{'loss': 0.5606, 'grad_norm': 5.147060871124268, 'learning_rate': 3.173294878168025e-05, 'epoch': 6.57}\n",
+ "{'loss': 0.7059, 'grad_norm': 5.5029754638671875, 'learning_rate': 3.1443183895778105e-05, 'epoch': 6.59}\n",
+ "{'loss': 0.647, 'grad_norm': 5.451030731201172, 'learning_rate': 3.115414001894974e-05, 'epoch': 6.6}\n",
+ "{'loss': 0.6321, 'grad_norm': 5.880076885223389, 'learning_rate': 3.086582838174551e-05, 'epoch': 6.62}\n",
+ "{'loss': 0.6221, 'grad_norm': 12.090547561645508, 'learning_rate': 3.0578260186265265e-05, 'epoch': 6.64}\n",
+ "{'loss': 0.5515, 'grad_norm': 4.961390495300293, 'learning_rate': 3.029144660572304e-05, 'epoch': 6.66}\n",
+ "{'loss': 0.6303, 'grad_norm': 8.35487174987793, 'learning_rate': 3.000539878401296e-05, 'epoch': 6.68}\n",
+ "{'loss': 0.6175, 'grad_norm': 5.784793376922607, 'learning_rate': 2.9720127835276256e-05, 'epoch': 6.69}\n",
+ "{'loss': 0.6365, 'grad_norm': 5.296642780303955, 'learning_rate': 2.9435644843469436e-05, 'epoch': 6.71}\n",
+ "{'loss': 0.6053, 'grad_norm': 5.430149078369141, 'learning_rate': 2.9151960861933614e-05, 'epoch': 6.73}\n",
+ "{'loss': 0.6135, 'grad_norm': 5.0150980949401855, 'learning_rate': 2.886908691296504e-05, 'epoch': 6.75}\n",
+ "{'loss': 0.6041, 'grad_norm': 5.136585235595703, 'learning_rate': 2.858703398738686e-05, 'epoch': 6.76}\n",
+ "{'loss': 0.5429, 'grad_norm': 4.231466293334961, 'learning_rate': 2.8305813044122097e-05, 'epoch': 6.78}\n",
+ "{'loss': 0.5956, 'grad_norm': 5.151216983795166, 'learning_rate': 2.8025435009767747e-05, 'epoch': 6.8}\n",
+ "{'loss': 0.5732, 'grad_norm': 3.7542734146118164, 'learning_rate': 2.774591077817038e-05, 'epoch': 6.82}\n",
+ "{'loss': 0.6358, 'grad_norm': 6.12777042388916, 'learning_rate': 2.746725121000273e-05, 'epoch': 6.84}\n",
+ "{'loss': 0.5031, 'grad_norm': 11.638378143310547, 'learning_rate': 2.718946713234185e-05, 'epoch': 6.85}\n",
+ "{'loss': 0.6171, 'grad_norm': 9.199576377868652, 'learning_rate': 2.6912569338248315e-05, 'epoch': 6.87}\n",
+ "{'loss': 0.6104, 'grad_norm': 10.14255428314209, 'learning_rate': 2.66365685863469e-05, 'epoch': 6.89}\n",
+ "{'loss': 0.7077, 'grad_norm': 9.090829849243164, 'learning_rate': 2.636147560040866e-05, 'epoch': 6.91}\n",
+ "{'loss': 0.5531, 'grad_norm': 9.668030738830566, 'learning_rate': 2.6087301068934106e-05, 'epoch': 6.93}\n",
+ "{'loss': 0.6159, 'grad_norm': 6.352726936340332, 'learning_rate': 2.581405564473801e-05, 'epoch': 6.94}\n",
+ "{'loss': 0.6046, 'grad_norm': 5.168361663818359, 'learning_rate': 2.5541749944535554e-05, 'epoch': 6.96}\n",
+ "{'loss': 0.7733, 'grad_norm': 7.233384132385254, 'learning_rate': 2.527039454852963e-05, 'epoch': 6.98}\n",
+ "{'loss': 0.6154, 'grad_norm': 9.114374160766602, 'learning_rate': 2.500000000000001e-05, 'epoch': 7.0}\n",
+ " 70%|█████████████████████████▉ | 3920/5600 [1:23:31<34:46, 1.24s/it][INFO|trainer.py:3788] 2024-06-29 23:22:31,824 >> \n",
+ "***** Running Evaluation *****\n",
+ "[INFO|trainer.py:3790] 2024-06-29 23:22:31,824 >> Num examples = 46\n",
+ "[INFO|trainer.py:3793] 2024-06-29 23:22:31,824 >> Batch size = 1\n",
+ "\n",
+ " 0%| | 0/46 [00:00, ?it/s]\u001b[A\n",
+ " 9%|███▊ | 4/46 [00:00<00:01, 35.06it/s]\u001b[A\n",
+ " 17%|███████▋ | 8/46 [00:00<00:01, 30.15it/s]\u001b[A\n",
+ " 26%|███████████▏ | 12/46 [00:00<00:01, 27.73it/s]\u001b[A\n",
+ " 33%|██████████████ | 15/46 [00:00<00:01, 27.79it/s]\u001b[A\n",
+ " 39%|████████████████▊ | 18/46 [00:00<00:01, 27.85it/s]\u001b[A\n",
+ " 46%|███████████████████▋ | 21/46 [00:00<00:00, 27.80it/s]\u001b[A\n",
+ " 52%|██████████████████████▍ | 24/46 [00:00<00:00, 27.45it/s]\u001b[A\n",
+ " 59%|█████████████████████████▏ | 27/46 [00:00<00:00, 27.28it/s]\u001b[A\n",
+ " 65%|████████████████████████████ | 30/46 [00:01<00:00, 27.03it/s]\u001b[A\n",
+ " 72%|██████████████████████████████▊ | 33/46 [00:01<00:00, 27.21it/s]\u001b[A\n",
+ " 78%|█████████████████████████████████▋ | 36/46 [00:01<00:00, 27.28it/s]\u001b[A\n",
+ " 85%|████████████████████████████████████▍ | 39/46 [00:01<00:00, 27.24it/s]\u001b[A\n",
+ " 91%|███████████████████████████████████████▎ | 42/46 [00:01<00:00, 27.01it/s]\u001b[A\n",
+ " \u001b[A\n",
+ "\u001b[A{'eval_loss': 2.8612773418426514, 'eval_runtime': 1.7012, 'eval_samples_per_second': 27.04, 'eval_steps_per_second': 27.04, 'epoch': 7.0}\n",
+ " 70%|█████████████████████████▉ | 3920/5600 [1:23:33<34:46, 1.24s/it]\n",
+ "100%|███████████████████████████████████████████| 46/46 [00:01<00:00, 27.52it/s]\u001b[A\n",
+ " \u001b[A[INFO|trainer.py:3478] 2024-06-29 23:22:33,526 >> Saving model checkpoint to saves/qwen2-0.5b/lora/sft/checkpoint-3920\n",
+ "[INFO|configuration_utils.py:733] 2024-06-29 23:22:34,201 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/config.json\n",
+ "[INFO|configuration_utils.py:800] 2024-06-29 23:22:34,202 >> Model config Qwen2Config {\n",
+ " \"architectures\": [\n",
+ " \"Qwen2ForCausalLM\"\n",
+ " ],\n",
+ " \"attention_dropout\": 0.0,\n",
+ " \"bos_token_id\": 151643,\n",
+ " \"eos_token_id\": 151645,\n",
+ " \"hidden_act\": \"silu\",\n",
+ " \"hidden_size\": 896,\n",
+ " \"initializer_range\": 0.02,\n",
+ " \"intermediate_size\": 4864,\n",
+ " \"max_position_embeddings\": 32768,\n",
+ " \"max_window_layers\": 24,\n",
+ " \"model_type\": \"qwen2\",\n",
+ " \"num_attention_heads\": 14,\n",
+ " \"num_hidden_layers\": 24,\n",
+ " \"num_key_value_heads\": 2,\n",
+ " \"rms_norm_eps\": 1e-06,\n",
+ " \"rope_theta\": 1000000.0,\n",
+ " \"sliding_window\": 32768,\n",
+ " \"tie_word_embeddings\": true,\n",
+ " \"torch_dtype\": \"bfloat16\",\n",
+ " \"transformers_version\": \"4.42.3\",\n",
+ " \"use_cache\": true,\n",
+ " \"use_sliding_window\": false,\n",
+ " \"vocab_size\": 151936\n",
+ "}\n",
+ "\n",
+ "[INFO|tokenization_utils_base.py:2574] 2024-06-29 23:22:34,235 >> tokenizer config file saved in saves/qwen2-0.5b/lora/sft/checkpoint-3920/tokenizer_config.json\n",
+ "[INFO|tokenization_utils_base.py:2583] 2024-06-29 23:22:34,235 >> Special tokens file saved in saves/qwen2-0.5b/lora/sft/checkpoint-3920/special_tokens_map.json\n",
+ "{'loss': 0.4505, 'grad_norm': 4.652220726013184, 'learning_rate': 2.473057680489348e-05, 'epoch': 7.01}\n",
+ "{'loss': 0.385, 'grad_norm': 2.926722526550293, 'learning_rate': 2.4462135431415733e-05, 'epoch': 7.03}\n",
+ "{'loss': 0.4096, 'grad_norm': 6.222466468811035, 'learning_rate': 2.4194686309624663e-05, 'epoch': 7.05}\n",
+ "{'loss': 0.553, 'grad_norm': 3.829651117324829, 'learning_rate': 2.39282398310251e-05, 'epoch': 7.07}\n",
+ "{'loss': 0.403, 'grad_norm': 5.209712982177734, 'learning_rate': 2.366280634816496e-05, 'epoch': 7.09}\n",
+ "{'loss': 0.5494, 'grad_norm': 4.316225051879883, 'learning_rate': 2.3398396174233178e-05, 'epoch': 7.1}\n",
+ "{'loss': 0.4251, 'grad_norm': 5.665122985839844, 'learning_rate': 2.3135019582658802e-05, 'epoch': 7.12}\n",
+ "{'loss': 0.4833, 'grad_norm': 5.162817478179932, 'learning_rate': 2.2872686806712035e-05, 'epoch': 7.14}\n",
+ "{'loss': 0.4832, 'grad_norm': 4.767073631286621, 'learning_rate': 2.261140803910644e-05, 'epoch': 7.16}\n",
+ "{'loss': 0.4627, 'grad_norm': 6.984405994415283, 'learning_rate': 2.235119343160303e-05, 'epoch': 7.18}\n",
+ "{'loss': 0.48, 'grad_norm': 5.248043060302734, 'learning_rate': 2.2092053094615813e-05, 'epoch': 7.19}\n",
+ "{'loss': 0.4851, 'grad_norm': 5.531778812408447, 'learning_rate': 2.1833997096818898e-05, 'epoch': 7.21}\n",
+ "{'loss': 0.4751, 'grad_norm': 5.573154926300049, 'learning_rate': 2.157703546475539e-05, 'epoch': 7.23}\n",
+ "{'loss': 0.5816, 'grad_norm': 4.959446430206299, 'learning_rate': 2.132117818244771e-05, 'epoch': 7.25}\n",
+ "{'loss': 0.4175, 'grad_norm': 4.046441078186035, 'learning_rate': 2.1066435191009715e-05, 'epoch': 7.26}\n",
+ "{'loss': 0.5647, 'grad_norm': 7.062335968017578, 'learning_rate': 2.0812816388260518e-05, 'epoch': 7.28}\n",
+ "{'loss': 0.479, 'grad_norm': 4.6393914222717285, 'learning_rate': 2.056033162833977e-05, 'epoch': 7.3}\n",
+ "{'loss': 0.471, 'grad_norm': 5.455317497253418, 'learning_rate': 2.0308990721324927e-05, 'epoch': 7.32}\n",
+ "{'loss': 0.5885, 'grad_norm': 4.32041597366333, 'learning_rate': 2.0058803432849987e-05, 'epoch': 7.34}\n",
+ "{'loss': 0.4522, 'grad_norm': 4.541329383850098, 'learning_rate': 1.980977948372612e-05, 'epoch': 7.35}\n",
+ "{'loss': 0.4865, 'grad_norm': 5.104362964630127, 'learning_rate': 1.9561928549563968e-05, 'epoch': 7.37}\n",
+ "{'loss': 0.554, 'grad_norm': 5.151457786560059, 'learning_rate': 1.931526026039764e-05, 'epoch': 7.39}\n",
+ "{'loss': 0.4381, 'grad_norm': 5.234814643859863, 'learning_rate': 1.906978420031059e-05, 'epoch': 7.41}\n",
+ "{'loss': 0.5294, 'grad_norm': 6.009786128997803, 'learning_rate': 1.8825509907063327e-05, 'epoch': 7.43}\n",
+ "{'loss': 0.4886, 'grad_norm': 6.153667449951172, 'learning_rate': 1.8582446871722636e-05, 'epoch': 7.44}\n",
+ "{'loss': 0.5583, 'grad_norm': 5.528926849365234, 'learning_rate': 1.8340604538293015e-05, 'epoch': 7.46}\n",
+ "{'loss': 0.5186, 'grad_norm': 6.47043514251709, 'learning_rate': 1.8099992303349577e-05, 'epoch': 7.48}\n",
+ "{'loss': 0.4369, 'grad_norm': 4.640471458435059, 'learning_rate': 1.7860619515673033e-05, 'epoch': 7.5}\n",
+ "{'loss': 0.485, 'grad_norm': 4.996728420257568, 'learning_rate': 1.7622495475886487e-05, 'epoch': 7.51}\n",
+ "{'loss': 0.5824, 'grad_norm': 7.510169982910156, 'learning_rate': 1.738562943609396e-05, 'epoch': 7.53}\n",
+ "{'loss': 0.5401, 'grad_norm': 5.8573503494262695, 'learning_rate': 1.7150030599520984e-05, 'epoch': 7.55}\n",
+ "{'loss': 0.4099, 'grad_norm': 4.604180335998535, 'learning_rate': 1.691570812015704e-05, 'epoch': 7.57}\n",
+ "{'loss': 0.4631, 'grad_norm': 9.454184532165527, 'learning_rate': 1.6682671102399805e-05, 'epoch': 7.59}\n",
+ "{'loss': 0.4046, 'grad_norm': 3.9995360374450684, 'learning_rate': 1.6450928600701504e-05, 'epoch': 7.6}\n",
+ "{'loss': 0.3599, 'grad_norm': 5.843255043029785, 'learning_rate': 1.622048961921699e-05, 'epoch': 7.62}\n",
+ "{'loss': 0.5126, 'grad_norm': 5.647862434387207, 'learning_rate': 1.599136311145402e-05, 'epoch': 7.64}\n",
+ "{'loss': 0.6103, 'grad_norm': 6.46891450881958, 'learning_rate': 1.5763557979925324e-05, 'epoch': 7.66}\n",
+ "{'loss': 0.5807, 'grad_norm': 6.223480224609375, 'learning_rate': 1.553708307580265e-05, 'epoch': 7.68}\n",
+ "{'loss': 0.5201, 'grad_norm': 4.753687381744385, 'learning_rate': 1.531194719857292e-05, 'epoch': 7.69}\n",
+ "{'loss': 0.3547, 'grad_norm': 5.846710681915283, 'learning_rate': 1.5088159095696363e-05, 'epoch': 7.71}\n",
+ "{'loss': 0.5295, 'grad_norm': 5.13261079788208, 'learning_rate': 1.4865727462266543e-05, 'epoch': 7.73}\n",
+ "{'loss': 0.5368, 'grad_norm': 4.849207401275635, 'learning_rate': 1.4644660940672627e-05, 'epoch': 7.75}\n",
+ "{'loss': 0.5151, 'grad_norm': 4.458810806274414, 'learning_rate': 1.4424968120263504e-05, 'epoch': 7.76}\n",
+ "{'loss': 0.4958, 'grad_norm': 7.0515360832214355, 'learning_rate': 1.4206657537014079e-05, 'epoch': 7.78}\n",
+ "{'loss': 0.5166, 'grad_norm': 6.9797258377075195, 'learning_rate': 1.398973767319368e-05, 'epoch': 7.8}\n",
+ "{'loss': 0.5007, 'grad_norm': 8.272122383117676, 'learning_rate': 1.3774216957036367e-05, 'epoch': 7.82}\n",
+ "{'loss': 0.4178, 'grad_norm': 5.713352203369141, 'learning_rate': 1.3560103762413584e-05, 'epoch': 7.84}\n",
+ "{'loss': 0.4001, 'grad_norm': 7.498878479003906, 'learning_rate': 1.3347406408508695e-05, 'epoch': 7.85}\n",
+ "{'loss': 0.5782, 'grad_norm': 6.81415319442749, 'learning_rate': 1.3136133159493802e-05, 'epoch': 7.87}\n",
+ "{'loss': 0.493, 'grad_norm': 5.0307936668396, 'learning_rate': 1.2926292224208664e-05, 'epoch': 7.89}\n",
+ "{'loss': 0.4523, 'grad_norm': 4.477788925170898, 'learning_rate': 1.2717891755841722e-05, 'epoch': 7.91}\n",
+ "{'loss': 0.496, 'grad_norm': 5.846407413482666, 'learning_rate': 1.2510939851613285e-05, 'epoch': 7.93}\n",
+ "{'loss': 0.5292, 'grad_norm': 7.384892463684082, 'learning_rate': 1.230544455246101e-05, 'epoch': 7.94}\n",
+ "{'loss': 0.425, 'grad_norm': 6.020524978637695, 'learning_rate': 1.2101413842727345e-05, 'epoch': 7.96}\n",
+ "{'loss': 0.5331, 'grad_norm': 5.7436699867248535, 'learning_rate': 1.1898855649849461e-05, 'epoch': 7.98}\n",
+ "{'loss': 0.3988, 'grad_norm': 4.166412353515625, 'learning_rate': 1.1697777844051105e-05, 'epoch': 8.0}\n",
+ " 80%|█████████████████████████████▌ | 4480/5600 [1:35:21<23:03, 1.24s/it][INFO|trainer.py:3788] 2024-06-29 23:34:21,043 >> \n",
+ "***** Running Evaluation *****\n",
+ "[INFO|trainer.py:3790] 2024-06-29 23:34:21,043 >> Num examples = 46\n",
+ "[INFO|trainer.py:3793] 2024-06-29 23:34:21,043 >> Batch size = 1\n",
+ "\n",
+ " 0%| | 0/46 [00:00, ?it/s]\u001b[A\n",
+ " 9%|███▊ | 4/46 [00:00<00:01, 36.84it/s]\u001b[A\n",
+ " 17%|███████▋ | 8/46 [00:00<00:01, 31.27it/s]\u001b[A\n",
+ " 26%|███████████▏ | 12/46 [00:00<00:01, 26.39it/s]\u001b[A\n",
+ " 33%|██████████████ | 15/46 [00:00<00:01, 26.67it/s]\u001b[A\n",
+ " 39%|████████████████▊ | 18/46 [00:00<00:01, 26.22it/s]\u001b[A\n",
+ " 46%|███████████████████▋ | 21/46 [00:00<00:00, 26.57it/s]\u001b[A\n",
+ " 52%|██████████████████████▍ | 24/46 [00:00<00:00, 27.11it/s]\u001b[A\n",
+ " 59%|█████████████████████████▏ | 27/46 [00:00<00:00, 26.68it/s]\u001b[A\n",
+ " 65%|████████████████████████████ | 30/46 [00:01<00:00, 26.95it/s]\u001b[A\n",
+ " 72%|██████████████████████████████▊ | 33/46 [00:01<00:00, 27.15it/s]\u001b[A\n",
+ " 78%|█████████████████████████████████▋ | 36/46 [00:01<00:00, 26.95it/s]\u001b[A\n",
+ " 85%|████████████████████████████████████▍ | 39/46 [00:01<00:00, 26.54it/s]\u001b[A\n",
+ " 91%|███████████████████████████████████████▎ | 42/46 [00:01<00:00, 26.66it/s]\u001b[A\n",
+ " \u001b[A\n",
+ "\u001b[A{'eval_loss': 3.1611218452453613, 'eval_runtime': 1.7732, 'eval_samples_per_second': 25.942, 'eval_steps_per_second': 25.942, 'epoch': 8.0}\n",
+ " 80%|█████████████████████████████▌ | 4480/5600 [1:35:22<23:03, 1.24s/it]\n",
+ "100%|███████████████████████████████████████████| 46/46 [00:01<00:00, 25.30it/s]\u001b[A\n",
+ " \u001b[A[INFO|trainer.py:3478] 2024-06-29 23:34:22,817 >> Saving model checkpoint to saves/qwen2-0.5b/lora/sft/checkpoint-4480\n",
+ "[INFO|configuration_utils.py:733] 2024-06-29 23:34:23,861 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/config.json\n",
+ "[INFO|configuration_utils.py:800] 2024-06-29 23:34:23,861 >> Model config Qwen2Config {\n",
+ " \"architectures\": [\n",
+ " \"Qwen2ForCausalLM\"\n",
+ " ],\n",
+ " \"attention_dropout\": 0.0,\n",
+ " \"bos_token_id\": 151643,\n",
+ " \"eos_token_id\": 151645,\n",
+ " \"hidden_act\": \"silu\",\n",
+ " \"hidden_size\": 896,\n",
+ " \"initializer_range\": 0.02,\n",
+ " \"intermediate_size\": 4864,\n",
+ " \"max_position_embeddings\": 32768,\n",
+ " \"max_window_layers\": 24,\n",
+ " \"model_type\": \"qwen2\",\n",
+ " \"num_attention_heads\": 14,\n",
+ " \"num_hidden_layers\": 24,\n",
+ " \"num_key_value_heads\": 2,\n",
+ " \"rms_norm_eps\": 1e-06,\n",
+ " \"rope_theta\": 1000000.0,\n",
+ " \"sliding_window\": 32768,\n",
+ " \"tie_word_embeddings\": true,\n",
+ " \"torch_dtype\": \"bfloat16\",\n",
+ " \"transformers_version\": \"4.42.3\",\n",
+ " \"use_cache\": true,\n",
+ " \"use_sliding_window\": false,\n",
+ " \"vocab_size\": 151936\n",
+ "}\n",
+ "\n",
+ "[INFO|tokenization_utils_base.py:2574] 2024-06-29 23:34:23,897 >> tokenizer config file saved in saves/qwen2-0.5b/lora/sft/checkpoint-4480/tokenizer_config.json\n",
+ "[INFO|tokenization_utils_base.py:2583] 2024-06-29 23:34:23,898 >> Special tokens file saved in saves/qwen2-0.5b/lora/sft/checkpoint-4480/special_tokens_map.json\n",
+ "{'loss': 0.3943, 'grad_norm': 3.8637747764587402, 'learning_rate': 1.1498188238036861e-05, 'epoch': 8.01}\n",
+ "{'loss': 0.3449, 'grad_norm': 4.5357465744018555, 'learning_rate': 1.130009458668863e-05, 'epoch': 8.03}\n",
+ "{'loss': 0.4843, 'grad_norm': 4.340099334716797, 'learning_rate': 1.1103504586764263e-05, 'epoch': 8.05}\n",
+ "{'loss': 0.3624, 'grad_norm': 5.39348030090332, 'learning_rate': 1.090842587659851e-05, 'epoch': 8.07}\n",
+ "{'loss': 0.3457, 'grad_norm': 5.173300743103027, 'learning_rate': 1.0714866035806326e-05, 'epoch': 8.09}\n",
+ "{'loss': 0.4419, 'grad_norm': 3.9911515712738037, 'learning_rate': 1.0522832584988234e-05, 'epoch': 8.1}\n",
+ "{'loss': 0.344, 'grad_norm': 4.810797214508057, 'learning_rate': 1.0332332985438248e-05, 'epoch': 8.12}\n",
+ "{'loss': 0.4852, 'grad_norm': 4.6731462478637695, 'learning_rate': 1.0143374638853891e-05, 'epoch': 8.14}\n",
+ "{'loss': 0.3417, 'grad_norm': 4.519662857055664, 'learning_rate': 9.955964887048607e-06, 'epoch': 8.16}\n",
+ "{'loss': 0.3707, 'grad_norm': 6.008825302124023, 'learning_rate': 9.770111011666583e-06, 'epoch': 8.17}\n",
+ "{'loss': 0.4661, 'grad_norm': 4.404787540435791, 'learning_rate': 9.58582023389974e-06, 'epoch': 8.19}\n",
+ "{'loss': 0.4659, 'grad_norm': 3.959002733230591, 'learning_rate': 9.403099714207175e-06, 'epoch': 8.21}\n",
+ "{'loss': 0.4714, 'grad_norm': 5.200716972351074, 'learning_rate': 9.221956552036992e-06, 'epoch': 8.23}\n",
+ "{'loss': 0.4455, 'grad_norm': 4.942255973815918, 'learning_rate': 9.042397785550405e-06, 'epoch': 8.25}\n",
+ "{'loss': 0.4403, 'grad_norm': 5.782726764678955, 'learning_rate': 8.864430391348332e-06, 'epoch': 8.26}\n",
+ "{'loss': 0.3306, 'grad_norm': 3.7129645347595215, 'learning_rate': 8.688061284200266e-06, 'epoch': 8.28}\n",
+ "{'loss': 0.407, 'grad_norm': 8.037576675415039, 'learning_rate': 8.513297316775625e-06, 'epoch': 8.3}\n",
+ "{'loss': 0.3798, 'grad_norm': 5.382339000701904, 'learning_rate': 8.34014527937756e-06, 'epoch': 8.32}\n",
+ "{'loss': 0.4849, 'grad_norm': 4.840189456939697, 'learning_rate': 8.168611899679013e-06, 'epoch': 8.34}\n",
+ "{'loss': 0.417, 'grad_norm': 5.7303619384765625, 'learning_rate': 7.998703842461431e-06, 'epoch': 8.35}\n",
+ "{'loss': 0.4589, 'grad_norm': 6.085827350616455, 'learning_rate': 7.830427709355725e-06, 'epoch': 8.37}\n",
+ "{'loss': 0.4225, 'grad_norm': 4.722183704376221, 'learning_rate': 7.663790038585793e-06, 'epoch': 8.39}\n",
+ "{'loss': 0.3773, 'grad_norm': 5.256956100463867, 'learning_rate': 7.498797304714544e-06, 'epoch': 8.41}\n",
+ "{'loss': 0.4246, 'grad_norm': 5.674898624420166, 'learning_rate': 7.33545591839222e-06, 'epoch': 8.42}\n",
+ "{'loss': 0.4219, 'grad_norm': 4.489896774291992, 'learning_rate': 7.173772226107434e-06, 'epoch': 8.44}\n",
+ "{'loss': 0.4486, 'grad_norm': 5.115447521209717, 'learning_rate': 7.013752509940485e-06, 'epoch': 8.46}\n",
+ "{'loss': 0.4512, 'grad_norm': 4.548392295837402, 'learning_rate': 6.855402987319348e-06, 'epoch': 8.48}\n",
+ "{'loss': 0.3836, 'grad_norm': 6.2048258781433105, 'learning_rate': 6.698729810778065e-06, 'epoch': 8.5}\n",
+ "{'loss': 0.3778, 'grad_norm': 4.5989766120910645, 'learning_rate': 6.54373906771768e-06, 'epoch': 8.51}\n",
+ "{'loss': 0.4007, 'grad_norm': 5.147210121154785, 'learning_rate': 6.390436780169734e-06, 'epoch': 8.53}\n",
+ "{'loss': 0.4504, 'grad_norm': 4.499249458312988, 'learning_rate': 6.238828904562316e-06, 'epoch': 8.55}\n",
+ "{'loss': 0.4176, 'grad_norm': 4.788080215454102, 'learning_rate': 6.088921331488568e-06, 'epoch': 8.57}\n",
+ "{'loss': 0.2845, 'grad_norm': 3.5535483360290527, 'learning_rate': 5.94071988547788e-06, 'epoch': 8.59}\n",
+ "{'loss': 0.3807, 'grad_norm': 4.653518199920654, 'learning_rate': 5.794230324769517e-06, 'epoch': 8.6}\n",
+ "{'loss': 0.3348, 'grad_norm': 4.7170915603637695, 'learning_rate': 5.649458341088915e-06, 'epoch': 8.62}\n",
+ "{'loss': 0.3807, 'grad_norm': 17.9665584564209, 'learning_rate': 5.506409559426573e-06, 'epoch': 8.64}\n",
+ "{'loss': 0.4922, 'grad_norm': 4.38849401473999, 'learning_rate': 5.365089537819434e-06, 'epoch': 8.66}\n",
+ "{'loss': 0.4016, 'grad_norm': 4.990530967712402, 'learning_rate': 5.2255037671349535e-06, 'epoch': 8.67}\n",
+ "{'loss': 0.4209, 'grad_norm': 4.245598793029785, 'learning_rate': 5.087657670857798e-06, 'epoch': 8.69}\n",
+ "{'loss': 0.3529, 'grad_norm': 3.6876637935638428, 'learning_rate': 4.951556604879048e-06, 'epoch': 8.71}\n",
+ "{'loss': 0.4205, 'grad_norm': 6.267766952514648, 'learning_rate': 4.8172058572881765e-06, 'epoch': 8.73}\n",
+ "{'loss': 0.391, 'grad_norm': 4.628519535064697, 'learning_rate': 4.684610648167503e-06, 'epoch': 8.75}\n",
+ "{'loss': 0.4038, 'grad_norm': 5.335127353668213, 'learning_rate': 4.5537761293894535e-06, 'epoch': 8.76}\n",
+ "{'loss': 0.4519, 'grad_norm': 5.06191349029541, 'learning_rate': 4.424707384416344e-06, 'epoch': 8.78}\n",
+ "{'loss': 0.4043, 'grad_norm': 3.3718318939208984, 'learning_rate': 4.29740942810285e-06, 'epoch': 8.8}\n",
+ "{'loss': 0.4329, 'grad_norm': 5.270512104034424, 'learning_rate': 4.1718872065011904e-06, 'epoch': 8.82}\n",
+ "{'loss': 0.4345, 'grad_norm': 4.938543796539307, 'learning_rate': 4.048145596668967e-06, 'epoch': 8.84}\n",
+ "{'loss': 0.4661, 'grad_norm': 4.726830005645752, 'learning_rate': 3.9261894064796135e-06, 'epoch': 8.85}\n",
+ "{'loss': 0.4037, 'grad_norm': 4.747579574584961, 'learning_rate': 3.8060233744356633e-06, 'epoch': 8.87}\n",
+ "{'loss': 0.3594, 'grad_norm': 3.65122652053833, 'learning_rate': 3.687652169484568e-06, 'epoch': 8.89}\n",
+ "{'loss': 0.3756, 'grad_norm': 3.7553329467773438, 'learning_rate': 3.5710803908373224e-06, 'epoch': 8.91}\n",
+ "{'loss': 0.4363, 'grad_norm': 6.1218132972717285, 'learning_rate': 3.4563125677897932e-06, 'epoch': 8.92}\n",
+ "{'loss': 0.5039, 'grad_norm': 6.221901893615723, 'learning_rate': 3.343353159546675e-06, 'epoch': 8.94}\n",
+ "{'loss': 0.4145, 'grad_norm': 4.449114799499512, 'learning_rate': 3.2322065550483007e-06, 'epoch': 8.96}\n",
+ "{'loss': 0.3358, 'grad_norm': 3.244713306427002, 'learning_rate': 3.1228770728000455e-06, 'epoch': 8.98}\n",
+ "{'loss': 0.3726, 'grad_norm': 5.383361339569092, 'learning_rate': 3.0153689607045845e-06, 'epoch': 9.0}\n",
+ " 90%|█████████████████████████████████▎ | 5040/5600 [1:47:11<11:48, 1.26s/it][INFO|trainer.py:3788] 2024-06-29 23:46:11,764 >> \n",
+ "***** Running Evaluation *****\n",
+ "[INFO|trainer.py:3790] 2024-06-29 23:46:11,764 >> Num examples = 46\n",
+ "[INFO|trainer.py:3793] 2024-06-29 23:46:11,764 >> Batch size = 1\n",
+ "\n",
+ " 0%| | 0/46 [00:00, ?it/s]\u001b[A\n",
+ " 9%|███▊ | 4/46 [00:00<00:01, 28.58it/s]\u001b[A\n",
+ " 15%|██████▋ | 7/46 [00:00<00:01, 27.42it/s]\u001b[A\n",
+ " 22%|█████████▎ | 10/46 [00:00<00:01, 27.02it/s]\u001b[A\n",
+ " 28%|████████████▏ | 13/46 [00:00<00:01, 27.51it/s]\u001b[A\n",
+ " 35%|██████████████▉ | 16/46 [00:00<00:01, 27.44it/s]\u001b[A\n",
+ " 41%|█████████████████▊ | 19/46 [00:00<00:00, 27.84it/s]\u001b[A\n",
+ " 48%|████████████████████▌ | 22/46 [00:00<00:00, 26.64it/s]\u001b[A\n",
+ " 54%|███████████████████████▎ | 25/46 [00:00<00:00, 25.39it/s]\u001b[A\n",
+ " 61%|██████████████████████████▏ | 28/46 [00:01<00:00, 25.72it/s]\u001b[A\n",
+ " 67%|████████████████████████████▉ | 31/46 [00:01<00:00, 25.99it/s]\u001b[A\n",
+ " 74%|███████████████████████████████▊ | 34/46 [00:01<00:00, 26.47it/s]\u001b[A\n",
+ " 80%|██████████████████████████████████▌ | 37/46 [00:01<00:00, 26.99it/s]\u001b[A\n",
+ " 87%|█████████████████████████████████████▍ | 40/46 [00:01<00:00, 25.12it/s]\u001b[A\n",
+ " 93%|████████████████████████████████████████▏ | 43/46 [00:01<00:00, 26.14it/s]\u001b[A\n",
+ " \u001b[A\n",
+ "\u001b[A{'eval_loss': 3.362522602081299, 'eval_runtime': 1.7721, 'eval_samples_per_second': 25.958, 'eval_steps_per_second': 25.958, 'epoch': 9.0}\n",
+ " 90%|█████████████████████████████████▎ | 5040/5600 [1:47:13<11:48, 1.26s/it]\n",
+ "100%|███████████████████████████████████████████| 46/46 [00:01<00:00, 26.66it/s]\u001b[A\n",
+ " \u001b[A[INFO|trainer.py:3478] 2024-06-29 23:46:13,536 >> Saving model checkpoint to saves/qwen2-0.5b/lora/sft/checkpoint-5040\n",
+ "[INFO|configuration_utils.py:733] 2024-06-29 23:46:14,139 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/config.json\n",
+ "[INFO|configuration_utils.py:800] 2024-06-29 23:46:14,139 >> Model config Qwen2Config {\n",
+ " \"architectures\": [\n",
+ " \"Qwen2ForCausalLM\"\n",
+ " ],\n",
+ " \"attention_dropout\": 0.0,\n",
+ " \"bos_token_id\": 151643,\n",
+ " \"eos_token_id\": 151645,\n",
+ " \"hidden_act\": \"silu\",\n",
+ " \"hidden_size\": 896,\n",
+ " \"initializer_range\": 0.02,\n",
+ " \"intermediate_size\": 4864,\n",
+ " \"max_position_embeddings\": 32768,\n",
+ " \"max_window_layers\": 24,\n",
+ " \"model_type\": \"qwen2\",\n",
+ " \"num_attention_heads\": 14,\n",
+ " \"num_hidden_layers\": 24,\n",
+ " \"num_key_value_heads\": 2,\n",
+ " \"rms_norm_eps\": 1e-06,\n",
+ " \"rope_theta\": 1000000.0,\n",
+ " \"sliding_window\": 32768,\n",
+ " \"tie_word_embeddings\": true,\n",
+ " \"torch_dtype\": \"bfloat16\",\n",
+ " \"transformers_version\": \"4.42.3\",\n",
+ " \"use_cache\": true,\n",
+ " \"use_sliding_window\": false,\n",
+ " \"vocab_size\": 151936\n",
+ "}\n",
+ "\n",
+ "[INFO|tokenization_utils_base.py:2574] 2024-06-29 23:46:14,173 >> tokenizer config file saved in saves/qwen2-0.5b/lora/sft/checkpoint-5040/tokenizer_config.json\n",
+ "[INFO|tokenization_utils_base.py:2583] 2024-06-29 23:46:14,173 >> Special tokens file saved in saves/qwen2-0.5b/lora/sft/checkpoint-5040/special_tokens_map.json\n",
+ "{'loss': 0.3451, 'grad_norm': 5.190840244293213, 'learning_rate': 2.9096863958968268e-06, 'epoch': 9.01}\n",
+ "{'loss': 0.33, 'grad_norm': 3.5857107639312744, 'learning_rate': 2.8058334845816213e-06, 'epoch': 9.03}\n",
+ "{'loss': 0.3675, 'grad_norm': 4.7077860832214355, 'learning_rate': 2.7038142618741992e-06, 'epoch': 9.05}\n",
+ "{'loss': 0.4356, 'grad_norm': 4.774041175842285, 'learning_rate': 2.603632691643415e-06, 'epoch': 9.07}\n",
+ "{'loss': 0.3459, 'grad_norm': 3.2734451293945312, 'learning_rate': 2.5052926663577e-06, 'epoch': 9.09}\n",
+ "{'loss': 0.3926, 'grad_norm': 5.444535732269287, 'learning_rate': 2.408798006933882e-06, 'epoch': 9.1}\n",
+ "{'loss': 0.295, 'grad_norm': 4.564394474029541, 'learning_rate': 2.314152462588659e-06, 'epoch': 9.12}\n",
+ "{'loss': 0.3274, 'grad_norm': 3.5276427268981934, 'learning_rate': 2.221359710692961e-06, 'epoch': 9.14}\n",
+ "{'loss': 0.3454, 'grad_norm': 4.8225603103637695, 'learning_rate': 2.1304233566290964e-06, 'epoch': 9.16}\n",
+ "{'loss': 0.2982, 'grad_norm': 3.1064751148223877, 'learning_rate': 2.041346933650612e-06, 'epoch': 9.17}\n",
+ "{'loss': 0.3529, 'grad_norm': 3.431065082550049, 'learning_rate': 1.9541339027450256e-06, 'epoch': 9.19}\n",
+ "{'loss': 0.4354, 'grad_norm': 4.004822254180908, 'learning_rate': 1.8687876524993987e-06, 'epoch': 9.21}\n",
+ "{'loss': 0.3608, 'grad_norm': 5.244897842407227, 'learning_rate': 1.785311498968617e-06, 'epoch': 9.23}\n",
+ "{'loss': 0.3693, 'grad_norm': 4.393815517425537, 'learning_rate': 1.70370868554659e-06, 'epoch': 9.25}\n",
+ "{'loss': 0.3802, 'grad_norm': 4.819892883300781, 'learning_rate': 1.6239823828401945e-06, 'epoch': 9.26}\n",
+ "{'loss': 0.3838, 'grad_norm': 3.781949996948242, 'learning_rate': 1.5461356885461075e-06, 'epoch': 9.28}\n",
+ "{'loss': 0.4715, 'grad_norm': 4.076176166534424, 'learning_rate': 1.4701716273304521e-06, 'epoch': 9.3}\n",
+ "{'loss': 0.3256, 'grad_norm': 4.226771354675293, 'learning_rate': 1.3960931507112752e-06, 'epoch': 9.32}\n",
+ "{'loss': 0.3638, 'grad_norm': 3.562203884124756, 'learning_rate': 1.3239031369438326e-06, 'epoch': 9.34}\n",
+ "{'loss': 0.3687, 'grad_norm': 4.55058479309082, 'learning_rate': 1.2536043909088191e-06, 'epoch': 9.35}\n",
+ "{'loss': 0.3869, 'grad_norm': 4.373401165008545, 'learning_rate': 1.1851996440033319e-06, 'epoch': 9.37}\n",
+ "{'loss': 0.3151, 'grad_norm': 4.085133075714111, 'learning_rate': 1.118691554034773e-06, 'epoch': 9.39}\n",
+ "{'loss': 0.3557, 'grad_norm': 4.491430282592773, 'learning_rate': 1.0540827051175818e-06, 'epoch': 9.41}\n",
+ "{'loss': 0.405, 'grad_norm': 4.82833194732666, 'learning_rate': 9.913756075728087e-07, 'epoch': 9.42}\n",
+ "{'loss': 0.2972, 'grad_norm': 2.666112184524536, 'learning_rate': 9.305726978306173e-07, 'epoch': 9.44}\n",
+ "{'loss': 0.3194, 'grad_norm': 4.029996871948242, 'learning_rate': 8.716763383355864e-07, 'epoch': 9.46}\n",
+ "{'loss': 0.3984, 'grad_norm': 3.864152193069458, 'learning_rate': 8.146888174549339e-07, 'epoch': 9.48}\n",
+ "{'loss': 0.3483, 'grad_norm': 4.201892375946045, 'learning_rate': 7.596123493895991e-07, 'epoch': 9.5}\n",
+ "{'loss': 0.4642, 'grad_norm': 4.560868740081787, 'learning_rate': 7.064490740882057e-07, 'epoch': 9.51}\n",
+ "{'loss': 0.379, 'grad_norm': 4.305575370788574, 'learning_rate': 6.552010571639456e-07, 'epoch': 9.53}\n",
+ "{'loss': 0.445, 'grad_norm': 5.4909772872924805, 'learning_rate': 6.058702898142643e-07, 'epoch': 9.55}\n",
+ "{'loss': 0.3116, 'grad_norm': 4.831486225128174, 'learning_rate': 5.584586887435739e-07, 'epoch': 9.57}\n",
+ "{'loss': 0.3896, 'grad_norm': 4.905820846557617, 'learning_rate': 5.129680960887007e-07, 'epoch': 9.59}\n",
+ "{'loss': 0.3798, 'grad_norm': 3.7179861068725586, 'learning_rate': 4.6940027934735954e-07, 'epoch': 9.6}\n",
+ "{'loss': 0.3401, 'grad_norm': 4.62000036239624, 'learning_rate': 4.277569313094809e-07, 'epoch': 9.62}\n",
+ "{'loss': 0.4521, 'grad_norm': 4.725619792938232, 'learning_rate': 3.8803966999139684e-07, 'epoch': 9.64}\n",
+ "{'loss': 0.4075, 'grad_norm': 3.523742914199829, 'learning_rate': 3.50250038573019e-07, 'epoch': 9.66}\n",
+ "{'loss': 0.3438, 'grad_norm': 3.7823429107666016, 'learning_rate': 3.143895053378698e-07, 'epoch': 9.67}\n",
+ "{'loss': 0.2996, 'grad_norm': 3.2718749046325684, 'learning_rate': 2.8045946361601183e-07, 'epoch': 9.69}\n",
+ "{'loss': 0.4503, 'grad_norm': 5.158358097076416, 'learning_rate': 2.4846123172992954e-07, 'epoch': 9.71}\n",
+ "{'loss': 0.3938, 'grad_norm': 3.8553905487060547, 'learning_rate': 2.1839605294330933e-07, 'epoch': 9.73}\n",
+ "{'loss': 0.4459, 'grad_norm': 4.788202285766602, 'learning_rate': 1.9026509541272275e-07, 'epoch': 9.75}\n",
+ "{'loss': 0.3762, 'grad_norm': 4.024471759796143, 'learning_rate': 1.640694521422459e-07, 'epoch': 9.76}\n",
+ "{'loss': 0.4065, 'grad_norm': 5.944757461547852, 'learning_rate': 1.3981014094099353e-07, 'epoch': 9.78}\n",
+ "{'loss': 0.3105, 'grad_norm': 3.0800580978393555, 'learning_rate': 1.1748810438355628e-07, 'epoch': 9.8}\n",
+ "{'loss': 0.4782, 'grad_norm': 3.273432731628418, 'learning_rate': 9.710420977340762e-08, 'epoch': 9.82}\n",
+ "{'loss': 0.3914, 'grad_norm': 4.411673069000244, 'learning_rate': 7.865924910916977e-08, 'epoch': 9.83}\n",
+ "{'loss': 0.3274, 'grad_norm': 4.555184364318848, 'learning_rate': 6.215393905388278e-08, 'epoch': 9.85}\n",
+ "{'loss': 0.289, 'grad_norm': 5.107693672180176, 'learning_rate': 4.7588920907110094e-08, 'epoch': 9.87}\n",
+ "{'loss': 0.3202, 'grad_norm': 4.9626617431640625, 'learning_rate': 3.496476058006959e-08, 'epoch': 9.89}\n",
+ "{'loss': 0.433, 'grad_norm': 5.598171234130859, 'learning_rate': 2.4281948573617874e-08, 'epoch': 9.91}\n",
+ "{'loss': 0.4018, 'grad_norm': 4.289453029632568, 'learning_rate': 1.5540899959187727e-08, 'epoch': 9.92}\n",
+ "{'loss': 0.3691, 'grad_norm': 4.765395641326904, 'learning_rate': 8.741954362678772e-09, 'epoch': 9.94}\n",
+ "{'loss': 0.3645, 'grad_norm': 5.474503993988037, 'learning_rate': 3.885375951256931e-09, 'epoch': 9.96}\n",
+ "{'loss': 0.4003, 'grad_norm': 3.922280788421631, 'learning_rate': 9.713534230904041e-10, 'epoch': 9.98}\n",
+ "{'loss': 0.382, 'grad_norm': 4.276446342468262, 'learning_rate': 0.0, 'epoch': 10.0}\n",
+ "100%|█████████████████████████████████████| 5600/5600 [1:59:00<00:00, 1.26s/it][INFO|trainer.py:3788] 2024-06-29 23:58:00,034 >> \n",
+ "***** Running Evaluation *****\n",
+ "[INFO|trainer.py:3790] 2024-06-29 23:58:00,034 >> Num examples = 46\n",
+ "[INFO|trainer.py:3793] 2024-06-29 23:58:00,034 >> Batch size = 1\n",
+ "\n",
+ " 0%| | 0/46 [00:00, ?it/s]\u001b[A\n",
+ " 9%|███▊ | 4/46 [00:00<00:01, 36.19it/s]\u001b[A\n",
+ " 17%|███████▋ | 8/46 [00:00<00:01, 28.86it/s]\u001b[A\n",
+ " 24%|██████████▎ | 11/46 [00:00<00:01, 23.27it/s]\u001b[A\n",
+ " 30%|█████████████ | 14/46 [00:00<00:01, 25.05it/s]\u001b[A\n",
+ " 37%|███████████████▉ | 17/46 [00:00<00:01, 25.40it/s]\u001b[A\n",
+ " 43%|██████████████████▋ | 20/46 [00:00<00:01, 25.19it/s]\u001b[A\n",
+ " 50%|█████████████████████▌ | 23/46 [00:00<00:00, 25.41it/s]\u001b[A\n",
+ " 57%|████████████████████████▎ | 26/46 [00:01<00:00, 25.58it/s]\u001b[A\n",
+ " 63%|███████████████████████████ | 29/46 [00:01<00:00, 26.01it/s]\u001b[A\n",
+ " 70%|█████████████████████████████▉ | 32/46 [00:01<00:00, 26.06it/s]\u001b[A\n",
+ " 76%|████████████████████████████████▋ | 35/46 [00:01<00:00, 25.63it/s]\u001b[A\n",
+ " 83%|███████████████████████████████████▌ | 38/46 [00:01<00:00, 25.11it/s]\u001b[A\n",
+ " 89%|██████████████████████████████████████▎ | 41/46 [00:01<00:00, 25.13it/s]\u001b[A\n",
+ " \u001b[A\n",
+ "\u001b[A{'eval_loss': 3.466093063354492, 'eval_runtime': 1.8384, 'eval_samples_per_second': 25.021, 'eval_steps_per_second': 25.021, 'epoch': 10.0}\n",
+ "100%|█████████████████████████████████████| 5600/5600 [1:59:01<00:00, 1.26s/it]\n",
+ "100%|███████████████████████████████████████████| 46/46 [00:01<00:00, 25.62it/s]\u001b[A\n",
+ " \u001b[A[INFO|trainer.py:3478] 2024-06-29 23:58:01,873 >> Saving model checkpoint to saves/qwen2-0.5b/lora/sft/checkpoint-5600\n",
+ "[INFO|configuration_utils.py:733] 2024-06-29 23:58:02,446 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/config.json\n",
+ "[INFO|configuration_utils.py:800] 2024-06-29 23:58:02,446 >> Model config Qwen2Config {\n",
+ " \"architectures\": [\n",
+ " \"Qwen2ForCausalLM\"\n",
+ " ],\n",
+ " \"attention_dropout\": 0.0,\n",
+ " \"bos_token_id\": 151643,\n",
+ " \"eos_token_id\": 151645,\n",
+ " \"hidden_act\": \"silu\",\n",
+ " \"hidden_size\": 896,\n",
+ " \"initializer_range\": 0.02,\n",
+ " \"intermediate_size\": 4864,\n",
+ " \"max_position_embeddings\": 32768,\n",
+ " \"max_window_layers\": 24,\n",
+ " \"model_type\": \"qwen2\",\n",
+ " \"num_attention_heads\": 14,\n",
+ " \"num_hidden_layers\": 24,\n",
+ " \"num_key_value_heads\": 2,\n",
+ " \"rms_norm_eps\": 1e-06,\n",
+ " \"rope_theta\": 1000000.0,\n",
+ " \"sliding_window\": 32768,\n",
+ " \"tie_word_embeddings\": true,\n",
+ " \"torch_dtype\": \"bfloat16\",\n",
+ " \"transformers_version\": \"4.42.3\",\n",
+ " \"use_cache\": true,\n",
+ " \"use_sliding_window\": false,\n",
+ " \"vocab_size\": 151936\n",
+ "}\n",
+ "\n",
+ "[INFO|tokenization_utils_base.py:2574] 2024-06-29 23:58:02,476 >> tokenizer config file saved in saves/qwen2-0.5b/lora/sft/checkpoint-5600/tokenizer_config.json\n",
+ "[INFO|tokenization_utils_base.py:2583] 2024-06-29 23:58:02,476 >> Special tokens file saved in saves/qwen2-0.5b/lora/sft/checkpoint-5600/special_tokens_map.json\n",
+ "[INFO|trainer.py:2383] 2024-06-29 23:58:02,637 >> \n",
+ "\n",
+ "Training completed. Do not forget to share your model on huggingface.co/models =)\n",
+ "\n",
+ "\n",
+ "{'train_runtime': 7142.6727, 'train_samples_per_second': 6.275, 'train_steps_per_second': 0.784, 'train_loss': 1.0784291120512144, 'epoch': 10.0}\n",
+ "100%|█████████████████████████████████████| 5600/5600 [1:59:02<00:00, 1.28s/it]\n",
+ "[INFO|trainer.py:3478] 2024-06-29 23:58:02,640 >> Saving model checkpoint to saves/qwen2-0.5b/lora/sft\n",
+ "[INFO|configuration_utils.py:733] 2024-06-29 23:58:03,159 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/config.json\n",
+ "[INFO|configuration_utils.py:800] 2024-06-29 23:58:03,160 >> Model config Qwen2Config {\n",
+ " \"architectures\": [\n",
+ " \"Qwen2ForCausalLM\"\n",
+ " ],\n",
+ " \"attention_dropout\": 0.0,\n",
+ " \"bos_token_id\": 151643,\n",
+ " \"eos_token_id\": 151645,\n",
+ " \"hidden_act\": \"silu\",\n",
+ " \"hidden_size\": 896,\n",
+ " \"initializer_range\": 0.02,\n",
+ " \"intermediate_size\": 4864,\n",
+ " \"max_position_embeddings\": 32768,\n",
+ " \"max_window_layers\": 24,\n",
+ " \"model_type\": \"qwen2\",\n",
+ " \"num_attention_heads\": 14,\n",
+ " \"num_hidden_layers\": 24,\n",
+ " \"num_key_value_heads\": 2,\n",
+ " \"rms_norm_eps\": 1e-06,\n",
+ " \"rope_theta\": 1000000.0,\n",
+ " \"sliding_window\": 32768,\n",
+ " \"tie_word_embeddings\": true,\n",
+ " \"torch_dtype\": \"bfloat16\",\n",
+ " \"transformers_version\": \"4.42.3\",\n",
+ " \"use_cache\": true,\n",
+ " \"use_sliding_window\": false,\n",
+ " \"vocab_size\": 151936\n",
+ "}\n",
+ "\n",
+ "[INFO|tokenization_utils_base.py:2574] 2024-06-29 23:58:03,220 >> tokenizer config file saved in saves/qwen2-0.5b/lora/sft/tokenizer_config.json\n",
+ "[INFO|tokenization_utils_base.py:2583] 2024-06-29 23:58:03,220 >> Special tokens file saved in saves/qwen2-0.5b/lora/sft/special_tokens_map.json\n",
+ "***** train metrics *****\n",
+ " epoch = 9.9955\n",
+ " total_flos = 7657006GF\n",
+ " train_loss = 1.0784\n",
+ " train_runtime = 1:59:02.67\n",
+ " train_samples_per_second = 6.275\n",
+ " train_steps_per_second = 0.784\n",
+ "Figure saved at: saves/qwen2-0.5b/lora/sft/training_loss.png\n",
+ "Figure saved at: saves/qwen2-0.5b/lora/sft/training_eval_loss.png\n",
+ "[INFO|trainer.py:3788] 2024-06-29 23:58:03,541 >> \n",
+ "***** Running Evaluation *****\n",
+ "[INFO|trainer.py:3790] 2024-06-29 23:58:03,541 >> Num examples = 46\n",
+ "[INFO|trainer.py:3793] 2024-06-29 23:58:03,541 >> Batch size = 1\n",
+ "100%|███████████████████████████████████████████| 46/46 [00:01<00:00, 25.51it/s]\n",
+ "***** eval metrics *****\n",
+ " epoch = 9.9955\n",
+ " eval_loss = 3.4661\n",
+ " eval_runtime = 0:00:01.85\n",
+ " eval_samples_per_second = 24.833\n",
+ " eval_steps_per_second = 24.833\n",
+ "[INFO|modelcard.py:449] 2024-06-29 23:58:05,395 >> Dropping the following result as it does not have all the necessary fields:\n",
+ "{'task': {'name': 'Causal Language Modeling', 'type': 'text-generation'}}\n",
+ "CPU times: user 1min 32s, sys: 30.2 s, total: 2min 2s\n",
+ "Wall time: 1h 59min 52s\n"
+ ]
+ }
+ ],
+ "source": [
+ "%%time\n",
+ "\n",
+ "!./scripts/tune-lf.sh config/qwen2_0.5b_lora_sft.yaml"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Current Directory:\n",
+ "/home/inflaton/code/projects/courses/llm-finetuning/llama-factory\n",
+ "06/30/2024 06:14:31 - INFO - llamafactory.hparams.parser - Process rank: 0, device: cuda:0, n_gpu: 1, distributed training: False, compute dtype: torch.bfloat16\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-06-30 06:14:31,888 >> loading file vocab.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/vocab.json\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-06-30 06:14:31,888 >> loading file merges.txt from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/merges.txt\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-06-30 06:14:31,888 >> loading file tokenizer.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/tokenizer.json\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-06-30 06:14:31,888 >> loading file added_tokens.json from cache at None\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-06-30 06:14:31,888 >> loading file special_tokens_map.json from cache at None\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-06-30 06:14:31,888 >> loading file tokenizer_config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/tokenizer_config.json\n",
+ "[WARNING|logging.py:313] 2024-06-30 06:14:32,031 >> Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n",
+ "06/30/2024 06:14:32 - INFO - llamafactory.data.template - Replace eos token: <|im_end|>\n",
+ "06/30/2024 06:14:32 - INFO - llamafactory.data.template - Add <|im_start|> to stop words.\n",
+ "06/30/2024 06:14:32 - INFO - llamafactory.data.loader - Loading dataset alpaca_mac.json...\n",
+ "Converting format of dataset (num_proc=16): 100%|█| 4528/4528 [00:00<00:00, 1488\n",
+ "Running tokenizer on dataset (num_proc=16): 100%|█| 4528/4528 [00:01<00:00, 3433\n",
+ "input_ids:\n",
+ "[151644, 872, 198, 5501, 14683, 279, 2701, 8453, 1467, 1119, 6364, 323, 3410, 1172, 279, 24531, 2213, 11, 4302, 770, 624, 35987, 102895, 99164, 100324, 100717, 100095, 99509, 1773, 151645, 198, 151644, 77091, 198, 17949, 358, 572, 2617, 553, 264, 38835, 44486, 13, 151645]\n",
+ "inputs:\n",
+ "<|im_start|>user\n",
+ "Please translate the following Chinese text into English and provide only the translated content, nothing else.\n",
+ "全仗着狐仙搭救。<|im_end|>\n",
+ "<|im_start|>assistant\n",
+ "Because I was protected by a fox fairy.<|im_end|>\n",
+ "label_ids:\n",
+ "[-100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, 17949, 358, 572, 2617, 553, 264, 38835, 44486, 13, 151645]\n",
+ "labels:\n",
+ "Because I was protected by a fox fairy.<|im_end|>\n",
+ "[INFO|configuration_utils.py:733] 2024-06-30 06:14:35,044 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/config.json\n",
+ "[INFO|configuration_utils.py:800] 2024-06-30 06:14:35,045 >> Model config Qwen2Config {\n",
+ " \"_name_or_path\": \"Qwen/Qwen2-1.5B-Instruct\",\n",
+ " \"architectures\": [\n",
+ " \"Qwen2ForCausalLM\"\n",
+ " ],\n",
+ " \"attention_dropout\": 0.0,\n",
+ " \"bos_token_id\": 151643,\n",
+ " \"eos_token_id\": 151645,\n",
+ " \"hidden_act\": \"silu\",\n",
+ " \"hidden_size\": 1536,\n",
+ " \"initializer_range\": 0.02,\n",
+ " \"intermediate_size\": 8960,\n",
+ " \"max_position_embeddings\": 32768,\n",
+ " \"max_window_layers\": 28,\n",
+ " \"model_type\": \"qwen2\",\n",
+ " \"num_attention_heads\": 12,\n",
+ " \"num_hidden_layers\": 28,\n",
+ " \"num_key_value_heads\": 2,\n",
+ " \"rms_norm_eps\": 1e-06,\n",
+ " \"rope_theta\": 1000000.0,\n",
+ " \"sliding_window\": 32768,\n",
+ " \"tie_word_embeddings\": true,\n",
+ " \"torch_dtype\": \"bfloat16\",\n",
+ " \"transformers_version\": \"4.42.3\",\n",
+ " \"use_cache\": true,\n",
+ " \"use_sliding_window\": false,\n",
+ " \"vocab_size\": 151936\n",
+ "}\n",
+ "\n",
+ "[INFO|modeling_utils.py:3556] 2024-06-30 06:14:35,702 >> loading weights file model.safetensors from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/model.safetensors\n",
+ "[INFO|modeling_utils.py:1531] 2024-06-30 06:14:37,609 >> Instantiating Qwen2ForCausalLM model under default dtype torch.bfloat16.\n",
+ "[INFO|configuration_utils.py:1000] 2024-06-30 06:14:37,613 >> Generate config GenerationConfig {\n",
+ " \"bos_token_id\": 151643,\n",
+ " \"eos_token_id\": 151645\n",
+ "}\n",
+ "\n",
+ "[INFO|modeling_utils.py:4364] 2024-06-30 06:16:33,749 >> All model checkpoint weights were used when initializing Qwen2ForCausalLM.\n",
+ "\n",
+ "[INFO|modeling_utils.py:4372] 2024-06-30 06:16:33,749 >> All the weights of Qwen2ForCausalLM were initialized from the model checkpoint at Qwen/Qwen2-1.5B-Instruct.\n",
+ "If your task is similar to the task the model of the checkpoint was trained on, you can already use Qwen2ForCausalLM for predictions without further training.\n",
+ "[INFO|configuration_utils.py:955] 2024-06-30 06:16:34,027 >> loading configuration file generation_config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/generation_config.json\n",
+ "[INFO|configuration_utils.py:1000] 2024-06-30 06:16:34,027 >> Generate config GenerationConfig {\n",
+ " \"bos_token_id\": 151643,\n",
+ " \"do_sample\": true,\n",
+ " \"eos_token_id\": [\n",
+ " 151645,\n",
+ " 151643\n",
+ " ],\n",
+ " \"pad_token_id\": 151643,\n",
+ " \"repetition_penalty\": 1.1,\n",
+ " \"temperature\": 0.7,\n",
+ " \"top_k\": 20,\n",
+ " \"top_p\": 0.8\n",
+ "}\n",
+ "\n",
+ "06/30/2024 06:16:34 - INFO - llamafactory.model.model_utils.checkpointing - Gradient checkpointing enabled.\n",
+ "06/30/2024 06:16:34 - INFO - llamafactory.model.model_utils.attention - Using torch SDPA for faster training and inference.\n",
+ "06/30/2024 06:16:34 - INFO - llamafactory.model.adapter - Upcasting trainable params to float32.\n",
+ "06/30/2024 06:16:34 - INFO - llamafactory.model.adapter - Fine-tuning method: LoRA\n",
+ "06/30/2024 06:16:34 - INFO - llamafactory.model.model_utils.misc - Found linear modules: k_proj,q_proj,v_proj,gate_proj,up_proj,o_proj,down_proj\n",
+ "06/30/2024 06:16:34 - INFO - llamafactory.model.loader - trainable params: 9,232,384 || all params: 1,552,946,688 || trainable%: 0.5945\n",
+ "[INFO|trainer.py:642] 2024-06-30 06:16:34,928 >> Using auto half precision backend\n",
+ "[INFO|trainer.py:2128] 2024-06-30 06:16:35,081 >> ***** Running training *****\n",
+ "[INFO|trainer.py:2129] 2024-06-30 06:16:35,081 >> Num examples = 4,482\n",
+ "[INFO|trainer.py:2130] 2024-06-30 06:16:35,081 >> Num Epochs = 10\n",
+ "[INFO|trainer.py:2131] 2024-06-30 06:16:35,081 >> Instantaneous batch size per device = 1\n",
+ "[INFO|trainer.py:2134] 2024-06-30 06:16:35,081 >> Total train batch size (w. parallel, distributed & accumulation) = 8\n",
+ "[INFO|trainer.py:2135] 2024-06-30 06:16:35,081 >> Gradient Accumulation steps = 8\n",
+ "[INFO|trainer.py:2136] 2024-06-30 06:16:35,081 >> Total optimization steps = 5,600\n",
+ "[INFO|trainer.py:2137] 2024-06-30 06:16:35,083 >> Number of trainable parameters = 9,232,384\n",
+ "{'loss': 2.1598, 'grad_norm': 1.7301031351089478, 'learning_rate': 1.7857142857142857e-06, 'epoch': 0.02}\n",
+ "{'loss': 2.2894, 'grad_norm': 1.9221487045288086, 'learning_rate': 3.5714285714285714e-06, 'epoch': 0.04}\n",
+ "{'loss': 2.152, 'grad_norm': 1.5344856977462769, 'learning_rate': 5.357142857142857e-06, 'epoch': 0.05}\n",
+ "{'loss': 2.1602, 'grad_norm': 3.0139236450195312, 'learning_rate': 7.142857142857143e-06, 'epoch': 0.07}\n",
+ "{'loss': 2.3519, 'grad_norm': 1.501677393913269, 'learning_rate': 8.92857142857143e-06, 'epoch': 0.09}\n",
+ "{'loss': 2.05, 'grad_norm': 1.680209994316101, 'learning_rate': 1.0714285714285714e-05, 'epoch': 0.11}\n",
+ "{'loss': 2.1875, 'grad_norm': 1.694694995880127, 'learning_rate': 1.25e-05, 'epoch': 0.12}\n",
+ "{'loss': 1.9546, 'grad_norm': 1.5895333290100098, 'learning_rate': 1.4285714285714285e-05, 'epoch': 0.14}\n",
+ "{'loss': 2.075, 'grad_norm': 2.115245819091797, 'learning_rate': 1.6071428571428572e-05, 'epoch': 0.16}\n",
+ "{'loss': 1.9713, 'grad_norm': 1.3625324964523315, 'learning_rate': 1.785714285714286e-05, 'epoch': 0.18}\n",
+ "{'loss': 2.0099, 'grad_norm': 1.7853630781173706, 'learning_rate': 1.9642857142857145e-05, 'epoch': 0.2}\n",
+ "{'loss': 1.9603, 'grad_norm': 1.3131749629974365, 'learning_rate': 2.1428571428571428e-05, 'epoch': 0.21}\n",
+ "{'loss': 1.9619, 'grad_norm': 1.6807270050048828, 'learning_rate': 2.3214285714285715e-05, 'epoch': 0.23}\n",
+ "{'loss': 1.9889, 'grad_norm': 2.136683464050293, 'learning_rate': 2.5e-05, 'epoch': 0.25}\n",
+ "{'loss': 1.8445, 'grad_norm': 1.5379092693328857, 'learning_rate': 2.6785714285714288e-05, 'epoch': 0.27}\n",
+ "{'loss': 1.8162, 'grad_norm': 1.4818131923675537, 'learning_rate': 2.857142857142857e-05, 'epoch': 0.29}\n",
+ "{'loss': 1.9681, 'grad_norm': 1.3765653371810913, 'learning_rate': 3.0357142857142857e-05, 'epoch': 0.3}\n",
+ "{'loss': 1.7704, 'grad_norm': 1.7519148588180542, 'learning_rate': 3.2142857142857144e-05, 'epoch': 0.32}\n",
+ "{'loss': 1.8997, 'grad_norm': 2.2547669410705566, 'learning_rate': 3.392857142857143e-05, 'epoch': 0.34}\n",
+ "{'loss': 2.0083, 'grad_norm': 1.9038093090057373, 'learning_rate': 3.571428571428572e-05, 'epoch': 0.36}\n",
+ "{'loss': 1.9641, 'grad_norm': 1.864136815071106, 'learning_rate': 3.7500000000000003e-05, 'epoch': 0.37}\n",
+ "{'loss': 1.8745, 'grad_norm': 2.456977605819702, 'learning_rate': 3.928571428571429e-05, 'epoch': 0.39}\n",
+ "{'loss': 1.8564, 'grad_norm': 2.0037779808044434, 'learning_rate': 4.107142857142857e-05, 'epoch': 0.41}\n",
+ "{'loss': 2.0248, 'grad_norm': 2.459550619125366, 'learning_rate': 4.2857142857142856e-05, 'epoch': 0.43}\n",
+ "{'loss': 1.9225, 'grad_norm': 2.4255712032318115, 'learning_rate': 4.464285714285715e-05, 'epoch': 0.45}\n",
+ "{'loss': 1.8559, 'grad_norm': 2.2272531986236572, 'learning_rate': 4.642857142857143e-05, 'epoch': 0.46}\n",
+ "{'loss': 1.916, 'grad_norm': 3.067957878112793, 'learning_rate': 4.8214285714285716e-05, 'epoch': 0.48}\n",
+ "{'loss': 1.9695, 'grad_norm': 2.689528226852417, 'learning_rate': 5e-05, 'epoch': 0.5}\n",
+ "{'loss': 1.7267, 'grad_norm': 1.640542984008789, 'learning_rate': 5.1785714285714296e-05, 'epoch': 0.52}\n",
+ "{'loss': 1.8751, 'grad_norm': 2.6767070293426514, 'learning_rate': 5.3571428571428575e-05, 'epoch': 0.54}\n",
+ "{'loss': 1.8821, 'grad_norm': 2.2540671825408936, 'learning_rate': 5.535714285714286e-05, 'epoch': 0.55}\n",
+ "{'loss': 1.7133, 'grad_norm': 3.7877705097198486, 'learning_rate': 5.714285714285714e-05, 'epoch': 0.57}\n",
+ "{'loss': 1.7552, 'grad_norm': 2.7244925498962402, 'learning_rate': 5.8928571428571435e-05, 'epoch': 0.59}\n",
+ "{'loss': 1.8089, 'grad_norm': 2.4050076007843018, 'learning_rate': 6.0714285714285715e-05, 'epoch': 0.61}\n",
+ "{'loss': 1.8102, 'grad_norm': 3.4505980014801025, 'learning_rate': 6.25e-05, 'epoch': 0.62}\n",
+ "{'loss': 1.7452, 'grad_norm': 1.8404840230941772, 'learning_rate': 6.428571428571429e-05, 'epoch': 0.64}\n",
+ "{'loss': 1.9171, 'grad_norm': 2.923614025115967, 'learning_rate': 6.607142857142857e-05, 'epoch': 0.66}\n",
+ "{'loss': 1.8893, 'grad_norm': 2.2417802810668945, 'learning_rate': 6.785714285714286e-05, 'epoch': 0.68}\n",
+ "{'loss': 1.6041, 'grad_norm': 1.8358319997787476, 'learning_rate': 6.964285714285715e-05, 'epoch': 0.7}\n",
+ "{'loss': 1.7782, 'grad_norm': 2.7531838417053223, 'learning_rate': 7.142857142857143e-05, 'epoch': 0.71}\n",
+ "{'loss': 1.8365, 'grad_norm': 2.2503859996795654, 'learning_rate': 7.321428571428571e-05, 'epoch': 0.73}\n",
+ "{'loss': 1.863, 'grad_norm': 1.8987295627593994, 'learning_rate': 7.500000000000001e-05, 'epoch': 0.75}\n",
+ "{'loss': 1.8407, 'grad_norm': 2.950441598892212, 'learning_rate': 7.67857142857143e-05, 'epoch': 0.77}\n",
+ "{'loss': 1.7695, 'grad_norm': 3.3668158054351807, 'learning_rate': 7.857142857142858e-05, 'epoch': 0.79}\n",
+ "{'loss': 1.6759, 'grad_norm': 1.843374252319336, 'learning_rate': 8.035714285714287e-05, 'epoch': 0.8}\n",
+ "{'loss': 1.7465, 'grad_norm': 2.3402576446533203, 'learning_rate': 8.214285714285714e-05, 'epoch': 0.82}\n",
+ "{'loss': 1.7852, 'grad_norm': 3.2396647930145264, 'learning_rate': 8.392857142857144e-05, 'epoch': 0.84}\n",
+ "{'loss': 1.7626, 'grad_norm': 2.432474136352539, 'learning_rate': 8.571428571428571e-05, 'epoch': 0.86}\n",
+ "{'loss': 1.8173, 'grad_norm': 1.9021589756011963, 'learning_rate': 8.75e-05, 'epoch': 0.87}\n",
+ "{'loss': 1.9716, 'grad_norm': 1.968782901763916, 'learning_rate': 8.92857142857143e-05, 'epoch': 0.89}\n",
+ "{'loss': 1.8814, 'grad_norm': 2.0488665103912354, 'learning_rate': 9.107142857142857e-05, 'epoch': 0.91}\n",
+ "{'loss': 1.7689, 'grad_norm': 2.5687661170959473, 'learning_rate': 9.285714285714286e-05, 'epoch': 0.93}\n",
+ "{'loss': 1.8, 'grad_norm': 3.141063690185547, 'learning_rate': 9.464285714285715e-05, 'epoch': 0.95}\n",
+ "{'loss': 1.8067, 'grad_norm': 2.3366873264312744, 'learning_rate': 9.642857142857143e-05, 'epoch': 0.96}\n",
+ "{'loss': 1.7689, 'grad_norm': 2.356125831604004, 'learning_rate': 9.821428571428572e-05, 'epoch': 0.98}\n",
+ "{'loss': 1.7444, 'grad_norm': 1.962470293045044, 'learning_rate': 0.0001, 'epoch': 1.0}\n",
+ " 10%|███▊ | 560/5600 [15:11<2:19:14, 1.66s/it][INFO|trainer.py:3788] 2024-06-30 06:31:46,942 >> \n",
+ "***** Running Evaluation *****\n",
+ "[INFO|trainer.py:3790] 2024-06-30 06:31:46,942 >> Num examples = 46\n",
+ "[INFO|trainer.py:3793] 2024-06-30 06:31:46,942 >> Batch size = 1\n",
+ "\n",
+ " 0%| | 0/46 [00:00, ?it/s]\u001b[A\n",
+ " 7%|██▊ | 3/46 [00:00<00:01, 27.37it/s]\u001b[A\n",
+ " 13%|█████▋ | 6/46 [00:00<00:01, 22.33it/s]\u001b[A\n",
+ " 20%|████████▌ | 9/46 [00:00<00:01, 20.97it/s]\u001b[A\n",
+ " 26%|███████████▏ | 12/46 [00:00<00:01, 20.64it/s]\u001b[A\n",
+ " 33%|██████████████ | 15/46 [00:00<00:01, 20.61it/s]\u001b[A\n",
+ " 39%|████████████████▊ | 18/46 [00:00<00:01, 19.71it/s]\u001b[A\n",
+ " 43%|██████████████████▋ | 20/46 [00:00<00:01, 19.38it/s]\u001b[A\n",
+ " 48%|████████████████████▌ | 22/46 [00:01<00:01, 18.71it/s]\u001b[A\n",
+ " 52%|██████████████████████▍ | 24/46 [00:01<00:01, 18.19it/s]\u001b[A\n",
+ " 57%|████████████████████████▎ | 26/46 [00:01<00:01, 18.21it/s]\u001b[A\n",
+ " 61%|██████████████████████████▏ | 28/46 [00:01<00:00, 18.29it/s]\u001b[A\n",
+ " 65%|████████████████████████████ | 30/46 [00:01<00:00, 17.90it/s]\u001b[A\n",
+ " 70%|█████████████████████████████▉ | 32/46 [00:01<00:00, 16.92it/s]\u001b[A\n",
+ " 74%|███████████████████████████████▊ | 34/46 [00:01<00:00, 17.03it/s]\u001b[A\n",
+ " 78%|█████████████████████████████████▋ | 36/46 [00:01<00:00, 17.24it/s]\u001b[A\n",
+ " 83%|███████████████████████████████████▌ | 38/46 [00:02<00:00, 17.33it/s]\u001b[A\n",
+ " 87%|█████████████████████████████████████▍ | 40/46 [00:02<00:00, 17.09it/s]\u001b[A\n",
+ " 91%|███████████████████████████████████████▎ | 42/46 [00:02<00:00, 16.80it/s]\u001b[A\n",
+ " 96%|█████████████████████████████████████████▏ | 44/46 [00:02<00:00, 17.10it/s]\u001b[A\n",
+ " \u001b[A\n",
+ "\u001b[A{'eval_loss': 1.7407625913619995, 'eval_runtime': 2.569, 'eval_samples_per_second': 17.906, 'eval_steps_per_second': 17.906, 'epoch': 1.0}\n",
+ " 10%|███▊ | 560/5600 [15:14<2:19:14, 1.66s/it]\n",
+ "100%|███████████████████████████████████████████| 46/46 [00:02<00:00, 16.67it/s]\u001b[A\n",
+ " \u001b[A[INFO|trainer.py:3478] 2024-06-30 06:31:49,511 >> Saving model checkpoint to saves/qwen2-1.5b/lora/sft/checkpoint-560\n",
+ "[INFO|configuration_utils.py:733] 2024-06-30 06:31:50,591 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/config.json\n",
+ "[INFO|configuration_utils.py:800] 2024-06-30 06:31:50,592 >> Model config Qwen2Config {\n",
+ " \"architectures\": [\n",
+ " \"Qwen2ForCausalLM\"\n",
+ " ],\n",
+ " \"attention_dropout\": 0.0,\n",
+ " \"bos_token_id\": 151643,\n",
+ " \"eos_token_id\": 151645,\n",
+ " \"hidden_act\": \"silu\",\n",
+ " \"hidden_size\": 1536,\n",
+ " \"initializer_range\": 0.02,\n",
+ " \"intermediate_size\": 8960,\n",
+ " \"max_position_embeddings\": 32768,\n",
+ " \"max_window_layers\": 28,\n",
+ " \"model_type\": \"qwen2\",\n",
+ " \"num_attention_heads\": 12,\n",
+ " \"num_hidden_layers\": 28,\n",
+ " \"num_key_value_heads\": 2,\n",
+ " \"rms_norm_eps\": 1e-06,\n",
+ " \"rope_theta\": 1000000.0,\n",
+ " \"sliding_window\": 32768,\n",
+ " \"tie_word_embeddings\": true,\n",
+ " \"torch_dtype\": \"bfloat16\",\n",
+ " \"transformers_version\": \"4.42.3\",\n",
+ " \"use_cache\": true,\n",
+ " \"use_sliding_window\": false,\n",
+ " \"vocab_size\": 151936\n",
+ "}\n",
+ "\n",
+ "[INFO|tokenization_utils_base.py:2574] 2024-06-30 06:31:50,659 >> tokenizer config file saved in saves/qwen2-1.5b/lora/sft/checkpoint-560/tokenizer_config.json\n",
+ "[INFO|tokenization_utils_base.py:2583] 2024-06-30 06:31:50,660 >> Special tokens file saved in saves/qwen2-1.5b/lora/sft/checkpoint-560/special_tokens_map.json\n",
+ "{'loss': 1.6815, 'grad_norm': 2.0423543453216553, 'learning_rate': 9.999902864657691e-05, 'epoch': 1.02}\n",
+ "{'loss': 1.5142, 'grad_norm': 3.1950676441192627, 'learning_rate': 9.999611462404875e-05, 'epoch': 1.04}\n",
+ "{'loss': 1.6976, 'grad_norm': 2.1450624465942383, 'learning_rate': 9.999125804563732e-05, 'epoch': 1.05}\n",
+ "{'loss': 1.636, 'grad_norm': 2.6176905632019043, 'learning_rate': 9.998445910004082e-05, 'epoch': 1.07}\n",
+ "{'loss': 1.6114, 'grad_norm': 2.9323713779449463, 'learning_rate': 9.997571805142639e-05, 'epoch': 1.09}\n",
+ "{'loss': 1.6291, 'grad_norm': 2.9673070907592773, 'learning_rate': 9.996503523941994e-05, 'epoch': 1.11}\n",
+ "{'loss': 1.6302, 'grad_norm': 2.463287591934204, 'learning_rate': 9.99524110790929e-05, 'epoch': 1.12}\n",
+ "{'loss': 1.6341, 'grad_norm': 4.124421119689941, 'learning_rate': 9.993784606094612e-05, 'epoch': 1.14}\n",
+ "{'loss': 1.6547, 'grad_norm': 2.851663589477539, 'learning_rate': 9.992134075089084e-05, 'epoch': 1.16}\n",
+ "{'loss': 1.5286, 'grad_norm': 2.5066659450531006, 'learning_rate': 9.99028957902266e-05, 'epoch': 1.18}\n",
+ "{'loss': 1.4801, 'grad_norm': 2.078930139541626, 'learning_rate': 9.988251189561645e-05, 'epoch': 1.2}\n",
+ "{'loss': 1.6318, 'grad_norm': 3.086003065109253, 'learning_rate': 9.986018985905901e-05, 'epoch': 1.21}\n",
+ "{'loss': 1.5591, 'grad_norm': 3.057227849960327, 'learning_rate': 9.983593054785776e-05, 'epoch': 1.23}\n",
+ "{'loss': 1.6401, 'grad_norm': 3.679922342300415, 'learning_rate': 9.980973490458728e-05, 'epoch': 1.25}\n",
+ "{'loss': 1.6262, 'grad_norm': 3.8075058460235596, 'learning_rate': 9.978160394705668e-05, 'epoch': 1.27}\n",
+ "{'loss': 1.7599, 'grad_norm': 3.5445713996887207, 'learning_rate': 9.975153876827008e-05, 'epoch': 1.29}\n",
+ "{'loss': 1.6814, 'grad_norm': 2.6588189601898193, 'learning_rate': 9.971954053638399e-05, 'epoch': 1.3}\n",
+ "{'loss': 1.6972, 'grad_norm': 2.6084141731262207, 'learning_rate': 9.968561049466214e-05, 'epoch': 1.32}\n",
+ "{'loss': 1.6675, 'grad_norm': 3.312152147293091, 'learning_rate': 9.964974996142698e-05, 'epoch': 1.34}\n",
+ "{'loss': 1.4381, 'grad_norm': 3.4132375717163086, 'learning_rate': 9.961196033000861e-05, 'epoch': 1.36}\n",
+ "{'loss': 1.6732, 'grad_norm': 3.6682002544403076, 'learning_rate': 9.957224306869053e-05, 'epoch': 1.37}\n",
+ "{'loss': 1.5185, 'grad_norm': 4.421182155609131, 'learning_rate': 9.953059972065265e-05, 'epoch': 1.39}\n",
+ "{'loss': 1.3911, 'grad_norm': 2.5544440746307373, 'learning_rate': 9.948703190391131e-05, 'epoch': 1.41}\n",
+ "{'loss': 1.6939, 'grad_norm': 3.4235222339630127, 'learning_rate': 9.944154131125642e-05, 'epoch': 1.43}\n",
+ "{'loss': 1.5821, 'grad_norm': 3.2818450927734375, 'learning_rate': 9.939412971018574e-05, 'epoch': 1.45}\n",
+ "{'loss': 1.551, 'grad_norm': 3.252692461013794, 'learning_rate': 9.934479894283606e-05, 'epoch': 1.46}\n",
+ "{'loss': 1.7187, 'grad_norm': 2.9500677585601807, 'learning_rate': 9.92935509259118e-05, 'epoch': 1.48}\n",
+ "{'loss': 1.5578, 'grad_norm': 3.451415538787842, 'learning_rate': 9.924038765061042e-05, 'epoch': 1.5}\n",
+ "{'loss': 1.4891, 'grad_norm': 2.3982598781585693, 'learning_rate': 9.918531118254507e-05, 'epoch': 1.52}\n",
+ "{'loss': 1.6728, 'grad_norm': 3.524627685546875, 'learning_rate': 9.912832366166442e-05, 'epoch': 1.54}\n",
+ "{'loss': 1.6112, 'grad_norm': 3.316537857055664, 'learning_rate': 9.906942730216939e-05, 'epoch': 1.55}\n",
+ "{'loss': 1.547, 'grad_norm': 2.789212465286255, 'learning_rate': 9.900862439242719e-05, 'epoch': 1.57}\n",
+ "{'loss': 1.6212, 'grad_norm': 3.1522133350372314, 'learning_rate': 9.894591729488242e-05, 'epoch': 1.59}\n",
+ "{'loss': 1.7589, 'grad_norm': 2.6350767612457275, 'learning_rate': 9.888130844596524e-05, 'epoch': 1.61}\n",
+ "{'loss': 1.5101, 'grad_norm': 2.931504487991333, 'learning_rate': 9.881480035599667e-05, 'epoch': 1.62}\n",
+ "{'loss': 1.6024, 'grad_norm': 2.5779600143432617, 'learning_rate': 9.874639560909117e-05, 'epoch': 1.64}\n",
+ "{'loss': 1.5994, 'grad_norm': 3.0192410945892334, 'learning_rate': 9.867609686305617e-05, 'epoch': 1.66}\n",
+ "{'loss': 1.5899, 'grad_norm': 2.50893497467041, 'learning_rate': 9.860390684928873e-05, 'epoch': 1.68}\n",
+ "{'loss': 1.5526, 'grad_norm': 3.570330858230591, 'learning_rate': 9.852982837266955e-05, 'epoch': 1.7}\n",
+ "{'loss': 1.5617, 'grad_norm': 4.337871074676514, 'learning_rate': 9.84538643114539e-05, 'epoch': 1.71}\n",
+ "{'loss': 1.5299, 'grad_norm': 2.3411428928375244, 'learning_rate': 9.837601761715983e-05, 'epoch': 1.73}\n",
+ "{'loss': 1.6652, 'grad_norm': 2.955780029296875, 'learning_rate': 9.829629131445342e-05, 'epoch': 1.75}\n",
+ "{'loss': 1.651, 'grad_norm': 2.441587209701538, 'learning_rate': 9.82146885010314e-05, 'epoch': 1.77}\n",
+ "{'loss': 1.5477, 'grad_norm': 2.947199821472168, 'learning_rate': 9.81312123475006e-05, 'epoch': 1.78}\n",
+ "{'loss': 1.5604, 'grad_norm': 2.740534543991089, 'learning_rate': 9.804586609725499e-05, 'epoch': 1.8}\n",
+ "{'loss': 1.5216, 'grad_norm': 2.7406256198883057, 'learning_rate': 9.79586530663494e-05, 'epoch': 1.82}\n",
+ "{'loss': 1.4901, 'grad_norm': 2.576497793197632, 'learning_rate': 9.78695766433709e-05, 'epoch': 1.84}\n",
+ "{'loss': 1.6326, 'grad_norm': 2.4222359657287598, 'learning_rate': 9.777864028930705e-05, 'epoch': 1.86}\n",
+ "{'loss': 1.4982, 'grad_norm': 3.2682604789733887, 'learning_rate': 9.768584753741134e-05, 'epoch': 1.87}\n",
+ "{'loss': 1.5688, 'grad_norm': 2.756934642791748, 'learning_rate': 9.759120199306613e-05, 'epoch': 1.89}\n",
+ "{'loss': 1.6835, 'grad_norm': 3.1586759090423584, 'learning_rate': 9.74947073336423e-05, 'epoch': 1.91}\n",
+ "{'loss': 1.7065, 'grad_norm': 3.218165874481201, 'learning_rate': 9.73963673083566e-05, 'epoch': 1.93}\n",
+ "{'loss': 1.6155, 'grad_norm': 2.732252836227417, 'learning_rate': 9.72961857381258e-05, 'epoch': 1.95}\n",
+ "{'loss': 1.5021, 'grad_norm': 2.702173948287964, 'learning_rate': 9.719416651541839e-05, 'epoch': 1.96}\n",
+ "{'loss': 1.6002, 'grad_norm': 2.3407227993011475, 'learning_rate': 9.709031360410318e-05, 'epoch': 1.98}\n",
+ "{'loss': 1.5955, 'grad_norm': 3.0833232402801514, 'learning_rate': 9.698463103929542e-05, 'epoch': 2.0}\n",
+ " 20%|███████▍ | 1120/5600 [30:57<2:03:57, 1.66s/it][INFO|trainer.py:3788] 2024-06-30 06:47:32,631 >> \n",
+ "***** Running Evaluation *****\n",
+ "[INFO|trainer.py:3790] 2024-06-30 06:47:32,631 >> Num examples = 46\n",
+ "[INFO|trainer.py:3793] 2024-06-30 06:47:32,631 >> Batch size = 1\n",
+ "\n",
+ " 0%| | 0/46 [00:00, ?it/s]\u001b[A\n",
+ " 7%|██▊ | 3/46 [00:00<00:01, 28.46it/s]\u001b[A\n",
+ " 13%|█████▋ | 6/46 [00:00<00:01, 21.98it/s]\u001b[A\n",
+ " 20%|████████▌ | 9/46 [00:00<00:01, 21.50it/s]\u001b[A\n",
+ " 26%|███████████▏ | 12/46 [00:00<00:01, 20.72it/s]\u001b[A\n",
+ " 33%|██████████████ | 15/46 [00:00<00:01, 20.55it/s]\u001b[A\n",
+ " 39%|████████████████▊ | 18/46 [00:00<00:01, 20.31it/s]\u001b[A\n",
+ " 46%|███████████████████▋ | 21/46 [00:01<00:01, 20.05it/s]\u001b[A\n",
+ " 52%|██████████████████████▍ | 24/46 [00:01<00:01, 19.50it/s]\u001b[A\n",
+ " 59%|█████████████████████████▏ | 27/46 [00:01<00:00, 19.98it/s]\u001b[A\n",
+ " 65%|████████████████████████████ | 30/46 [00:01<00:00, 19.97it/s]\u001b[A\n",
+ " 72%|██████████████████████████████▊ | 33/46 [00:01<00:00, 19.91it/s]\u001b[A\n",
+ " 76%|████████████████████████████████▋ | 35/46 [00:01<00:00, 19.41it/s]\u001b[A\n",
+ " 80%|██████████████████████████████████▌ | 37/46 [00:01<00:00, 18.75it/s]\u001b[A\n",
+ " 85%|████████████████████████████████████▍ | 39/46 [00:01<00:00, 18.79it/s]\u001b[A\n",
+ " 89%|██████████████████████████████████████▎ | 41/46 [00:02<00:00, 18.91it/s]\u001b[A\n",
+ " 93%|████████████████████████████████████████▏ | 43/46 [00:02<00:00, 18.56it/s]\u001b[A\n",
+ " \u001b[A\n",
+ "\u001b[A{'eval_loss': 1.6952180862426758, 'eval_runtime': 2.3895, 'eval_samples_per_second': 19.251, 'eval_steps_per_second': 19.251, 'epoch': 2.0}\n",
+ " 20%|███████▍ | 1120/5600 [30:59<2:03:57, 1.66s/it]\n",
+ "100%|███████████████████████████████████████████| 46/46 [00:02<00:00, 18.53it/s]\u001b[A\n",
+ " \u001b[A[INFO|trainer.py:3478] 2024-06-30 06:47:35,021 >> Saving model checkpoint to saves/qwen2-1.5b/lora/sft/checkpoint-1120\n",
+ "[INFO|configuration_utils.py:733] 2024-06-30 06:47:35,643 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/config.json\n",
+ "[INFO|configuration_utils.py:800] 2024-06-30 06:47:35,644 >> Model config Qwen2Config {\n",
+ " \"architectures\": [\n",
+ " \"Qwen2ForCausalLM\"\n",
+ " ],\n",
+ " \"attention_dropout\": 0.0,\n",
+ " \"bos_token_id\": 151643,\n",
+ " \"eos_token_id\": 151645,\n",
+ " \"hidden_act\": \"silu\",\n",
+ " \"hidden_size\": 1536,\n",
+ " \"initializer_range\": 0.02,\n",
+ " \"intermediate_size\": 8960,\n",
+ " \"max_position_embeddings\": 32768,\n",
+ " \"max_window_layers\": 28,\n",
+ " \"model_type\": \"qwen2\",\n",
+ " \"num_attention_heads\": 12,\n",
+ " \"num_hidden_layers\": 28,\n",
+ " \"num_key_value_heads\": 2,\n",
+ " \"rms_norm_eps\": 1e-06,\n",
+ " \"rope_theta\": 1000000.0,\n",
+ " \"sliding_window\": 32768,\n",
+ " \"tie_word_embeddings\": true,\n",
+ " \"torch_dtype\": \"bfloat16\",\n",
+ " \"transformers_version\": \"4.42.3\",\n",
+ " \"use_cache\": true,\n",
+ " \"use_sliding_window\": false,\n",
+ " \"vocab_size\": 151936\n",
+ "}\n",
+ "\n",
+ "[INFO|tokenization_utils_base.py:2574] 2024-06-30 06:47:35,688 >> tokenizer config file saved in saves/qwen2-1.5b/lora/sft/checkpoint-1120/tokenizer_config.json\n",
+ "[INFO|tokenization_utils_base.py:2583] 2024-06-30 06:47:35,688 >> Special tokens file saved in saves/qwen2-1.5b/lora/sft/checkpoint-1120/special_tokens_map.json\n",
+ "{'loss': 1.2986, 'grad_norm': 6.459508895874023, 'learning_rate': 9.687712292719997e-05, 'epoch': 2.02}\n",
+ "{'loss': 1.1686, 'grad_norm': 2.6047580242156982, 'learning_rate': 9.67677934449517e-05, 'epoch': 2.03}\n",
+ "{'loss': 1.2613, 'grad_norm': 4.400974273681641, 'learning_rate': 9.665664684045333e-05, 'epoch': 2.05}\n",
+ "{'loss': 1.1817, 'grad_norm': 3.368881940841675, 'learning_rate': 9.654368743221022e-05, 'epoch': 2.07}\n",
+ "{'loss': 1.1255, 'grad_norm': 2.8357393741607666, 'learning_rate': 9.642891960916268e-05, 'epoch': 2.09}\n",
+ "{'loss': 1.2003, 'grad_norm': 2.8627848625183105, 'learning_rate': 9.631234783051544e-05, 'epoch': 2.11}\n",
+ "{'loss': 1.3588, 'grad_norm': 4.006772041320801, 'learning_rate': 9.619397662556435e-05, 'epoch': 2.12}\n",
+ "{'loss': 1.1791, 'grad_norm': 3.8697452545166016, 'learning_rate': 9.607381059352038e-05, 'epoch': 2.14}\n",
+ "{'loss': 1.1847, 'grad_norm': 4.039665222167969, 'learning_rate': 9.595185440333103e-05, 'epoch': 2.16}\n",
+ "{'loss': 1.1875, 'grad_norm': 4.559266567230225, 'learning_rate': 9.582811279349882e-05, 'epoch': 2.18}\n",
+ "{'loss': 1.2245, 'grad_norm': 3.3498127460479736, 'learning_rate': 9.570259057189717e-05, 'epoch': 2.2}\n",
+ "{'loss': 1.2361, 'grad_norm': 4.742955684661865, 'learning_rate': 9.557529261558367e-05, 'epoch': 2.21}\n",
+ "{'loss': 1.2929, 'grad_norm': 5.568743705749512, 'learning_rate': 9.544622387061055e-05, 'epoch': 2.23}\n",
+ "{'loss': 1.2945, 'grad_norm': 5.399260997772217, 'learning_rate': 9.53153893518325e-05, 'epoch': 2.25}\n",
+ "{'loss': 1.1725, 'grad_norm': 3.5391037464141846, 'learning_rate': 9.518279414271183e-05, 'epoch': 2.27}\n",
+ "{'loss': 1.1956, 'grad_norm': 4.30355978012085, 'learning_rate': 9.504844339512095e-05, 'epoch': 2.28}\n",
+ "{'loss': 1.2378, 'grad_norm': 3.2837555408477783, 'learning_rate': 9.491234232914221e-05, 'epoch': 2.3}\n",
+ "{'loss': 1.2634, 'grad_norm': 4.105693340301514, 'learning_rate': 9.477449623286505e-05, 'epoch': 2.32}\n",
+ "{'loss': 1.3589, 'grad_norm': 3.694589614868164, 'learning_rate': 9.463491046218058e-05, 'epoch': 2.34}\n",
+ "{'loss': 1.3134, 'grad_norm': 3.689924716949463, 'learning_rate': 9.449359044057345e-05, 'epoch': 2.36}\n",
+ "{'loss': 1.2572, 'grad_norm': 4.2927374839782715, 'learning_rate': 9.435054165891109e-05, 'epoch': 2.37}\n",
+ "{'loss': 1.4522, 'grad_norm': 4.005749225616455, 'learning_rate': 9.420576967523049e-05, 'epoch': 2.39}\n",
+ "{'loss': 1.343, 'grad_norm': 4.006478309631348, 'learning_rate': 9.405928011452211e-05, 'epoch': 2.41}\n",
+ "{'loss': 1.181, 'grad_norm': 4.455829620361328, 'learning_rate': 9.391107866851143e-05, 'epoch': 2.43}\n",
+ "{'loss': 1.2442, 'grad_norm': 3.436230421066284, 'learning_rate': 9.376117109543769e-05, 'epoch': 2.45}\n",
+ "{'loss': 1.2157, 'grad_norm': 3.515488386154175, 'learning_rate': 9.360956321983028e-05, 'epoch': 2.46}\n",
+ "{'loss': 1.2723, 'grad_norm': 3.4698567390441895, 'learning_rate': 9.345626093228233e-05, 'epoch': 2.48}\n",
+ "{'loss': 1.3747, 'grad_norm': 4.542730808258057, 'learning_rate': 9.330127018922194e-05, 'epoch': 2.5}\n",
+ "{'loss': 1.2685, 'grad_norm': 3.6365323066711426, 'learning_rate': 9.314459701268065e-05, 'epoch': 2.52}\n",
+ "{'loss': 1.2574, 'grad_norm': 3.8041131496429443, 'learning_rate': 9.298624749005951e-05, 'epoch': 2.53}\n",
+ "{'loss': 1.3031, 'grad_norm': 3.81734037399292, 'learning_rate': 9.282622777389258e-05, 'epoch': 2.55}\n",
+ "{'loss': 1.168, 'grad_norm': 4.677352428436279, 'learning_rate': 9.266454408160779e-05, 'epoch': 2.57}\n",
+ "{'loss': 1.3771, 'grad_norm': 5.038273811340332, 'learning_rate': 9.250120269528546e-05, 'epoch': 2.59}\n",
+ "{'loss': 1.2421, 'grad_norm': 5.5514702796936035, 'learning_rate': 9.233620996141421e-05, 'epoch': 2.61}\n",
+ "{'loss': 1.2833, 'grad_norm': 3.1367263793945312, 'learning_rate': 9.21695722906443e-05, 'epoch': 2.62}\n",
+ "{'loss': 1.2831, 'grad_norm': 2.603522539138794, 'learning_rate': 9.200129615753859e-05, 'epoch': 2.64}\n",
+ "{'loss': 1.2421, 'grad_norm': 3.707820177078247, 'learning_rate': 9.183138810032099e-05, 'epoch': 2.66}\n",
+ "{'loss': 1.3674, 'grad_norm': 3.9344961643218994, 'learning_rate': 9.165985472062246e-05, 'epoch': 2.68}\n",
+ "{'loss': 1.1452, 'grad_norm': 4.652283668518066, 'learning_rate': 9.148670268322438e-05, 'epoch': 2.7}\n",
+ "{'loss': 1.1737, 'grad_norm': 4.732541084289551, 'learning_rate': 9.131193871579975e-05, 'epoch': 2.71}\n",
+ "{'loss': 1.4043, 'grad_norm': 3.7013778686523438, 'learning_rate': 9.113556960865167e-05, 'epoch': 2.73}\n",
+ "{'loss': 1.334, 'grad_norm': 3.8859188556671143, 'learning_rate': 9.09576022144496e-05, 'epoch': 2.75}\n",
+ "{'loss': 1.2964, 'grad_norm': 3.6818110942840576, 'learning_rate': 9.077804344796302e-05, 'epoch': 2.77}\n",
+ "{'loss': 1.3015, 'grad_norm': 3.5502216815948486, 'learning_rate': 9.059690028579284e-05, 'epoch': 2.78}\n",
+ "{'loss': 1.1433, 'grad_norm': 3.0337369441986084, 'learning_rate': 9.041417976610027e-05, 'epoch': 2.8}\n",
+ "{'loss': 1.2503, 'grad_norm': 3.4227890968322754, 'learning_rate': 9.022988898833342e-05, 'epoch': 2.82}\n",
+ "{'loss': 1.2781, 'grad_norm': 3.566080093383789, 'learning_rate': 9.004403511295141e-05, 'epoch': 2.84}\n",
+ "{'loss': 1.2557, 'grad_norm': 4.064306735992432, 'learning_rate': 8.985662536114613e-05, 'epoch': 2.86}\n",
+ "{'loss': 1.4121, 'grad_norm': 3.106153726577759, 'learning_rate': 8.966766701456177e-05, 'epoch': 2.87}\n",
+ "{'loss': 1.2789, 'grad_norm': 3.873041868209839, 'learning_rate': 8.947716741501177e-05, 'epoch': 2.89}\n",
+ "{'loss': 1.2759, 'grad_norm': 3.9415042400360107, 'learning_rate': 8.928513396419368e-05, 'epoch': 2.91}\n",
+ "{'loss': 1.2078, 'grad_norm': 3.456357002258301, 'learning_rate': 8.90915741234015e-05, 'epoch': 2.93}\n",
+ "{'loss': 1.3886, 'grad_norm': 3.5346779823303223, 'learning_rate': 8.889649541323574e-05, 'epoch': 2.95}\n",
+ "{'loss': 1.33, 'grad_norm': 3.6706087589263916, 'learning_rate': 8.869990541331138e-05, 'epoch': 2.96}\n",
+ "{'loss': 1.2564, 'grad_norm': 4.235021591186523, 'learning_rate': 8.850181176196315e-05, 'epoch': 2.98}\n",
+ "{'loss': 1.3518, 'grad_norm': 3.6379354000091553, 'learning_rate': 8.83022221559489e-05, 'epoch': 3.0}\n",
+ " 30%|███████████ | 1680/5600 [46:41<1:48:23, 1.66s/it][INFO|trainer.py:3788] 2024-06-30 07:03:16,574 >> \n",
+ "***** Running Evaluation *****\n",
+ "[INFO|trainer.py:3790] 2024-06-30 07:03:16,574 >> Num examples = 46\n",
+ "[INFO|trainer.py:3793] 2024-06-30 07:03:16,574 >> Batch size = 1\n",
+ "\n",
+ " 0%| | 0/46 [00:00, ?it/s]\u001b[A\n",
+ " 7%|██▊ | 3/46 [00:00<00:01, 28.95it/s]\u001b[A\n",
+ " 13%|█████▋ | 6/46 [00:00<00:01, 21.09it/s]\u001b[A\n",
+ " 20%|████████▌ | 9/46 [00:00<00:01, 19.49it/s]\u001b[A\n",
+ " 26%|███████████▏ | 12/46 [00:00<00:01, 19.05it/s]\u001b[A\n",
+ " 30%|█████████████ | 14/46 [00:00<00:01, 18.97it/s]\u001b[A\n",
+ " 35%|██████████████▉ | 16/46 [00:00<00:01, 18.52it/s]\u001b[A\n",
+ " 41%|█████████████████▊ | 19/46 [00:00<00:01, 19.03it/s]\u001b[A\n",
+ " 46%|███████████████████▋ | 21/46 [00:01<00:01, 19.12it/s]\u001b[A\n",
+ " 52%|██████████████████████▍ | 24/46 [00:01<00:01, 19.40it/s]\u001b[A\n",
+ " 57%|████████████████████████▎ | 26/46 [00:01<00:01, 19.15it/s]\u001b[A\n",
+ " 61%|██████████████████████████▏ | 28/46 [00:01<00:00, 19.26it/s]\u001b[A\n",
+ " 65%|████████████████████████████ | 30/46 [00:01<00:00, 19.36it/s]\u001b[A\n",
+ " 70%|█████████████████████████████▉ | 32/46 [00:01<00:00, 19.43it/s]\u001b[A\n",
+ " 74%|███████████████████████████████▊ | 34/46 [00:01<00:00, 18.16it/s]\u001b[A\n",
+ " 78%|█████████████████████████████████▋ | 36/46 [00:01<00:00, 18.30it/s]\u001b[A\n",
+ " 83%|███████████████████████████████████▌ | 38/46 [00:01<00:00, 18.69it/s]\u001b[A\n",
+ " 87%|█████████████████████████████████████▍ | 40/46 [00:02<00:00, 18.74it/s]\u001b[A\n",
+ " 91%|███████████████████████████████████████▎ | 42/46 [00:02<00:00, 19.09it/s]\u001b[A\n",
+ " \u001b[A\n",
+ "\u001b[A{'eval_loss': 1.7901949882507324, 'eval_runtime': 2.4532, 'eval_samples_per_second': 18.751, 'eval_steps_per_second': 18.751, 'epoch': 3.0}\n",
+ " 30%|███████████ | 1680/5600 [46:43<1:48:23, 1.66s/it]\n",
+ "100%|███████████████████████████████████████████| 46/46 [00:02<00:00, 18.83it/s]\u001b[A\n",
+ " \u001b[A[INFO|trainer.py:3478] 2024-06-30 07:03:19,028 >> Saving model checkpoint to saves/qwen2-1.5b/lora/sft/checkpoint-1680\n",
+ "[INFO|configuration_utils.py:733] 2024-06-30 07:03:19,590 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/config.json\n",
+ "[INFO|configuration_utils.py:800] 2024-06-30 07:03:19,590 >> Model config Qwen2Config {\n",
+ " \"architectures\": [\n",
+ " \"Qwen2ForCausalLM\"\n",
+ " ],\n",
+ " \"attention_dropout\": 0.0,\n",
+ " \"bos_token_id\": 151643,\n",
+ " \"eos_token_id\": 151645,\n",
+ " \"hidden_act\": \"silu\",\n",
+ " \"hidden_size\": 1536,\n",
+ " \"initializer_range\": 0.02,\n",
+ " \"intermediate_size\": 8960,\n",
+ " \"max_position_embeddings\": 32768,\n",
+ " \"max_window_layers\": 28,\n",
+ " \"model_type\": \"qwen2\",\n",
+ " \"num_attention_heads\": 12,\n",
+ " \"num_hidden_layers\": 28,\n",
+ " \"num_key_value_heads\": 2,\n",
+ " \"rms_norm_eps\": 1e-06,\n",
+ " \"rope_theta\": 1000000.0,\n",
+ " \"sliding_window\": 32768,\n",
+ " \"tie_word_embeddings\": true,\n",
+ " \"torch_dtype\": \"bfloat16\",\n",
+ " \"transformers_version\": \"4.42.3\",\n",
+ " \"use_cache\": true,\n",
+ " \"use_sliding_window\": false,\n",
+ " \"vocab_size\": 151936\n",
+ "}\n",
+ "\n",
+ "[INFO|tokenization_utils_base.py:2574] 2024-06-30 07:03:19,633 >> tokenizer config file saved in saves/qwen2-1.5b/lora/sft/checkpoint-1680/tokenizer_config.json\n",
+ "[INFO|tokenization_utils_base.py:2583] 2024-06-30 07:03:19,633 >> Special tokens file saved in saves/qwen2-1.5b/lora/sft/checkpoint-1680/special_tokens_map.json\n",
+ "{'loss': 1.15, 'grad_norm': 3.3373215198516846, 'learning_rate': 8.810114435015054e-05, 'epoch': 3.02}\n",
+ "{'loss': 0.8154, 'grad_norm': 4.1678571701049805, 'learning_rate': 8.789858615727265e-05, 'epoch': 3.03}\n",
+ "{'loss': 1.0096, 'grad_norm': 7.9504194259643555, 'learning_rate': 8.7694555447539e-05, 'epoch': 3.05}\n",
+ "{'loss': 0.9017, 'grad_norm': 3.666703462600708, 'learning_rate': 8.748906014838672e-05, 'epoch': 3.07}\n",
+ "{'loss': 0.8374, 'grad_norm': 3.569261312484741, 'learning_rate': 8.728210824415827e-05, 'epoch': 3.09}\n",
+ "{'loss': 0.9234, 'grad_norm': 5.156108856201172, 'learning_rate': 8.707370777579133e-05, 'epoch': 3.11}\n",
+ "{'loss': 0.8894, 'grad_norm': 3.83931827545166, 'learning_rate': 8.68638668405062e-05, 'epoch': 3.12}\n",
+ "{'loss': 0.9901, 'grad_norm': 4.340090274810791, 'learning_rate': 8.665259359149132e-05, 'epoch': 3.14}\n",
+ "{'loss': 0.8987, 'grad_norm': 5.530636310577393, 'learning_rate': 8.643989623758643e-05, 'epoch': 3.16}\n",
+ "{'loss': 0.94, 'grad_norm': 4.701400279998779, 'learning_rate': 8.622578304296364e-05, 'epoch': 3.18}\n",
+ "{'loss': 0.9471, 'grad_norm': 5.912676811218262, 'learning_rate': 8.601026232680634e-05, 'epoch': 3.2}\n",
+ "{'loss': 0.8883, 'grad_norm': 5.244345188140869, 'learning_rate': 8.579334246298593e-05, 'epoch': 3.21}\n",
+ "{'loss': 1.1187, 'grad_norm': 3.5720531940460205, 'learning_rate': 8.557503187973651e-05, 'epoch': 3.23}\n",
+ "{'loss': 0.8993, 'grad_norm': 4.100275993347168, 'learning_rate': 8.535533905932738e-05, 'epoch': 3.25}\n",
+ "{'loss': 0.9287, 'grad_norm': 3.9435741901397705, 'learning_rate': 8.513427253773346e-05, 'epoch': 3.27}\n",
+ "{'loss': 0.8239, 'grad_norm': 4.083703994750977, 'learning_rate': 8.491184090430364e-05, 'epoch': 3.28}\n",
+ "{'loss': 1.0248, 'grad_norm': 4.739283084869385, 'learning_rate': 8.468805280142709e-05, 'epoch': 3.3}\n",
+ "{'loss': 1.0099, 'grad_norm': 4.6722493171691895, 'learning_rate': 8.446291692419736e-05, 'epoch': 3.32}\n",
+ "{'loss': 0.9861, 'grad_norm': 3.677231550216675, 'learning_rate': 8.423644202007467e-05, 'epoch': 3.34}\n",
+ "{'loss': 0.9352, 'grad_norm': 3.738945245742798, 'learning_rate': 8.400863688854597e-05, 'epoch': 3.36}\n",
+ "{'loss': 1.0084, 'grad_norm': 4.615973949432373, 'learning_rate': 8.377951038078302e-05, 'epoch': 3.37}\n",
+ "{'loss': 0.8946, 'grad_norm': 4.280567169189453, 'learning_rate': 8.354907139929851e-05, 'epoch': 3.39}\n",
+ "{'loss': 0.9536, 'grad_norm': 5.548139572143555, 'learning_rate': 8.33173288976002e-05, 'epoch': 3.41}\n",
+ "{'loss': 1.032, 'grad_norm': 4.183009147644043, 'learning_rate': 8.308429187984297e-05, 'epoch': 3.43}\n",
+ "{'loss': 0.9905, 'grad_norm': 4.598621368408203, 'learning_rate': 8.284996940047903e-05, 'epoch': 3.44}\n",
+ "{'loss': 1.0168, 'grad_norm': 3.7102458477020264, 'learning_rate': 8.261437056390606e-05, 'epoch': 3.46}\n",
+ "{'loss': 0.9419, 'grad_norm': 3.9970738887786865, 'learning_rate': 8.237750452411353e-05, 'epoch': 3.48}\n",
+ "{'loss': 0.945, 'grad_norm': 5.531300067901611, 'learning_rate': 8.213938048432697e-05, 'epoch': 3.5}\n",
+ "{'loss': 0.8867, 'grad_norm': 5.528501510620117, 'learning_rate': 8.190000769665044e-05, 'epoch': 3.52}\n",
+ "{'loss': 0.9773, 'grad_norm': 5.0458807945251465, 'learning_rate': 8.1659395461707e-05, 'epoch': 3.53}\n",
+ "{'loss': 0.9484, 'grad_norm': 7.089639663696289, 'learning_rate': 8.141755312827736e-05, 'epoch': 3.55}\n",
+ "{'loss': 1.0592, 'grad_norm': 5.28053617477417, 'learning_rate': 8.117449009293668e-05, 'epoch': 3.57}\n",
+ "{'loss': 0.8727, 'grad_norm': 3.750885009765625, 'learning_rate': 8.093021579968941e-05, 'epoch': 3.59}\n",
+ "{'loss': 0.9438, 'grad_norm': 3.9763479232788086, 'learning_rate': 8.068473973960238e-05, 'epoch': 3.61}\n",
+ "{'loss': 1.0057, 'grad_norm': 8.926958084106445, 'learning_rate': 8.043807145043604e-05, 'epoch': 3.62}\n",
+ "{'loss': 0.9272, 'grad_norm': 4.707141399383545, 'learning_rate': 8.019022051627388e-05, 'epoch': 3.64}\n",
+ "{'loss': 0.9354, 'grad_norm': 4.845958232879639, 'learning_rate': 7.994119656715002e-05, 'epoch': 3.66}\n",
+ "{'loss': 1.0041, 'grad_norm': 6.272175312042236, 'learning_rate': 7.969100927867507e-05, 'epoch': 3.68}\n",
+ "{'loss': 1.0257, 'grad_norm': 5.634955883026123, 'learning_rate': 7.943966837166023e-05, 'epoch': 3.69}\n",
+ "{'loss': 1.0411, 'grad_norm': 4.726901054382324, 'learning_rate': 7.91871836117395e-05, 'epoch': 3.71}\n",
+ "{'loss': 0.8919, 'grad_norm': 5.341351509094238, 'learning_rate': 7.89335648089903e-05, 'epoch': 3.73}\n",
+ "{'loss': 0.9918, 'grad_norm': 4.697306156158447, 'learning_rate': 7.86788218175523e-05, 'epoch': 3.75}\n",
+ "{'loss': 1.0214, 'grad_norm': 7.20255708694458, 'learning_rate': 7.842296453524463e-05, 'epoch': 3.77}\n",
+ "{'loss': 0.8907, 'grad_norm': 4.981348037719727, 'learning_rate': 7.81660029031811e-05, 'epoch': 3.78}\n",
+ "{'loss': 0.9927, 'grad_norm': 4.630974292755127, 'learning_rate': 7.79079469053842e-05, 'epoch': 3.8}\n",
+ "{'loss': 0.9723, 'grad_norm': 4.9225921630859375, 'learning_rate': 7.764880656839696e-05, 'epoch': 3.82}\n",
+ "{'loss': 0.9968, 'grad_norm': 5.320995807647705, 'learning_rate': 7.738859196089358e-05, 'epoch': 3.84}\n",
+ "{'loss': 0.8093, 'grad_norm': 4.394636154174805, 'learning_rate': 7.712731319328798e-05, 'epoch': 3.86}\n",
+ "{'loss': 0.9058, 'grad_norm': 4.045576572418213, 'learning_rate': 7.68649804173412e-05, 'epoch': 3.87}\n",
+ "{'loss': 1.0048, 'grad_norm': 3.463576316833496, 'learning_rate': 7.660160382576683e-05, 'epoch': 3.89}\n",
+ "{'loss': 0.9774, 'grad_norm': 6.120863914489746, 'learning_rate': 7.633719365183504e-05, 'epoch': 3.91}\n",
+ "{'loss': 0.8715, 'grad_norm': 4.576050758361816, 'learning_rate': 7.60717601689749e-05, 'epoch': 3.93}\n",
+ "{'loss': 0.7799, 'grad_norm': 3.344226360321045, 'learning_rate': 7.580531369037533e-05, 'epoch': 3.94}\n",
+ "{'loss': 1.1199, 'grad_norm': 4.684515476226807, 'learning_rate': 7.553786456858429e-05, 'epoch': 3.96}\n",
+ "{'loss': 1.0056, 'grad_norm': 3.8074159622192383, 'learning_rate': 7.526942319510655e-05, 'epoch': 3.98}\n",
+ "{'loss': 0.8473, 'grad_norm': 3.2416229248046875, 'learning_rate': 7.500000000000001e-05, 'epoch': 4.0}\n",
+ " 40%|██████████████ | 2240/5600 [1:02:28<1:32:15, 1.65s/it][INFO|trainer.py:3788] 2024-06-30 07:19:03,748 >> \n",
+ "***** Running Evaluation *****\n",
+ "[INFO|trainer.py:3790] 2024-06-30 07:19:03,748 >> Num examples = 46\n",
+ "[INFO|trainer.py:3793] 2024-06-30 07:19:03,748 >> Batch size = 1\n",
+ "\n",
+ " 0%| | 0/46 [00:00, ?it/s]\u001b[A\n",
+ " 7%|██▊ | 3/46 [00:00<00:01, 29.51it/s]\u001b[A\n",
+ " 13%|█████▋ | 6/46 [00:00<00:01, 22.92it/s]\u001b[A\n",
+ " 20%|████████▌ | 9/46 [00:00<00:01, 21.24it/s]\u001b[A\n",
+ " 26%|███████████▏ | 12/46 [00:00<00:01, 20.58it/s]\u001b[A\n",
+ " 33%|██████████████ | 15/46 [00:00<00:01, 20.33it/s]\u001b[A\n",
+ " 39%|████████████████▊ | 18/46 [00:00<00:01, 19.87it/s]\u001b[A\n",
+ " 46%|███████████████████▋ | 21/46 [00:01<00:01, 19.88it/s]\u001b[A\n",
+ " 52%|██████████████████████▍ | 24/46 [00:01<00:01, 19.79it/s]\u001b[A\n",
+ " 57%|████████████████████████▎ | 26/46 [00:01<00:01, 19.64it/s]\u001b[A\n",
+ " 61%|██████████████████████████▏ | 28/46 [00:01<00:00, 19.52it/s]\u001b[A\n",
+ " 65%|████████████████████████████ | 30/46 [00:01<00:00, 19.07it/s]\u001b[A\n",
+ " 70%|█████████████████████████████▉ | 32/46 [00:01<00:00, 18.47it/s]\u001b[A\n",
+ " 74%|███████████████████████████████▊ | 34/46 [00:01<00:00, 18.40it/s]\u001b[A\n",
+ " 78%|█████████████████████████████████▋ | 36/46 [00:01<00:00, 18.48it/s]\u001b[A\n",
+ " 83%|███████████████████████████████████▌ | 38/46 [00:01<00:00, 18.44it/s]\u001b[A\n",
+ " 87%|█████████████████████████████████████▍ | 40/46 [00:02<00:00, 18.37it/s]\u001b[A\n",
+ " 91%|███████████████████████████████████████▎ | 42/46 [00:02<00:00, 18.42it/s]\u001b[A\n",
+ " 96%|█████████████████████████████████████████▏ | 44/46 [00:02<00:00, 18.56it/s]\u001b[A\n",
+ " \u001b[A\n",
+ "\u001b[A{'eval_loss': 1.9558732509613037, 'eval_runtime': 2.4173, 'eval_samples_per_second': 19.03, 'eval_steps_per_second': 19.03, 'epoch': 4.0}\n",
+ " 40%|██████████████ | 2240/5600 [1:02:31<1:32:15, 1.65s/it]\n",
+ "100%|███████████████████████████████████████████| 46/46 [00:02<00:00, 18.79it/s]\u001b[A\n",
+ " \u001b[A[INFO|trainer.py:3478] 2024-06-30 07:19:06,166 >> Saving model checkpoint to saves/qwen2-1.5b/lora/sft/checkpoint-2240\n",
+ "[INFO|configuration_utils.py:733] 2024-06-30 07:19:06,736 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/config.json\n",
+ "[INFO|configuration_utils.py:800] 2024-06-30 07:19:06,736 >> Model config Qwen2Config {\n",
+ " \"architectures\": [\n",
+ " \"Qwen2ForCausalLM\"\n",
+ " ],\n",
+ " \"attention_dropout\": 0.0,\n",
+ " \"bos_token_id\": 151643,\n",
+ " \"eos_token_id\": 151645,\n",
+ " \"hidden_act\": \"silu\",\n",
+ " \"hidden_size\": 1536,\n",
+ " \"initializer_range\": 0.02,\n",
+ " \"intermediate_size\": 8960,\n",
+ " \"max_position_embeddings\": 32768,\n",
+ " \"max_window_layers\": 28,\n",
+ " \"model_type\": \"qwen2\",\n",
+ " \"num_attention_heads\": 12,\n",
+ " \"num_hidden_layers\": 28,\n",
+ " \"num_key_value_heads\": 2,\n",
+ " \"rms_norm_eps\": 1e-06,\n",
+ " \"rope_theta\": 1000000.0,\n",
+ " \"sliding_window\": 32768,\n",
+ " \"tie_word_embeddings\": true,\n",
+ " \"torch_dtype\": \"bfloat16\",\n",
+ " \"transformers_version\": \"4.42.3\",\n",
+ " \"use_cache\": true,\n",
+ " \"use_sliding_window\": false,\n",
+ " \"vocab_size\": 151936\n",
+ "}\n",
+ "\n",
+ "[INFO|tokenization_utils_base.py:2574] 2024-06-30 07:19:06,775 >> tokenizer config file saved in saves/qwen2-1.5b/lora/sft/checkpoint-2240/tokenizer_config.json\n",
+ "[INFO|tokenization_utils_base.py:2583] 2024-06-30 07:19:06,776 >> Special tokens file saved in saves/qwen2-1.5b/lora/sft/checkpoint-2240/special_tokens_map.json\n",
+ "{'loss': 0.7957, 'grad_norm': 4.7848052978515625, 'learning_rate': 7.472960545147038e-05, 'epoch': 4.02}\n",
+ "{'loss': 0.631, 'grad_norm': 4.0633649826049805, 'learning_rate': 7.445825005546448e-05, 'epoch': 4.03}\n",
+ "{'loss': 0.7215, 'grad_norm': 4.579054832458496, 'learning_rate': 7.4185944355262e-05, 'epoch': 4.05}\n",
+ "{'loss': 0.6143, 'grad_norm': 4.206972122192383, 'learning_rate': 7.391269893106592e-05, 'epoch': 4.07}\n",
+ "{'loss': 0.6206, 'grad_norm': 4.568854808807373, 'learning_rate': 7.363852439959135e-05, 'epoch': 4.09}\n",
+ "{'loss': 0.7532, 'grad_norm': 4.556376934051514, 'learning_rate': 7.33634314136531e-05, 'epoch': 4.11}\n",
+ "{'loss': 0.5763, 'grad_norm': 5.614088535308838, 'learning_rate': 7.308743066175172e-05, 'epoch': 4.12}\n",
+ "{'loss': 0.6721, 'grad_norm': 9.883162498474121, 'learning_rate': 7.281053286765815e-05, 'epoch': 4.14}\n",
+ "{'loss': 0.7292, 'grad_norm': 4.348223686218262, 'learning_rate': 7.253274878999727e-05, 'epoch': 4.16}\n",
+ "{'loss': 0.6231, 'grad_norm': 3.8863847255706787, 'learning_rate': 7.225408922182961e-05, 'epoch': 4.18}\n",
+ "{'loss': 0.6226, 'grad_norm': 8.888066291809082, 'learning_rate': 7.197456499023225e-05, 'epoch': 4.19}\n",
+ "{'loss': 0.6884, 'grad_norm': 4.336313247680664, 'learning_rate': 7.169418695587791e-05, 'epoch': 4.21}\n",
+ "{'loss': 0.6207, 'grad_norm': 4.8345112800598145, 'learning_rate': 7.141296601261314e-05, 'epoch': 4.23}\n",
+ "{'loss': 0.7374, 'grad_norm': 5.926130771636963, 'learning_rate': 7.113091308703498e-05, 'epoch': 4.25}\n",
+ "{'loss': 0.6514, 'grad_norm': 5.482864856719971, 'learning_rate': 7.084803913806641e-05, 'epoch': 4.27}\n",
+ "{'loss': 0.6823, 'grad_norm': 6.955780029296875, 'learning_rate': 7.056435515653059e-05, 'epoch': 4.28}\n",
+ "{'loss': 0.6645, 'grad_norm': 10.101131439208984, 'learning_rate': 7.027987216472377e-05, 'epoch': 4.3}\n",
+ "{'loss': 0.5592, 'grad_norm': 5.433172225952148, 'learning_rate': 6.999460121598704e-05, 'epoch': 4.32}\n",
+ "{'loss': 0.7167, 'grad_norm': 6.796948432922363, 'learning_rate': 6.970855339427698e-05, 'epoch': 4.34}\n",
+ "{'loss': 0.6169, 'grad_norm': 3.863734006881714, 'learning_rate': 6.942173981373474e-05, 'epoch': 4.36}\n",
+ "{'loss': 0.6398, 'grad_norm': 3.5763421058654785, 'learning_rate': 6.91341716182545e-05, 'epoch': 4.37}\n",
+ "{'loss': 0.7066, 'grad_norm': 4.580504894256592, 'learning_rate': 6.884585998105026e-05, 'epoch': 4.39}\n",
+ "{'loss': 0.647, 'grad_norm': 5.605465412139893, 'learning_rate': 6.855681610422189e-05, 'epoch': 4.41}\n",
+ "{'loss': 0.7305, 'grad_norm': 3.584751605987549, 'learning_rate': 6.826705121831976e-05, 'epoch': 4.43}\n",
+ "{'loss': 0.7089, 'grad_norm': 6.16217041015625, 'learning_rate': 6.797657658190839e-05, 'epoch': 4.44}\n",
+ "{'loss': 0.5937, 'grad_norm': 3.5875444412231445, 'learning_rate': 6.768540348112907e-05, 'epoch': 4.46}\n",
+ "{'loss': 0.7547, 'grad_norm': 4.757628917694092, 'learning_rate': 6.739354322926136e-05, 'epoch': 4.48}\n",
+ "{'loss': 0.6766, 'grad_norm': 5.012269020080566, 'learning_rate': 6.710100716628344e-05, 'epoch': 4.5}\n",
+ "{'loss': 0.6572, 'grad_norm': 5.2274861335754395, 'learning_rate': 6.680780665843155e-05, 'epoch': 4.52}\n",
+ "{'loss': 0.7324, 'grad_norm': 5.329851150512695, 'learning_rate': 6.651395309775837e-05, 'epoch': 4.53}\n",
+ "{'loss': 0.7048, 'grad_norm': 6.628935813903809, 'learning_rate': 6.621945790169036e-05, 'epoch': 4.55}\n",
+ "{'loss': 0.683, 'grad_norm': 5.611133575439453, 'learning_rate': 6.592433251258423e-05, 'epoch': 4.57}\n",
+ "{'loss': 0.678, 'grad_norm': 5.939394950866699, 'learning_rate': 6.562858839728223e-05, 'epoch': 4.59}\n",
+ "{'loss': 0.5917, 'grad_norm': 10.7606201171875, 'learning_rate': 6.533223704666672e-05, 'epoch': 4.61}\n",
+ "{'loss': 0.6841, 'grad_norm': 6.9346089363098145, 'learning_rate': 6.503528997521366e-05, 'epoch': 4.62}\n",
+ "{'loss': 0.7123, 'grad_norm': 5.5321364402771, 'learning_rate': 6.473775872054521e-05, 'epoch': 4.64}\n",
+ "{'loss': 0.6863, 'grad_norm': 4.588550567626953, 'learning_rate': 6.44396548429815e-05, 'epoch': 4.66}\n",
+ "{'loss': 0.6828, 'grad_norm': 4.912098407745361, 'learning_rate': 6.414098992509138e-05, 'epoch': 4.68}\n",
+ "{'loss': 0.6467, 'grad_norm': 7.303658485412598, 'learning_rate': 6.384177557124247e-05, 'epoch': 4.69}\n",
+ "{'loss': 0.6986, 'grad_norm': 4.651421546936035, 'learning_rate': 6.354202340715026e-05, 'epoch': 4.71}\n",
+ "{'loss': 0.6532, 'grad_norm': 4.812668800354004, 'learning_rate': 6.324174507942637e-05, 'epoch': 4.73}\n",
+ "{'loss': 0.6688, 'grad_norm': 4.208662509918213, 'learning_rate': 6.294095225512603e-05, 'epoch': 4.75}\n",
+ "{'loss': 0.674, 'grad_norm': 5.573670387268066, 'learning_rate': 6.263965662129487e-05, 'epoch': 4.77}\n",
+ "{'loss': 0.7383, 'grad_norm': 4.292681694030762, 'learning_rate': 6.233786988451468e-05, 'epoch': 4.78}\n",
+ "{'loss': 0.7485, 'grad_norm': 4.01066255569458, 'learning_rate': 6.203560377044866e-05, 'epoch': 4.8}\n",
+ "{'loss': 0.6504, 'grad_norm': 4.865781307220459, 'learning_rate': 6.173287002338577e-05, 'epoch': 4.82}\n",
+ "{'loss': 0.6004, 'grad_norm': 3.7839431762695312, 'learning_rate': 6.142968040578449e-05, 'epoch': 4.84}\n",
+ "{'loss': 0.6536, 'grad_norm': 7.742762565612793, 'learning_rate': 6.112604669781572e-05, 'epoch': 4.85}\n",
+ "{'loss': 0.7322, 'grad_norm': 5.1467719078063965, 'learning_rate': 6.0821980696905146e-05, 'epoch': 4.87}\n",
+ "{'loss': 0.7393, 'grad_norm': 6.317329406738281, 'learning_rate': 6.0517494217274794e-05, 'epoch': 4.89}\n",
+ "{'loss': 0.7548, 'grad_norm': 5.456260681152344, 'learning_rate': 6.021259908948402e-05, 'epoch': 4.91}\n",
+ "{'loss': 0.6722, 'grad_norm': 5.673567771911621, 'learning_rate': 5.9907307159969884e-05, 'epoch': 4.93}\n",
+ "{'loss': 0.7384, 'grad_norm': 4.8718366622924805, 'learning_rate': 5.960163029058682e-05, 'epoch': 4.94}\n",
+ "{'loss': 0.7634, 'grad_norm': 5.771657943725586, 'learning_rate': 5.9295580358145744e-05, 'epoch': 4.96}\n",
+ "{'loss': 0.7871, 'grad_norm': 4.919590473175049, 'learning_rate': 5.898916925395264e-05, 'epoch': 4.98}\n",
+ "{'loss': 0.7701, 'grad_norm': 4.445159912109375, 'learning_rate': 5.868240888334653e-05, 'epoch': 5.0}\n",
+ " 50%|█████████████████▌ | 2800/5600 [1:18:09<1:20:41, 1.73s/it][INFO|trainer.py:3788] 2024-06-30 07:34:44,634 >> \n",
+ "***** Running Evaluation *****\n",
+ "[INFO|trainer.py:3790] 2024-06-30 07:34:44,634 >> Num examples = 46\n",
+ "[INFO|trainer.py:3793] 2024-06-30 07:34:44,634 >> Batch size = 1\n",
+ "\n",
+ " 0%| | 0/46 [00:00, ?it/s]\u001b[A\n",
+ " 7%|██▊ | 3/46 [00:00<00:01, 24.62it/s]\u001b[A\n",
+ " 13%|█████▋ | 6/46 [00:00<00:02, 19.70it/s]\u001b[A\n",
+ " 20%|████████▌ | 9/46 [00:00<00:01, 19.03it/s]\u001b[A\n",
+ " 24%|██████████▎ | 11/46 [00:00<00:01, 18.15it/s]\u001b[A\n",
+ " 28%|████████████▏ | 13/46 [00:00<00:01, 17.79it/s]\u001b[A\n",
+ " 33%|██████████████ | 15/46 [00:00<00:01, 17.92it/s]\u001b[A\n",
+ " 37%|███████████████▉ | 17/46 [00:00<00:01, 17.64it/s]\u001b[A\n",
+ " 41%|█████████████████▊ | 19/46 [00:01<00:01, 18.03it/s]\u001b[A\n",
+ " 46%|███████████████████▋ | 21/46 [00:01<00:01, 17.86it/s]\u001b[A\n",
+ " 50%|█████████████████████▌ | 23/46 [00:01<00:01, 18.04it/s]\u001b[A\n",
+ " 54%|███████████████████████▎ | 25/46 [00:01<00:01, 17.74it/s]\u001b[A\n",
+ " 59%|█████████████████████████▏ | 27/46 [00:01<00:01, 17.80it/s]\u001b[A\n",
+ " 63%|███████████████████████████ | 29/46 [00:01<00:00, 17.87it/s]\u001b[A\n",
+ " 67%|████████████████████████████▉ | 31/46 [00:01<00:00, 17.65it/s]\u001b[A\n",
+ " 72%|██████████████████████████████▊ | 33/46 [00:01<00:00, 17.19it/s]\u001b[A\n",
+ " 76%|████████████████████████████████▋ | 35/46 [00:01<00:00, 17.14it/s]\u001b[A\n",
+ " 80%|██████████████████████████████████▌ | 37/46 [00:02<00:00, 17.11it/s]\u001b[A\n",
+ " 85%|████████████████████████████████████▍ | 39/46 [00:02<00:00, 17.21it/s]\u001b[A\n",
+ " 89%|██████████████████████████████████████▎ | 41/46 [00:02<00:00, 17.35it/s]\u001b[A\n",
+ " 93%|████████████████████████████████████████▏ | 43/46 [00:02<00:00, 17.38it/s]\u001b[A\n",
+ " \u001b[A\n",
+ "\u001b[A{'eval_loss': 2.203040599822998, 'eval_runtime': 2.649, 'eval_samples_per_second': 17.365, 'eval_steps_per_second': 17.365, 'epoch': 5.0}\n",
+ " 50%|█████████████████▌ | 2800/5600 [1:18:12<1:20:41, 1.73s/it]\n",
+ "100%|███████████████████████████████████████████| 46/46 [00:02<00:00, 17.26it/s]\u001b[A\n",
+ " \u001b[A[INFO|trainer.py:3478] 2024-06-30 07:34:47,283 >> Saving model checkpoint to saves/qwen2-1.5b/lora/sft/checkpoint-2800\n",
+ "[INFO|configuration_utils.py:733] 2024-06-30 07:34:47,846 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/config.json\n",
+ "[INFO|configuration_utils.py:800] 2024-06-30 07:34:47,846 >> Model config Qwen2Config {\n",
+ " \"architectures\": [\n",
+ " \"Qwen2ForCausalLM\"\n",
+ " ],\n",
+ " \"attention_dropout\": 0.0,\n",
+ " \"bos_token_id\": 151643,\n",
+ " \"eos_token_id\": 151645,\n",
+ " \"hidden_act\": \"silu\",\n",
+ " \"hidden_size\": 1536,\n",
+ " \"initializer_range\": 0.02,\n",
+ " \"intermediate_size\": 8960,\n",
+ " \"max_position_embeddings\": 32768,\n",
+ " \"max_window_layers\": 28,\n",
+ " \"model_type\": \"qwen2\",\n",
+ " \"num_attention_heads\": 12,\n",
+ " \"num_hidden_layers\": 28,\n",
+ " \"num_key_value_heads\": 2,\n",
+ " \"rms_norm_eps\": 1e-06,\n",
+ " \"rope_theta\": 1000000.0,\n",
+ " \"sliding_window\": 32768,\n",
+ " \"tie_word_embeddings\": true,\n",
+ " \"torch_dtype\": \"bfloat16\",\n",
+ " \"transformers_version\": \"4.42.3\",\n",
+ " \"use_cache\": true,\n",
+ " \"use_sliding_window\": false,\n",
+ " \"vocab_size\": 151936\n",
+ "}\n",
+ "\n",
+ "[INFO|tokenization_utils_base.py:2574] 2024-06-30 07:34:47,887 >> tokenizer config file saved in saves/qwen2-1.5b/lora/sft/checkpoint-2800/tokenizer_config.json\n",
+ "[INFO|tokenization_utils_base.py:2583] 2024-06-30 07:34:47,887 >> Special tokens file saved in saves/qwen2-1.5b/lora/sft/checkpoint-2800/special_tokens_map.json\n",
+ "{'loss': 0.5161, 'grad_norm': 3.8078582286834717, 'learning_rate': 5.837531116523682e-05, 'epoch': 5.02}\n",
+ "{'loss': 0.4045, 'grad_norm': 4.426279544830322, 'learning_rate': 5.806788803164034e-05, 'epoch': 5.03}\n",
+ "{'loss': 0.4832, 'grad_norm': 6.388131618499756, 'learning_rate': 5.7760151427217576e-05, 'epoch': 5.05}\n",
+ "{'loss': 0.4554, 'grad_norm': 4.689113616943359, 'learning_rate': 5.745211330880872e-05, 'epoch': 5.07}\n",
+ "{'loss': 0.4532, 'grad_norm': 4.104332447052002, 'learning_rate': 5.714378564496901e-05, 'epoch': 5.09}\n",
+ "{'loss': 0.5509, 'grad_norm': 4.345515727996826, 'learning_rate': 5.683518041550368e-05, 'epoch': 5.1}\n",
+ "{'loss': 0.4002, 'grad_norm': 6.301547527313232, 'learning_rate': 5.6526309611002594e-05, 'epoch': 5.12}\n",
+ "{'loss': 0.4822, 'grad_norm': 5.300792217254639, 'learning_rate': 5.621718523237427e-05, 'epoch': 5.14}\n",
+ "{'loss': 0.4324, 'grad_norm': 4.0373311042785645, 'learning_rate': 5.590781929037965e-05, 'epoch': 5.16}\n",
+ "{'loss': 0.4274, 'grad_norm': 6.742273330688477, 'learning_rate': 5.559822380516539e-05, 'epoch': 5.18}\n",
+ "{'loss': 0.494, 'grad_norm': 6.803271293640137, 'learning_rate': 5.5288410805796895e-05, 'epoch': 5.19}\n",
+ "{'loss': 0.4682, 'grad_norm': 3.1775426864624023, 'learning_rate': 5.497839232979084e-05, 'epoch': 5.21}\n",
+ "{'loss': 0.4614, 'grad_norm': 3.7366745471954346, 'learning_rate': 5.466818042264753e-05, 'epoch': 5.23}\n",
+ "{'loss': 0.5448, 'grad_norm': 5.096468448638916, 'learning_rate': 5.435778713738292e-05, 'epoch': 5.25}\n",
+ "{'loss': 0.4847, 'grad_norm': 4.3523712158203125, 'learning_rate': 5.404722453406017e-05, 'epoch': 5.27}\n",
+ "{'loss': 0.4473, 'grad_norm': 4.652655601501465, 'learning_rate': 5.373650467932122e-05, 'epoch': 5.28}\n",
+ "{'loss': 0.4453, 'grad_norm': 4.760082244873047, 'learning_rate': 5.3425639645917834e-05, 'epoch': 5.3}\n",
+ "{'loss': 0.4814, 'grad_norm': 5.638540267944336, 'learning_rate': 5.311464151224261e-05, 'epoch': 5.32}\n",
+ "{'loss': 0.4013, 'grad_norm': 3.9371888637542725, 'learning_rate': 5.2803522361859594e-05, 'epoch': 5.34}\n",
+ "{'loss': 0.4984, 'grad_norm': 4.2124152183532715, 'learning_rate': 5.249229428303486e-05, 'epoch': 5.35}\n",
+ "{'loss': 0.4914, 'grad_norm': 6.735795974731445, 'learning_rate': 5.218096936826681e-05, 'epoch': 5.37}\n",
+ "{'loss': 0.464, 'grad_norm': 4.825798988342285, 'learning_rate': 5.18695597138163e-05, 'epoch': 5.39}\n",
+ "{'loss': 0.3474, 'grad_norm': 4.686152458190918, 'learning_rate': 5.155807741923666e-05, 'epoch': 5.41}\n",
+ "{'loss': 0.3999, 'grad_norm': 4.344501972198486, 'learning_rate': 5.124653458690365e-05, 'epoch': 5.43}\n",
+ "{'loss': 0.3818, 'grad_norm': 3.8981587886810303, 'learning_rate': 5.0934943321545115e-05, 'epoch': 5.44}\n",
+ "{'loss': 0.4732, 'grad_norm': 8.811891555786133, 'learning_rate': 5.062331572977076e-05, 'epoch': 5.46}\n",
+ "{'loss': 0.609, 'grad_norm': 4.967749118804932, 'learning_rate': 5.031166391960168e-05, 'epoch': 5.48}\n",
+ "{'loss': 0.4442, 'grad_norm': 4.958866596221924, 'learning_rate': 5e-05, 'epoch': 5.5}\n",
+ "{'loss': 0.4474, 'grad_norm': 4.941844940185547, 'learning_rate': 4.968833608039832e-05, 'epoch': 5.52}\n",
+ "{'loss': 0.5222, 'grad_norm': 4.754947662353516, 'learning_rate': 4.9376684270229254e-05, 'epoch': 5.53}\n",
+ "{'loss': 0.4465, 'grad_norm': 4.058730125427246, 'learning_rate': 4.9065056678454904e-05, 'epoch': 5.55}\n",
+ "{'loss': 0.5767, 'grad_norm': 5.571474552154541, 'learning_rate': 4.875346541309637e-05, 'epoch': 5.57}\n",
+ "{'loss': 0.4737, 'grad_norm': 4.0056939125061035, 'learning_rate': 4.844192258076336e-05, 'epoch': 5.59}\n",
+ "{'loss': 0.5223, 'grad_norm': 5.950839042663574, 'learning_rate': 4.813044028618373e-05, 'epoch': 5.6}\n",
+ "{'loss': 0.5301, 'grad_norm': 4.6719255447387695, 'learning_rate': 4.781903063173321e-05, 'epoch': 5.62}\n",
+ "{'loss': 0.4188, 'grad_norm': 4.333907127380371, 'learning_rate': 4.750770571696514e-05, 'epoch': 5.64}\n",
+ "{'loss': 0.4934, 'grad_norm': 6.121321678161621, 'learning_rate': 4.7196477638140404e-05, 'epoch': 5.66}\n",
+ "{'loss': 0.4343, 'grad_norm': 5.436617374420166, 'learning_rate': 4.68853584877574e-05, 'epoch': 5.68}\n",
+ "{'loss': 0.4969, 'grad_norm': 5.086023330688477, 'learning_rate': 4.657436035408217e-05, 'epoch': 5.69}\n",
+ "{'loss': 0.4571, 'grad_norm': 5.212259769439697, 'learning_rate': 4.626349532067879e-05, 'epoch': 5.71}\n",
+ "{'loss': 0.5086, 'grad_norm': 4.355545997619629, 'learning_rate': 4.595277546593984e-05, 'epoch': 5.73}\n",
+ "{'loss': 0.4502, 'grad_norm': 3.553330421447754, 'learning_rate': 4.564221286261709e-05, 'epoch': 5.75}\n",
+ "{'loss': 0.5377, 'grad_norm': 4.984807014465332, 'learning_rate': 4.5331819577352474e-05, 'epoch': 5.77}\n",
+ "{'loss': 0.4203, 'grad_norm': 10.004477500915527, 'learning_rate': 4.502160767020918e-05, 'epoch': 5.78}\n",
+ "{'loss': 0.4515, 'grad_norm': 4.771313190460205, 'learning_rate': 4.471158919420312e-05, 'epoch': 5.8}\n",
+ "{'loss': 0.4102, 'grad_norm': 3.963116407394409, 'learning_rate': 4.4401776194834613e-05, 'epoch': 5.82}\n",
+ "{'loss': 0.4378, 'grad_norm': 5.920322895050049, 'learning_rate': 4.409218070962036e-05, 'epoch': 5.84}\n",
+ "{'loss': 0.5433, 'grad_norm': 5.597177505493164, 'learning_rate': 4.378281476762576e-05, 'epoch': 5.85}\n",
+ "{'loss': 0.3711, 'grad_norm': 10.070011138916016, 'learning_rate': 4.347369038899744e-05, 'epoch': 5.87}\n",
+ "{'loss': 0.5813, 'grad_norm': 3.5711491107940674, 'learning_rate': 4.316481958449634e-05, 'epoch': 5.89}\n",
+ "{'loss': 0.4328, 'grad_norm': 4.168658256530762, 'learning_rate': 4.285621435503101e-05, 'epoch': 5.91}\n",
+ "{'loss': 0.5556, 'grad_norm': 10.734298706054688, 'learning_rate': 4.254788669119127e-05, 'epoch': 5.93}\n",
+ "{'loss': 0.4497, 'grad_norm': 4.482186794281006, 'learning_rate': 4.223984857278242e-05, 'epoch': 5.94}\n",
+ "{'loss': 0.5173, 'grad_norm': 5.7400054931640625, 'learning_rate': 4.1932111968359664e-05, 'epoch': 5.96}\n",
+ "{'loss': 0.5062, 'grad_norm': 4.264299392700195, 'learning_rate': 4.162468883476319e-05, 'epoch': 5.98}\n",
+ "{'loss': 0.4793, 'grad_norm': 9.265963554382324, 'learning_rate': 4.131759111665349e-05, 'epoch': 6.0}\n",
+ " 60%|█████████████████████ | 3360/5600 [1:33:57<1:02:06, 1.66s/it][INFO|trainer.py:3788] 2024-06-30 07:50:32,520 >> \n",
+ "***** Running Evaluation *****\n",
+ "[INFO|trainer.py:3790] 2024-06-30 07:50:32,520 >> Num examples = 46\n",
+ "[INFO|trainer.py:3793] 2024-06-30 07:50:32,520 >> Batch size = 1\n",
+ "\n",
+ " 0%| | 0/46 [00:00, ?it/s]\u001b[A\n",
+ " 7%|██▊ | 3/46 [00:00<00:01, 29.07it/s]\u001b[A\n",
+ " 13%|█████▋ | 6/46 [00:00<00:01, 23.12it/s]\u001b[A\n",
+ " 20%|████████▌ | 9/46 [00:00<00:01, 20.27it/s]\u001b[A\n",
+ " 26%|███████████▏ | 12/46 [00:00<00:01, 20.10it/s]\u001b[A\n",
+ " 33%|██████████████ | 15/46 [00:00<00:01, 20.29it/s]\u001b[A\n",
+ " 39%|████████████████▊ | 18/46 [00:00<00:01, 19.71it/s]\u001b[A\n",
+ " 43%|██████████████████▋ | 20/46 [00:00<00:01, 19.50it/s]\u001b[A\n",
+ " 48%|████████████████████▌ | 22/46 [00:01<00:01, 18.02it/s]\u001b[A\n",
+ " 52%|██████████████████████▍ | 24/46 [00:01<00:01, 17.97it/s]\u001b[A\n",
+ " 57%|████████████████████████▎ | 26/46 [00:01<00:01, 18.23it/s]\u001b[A\n",
+ " 61%|██████████████████████████▏ | 28/46 [00:01<00:00, 18.18it/s]\u001b[A\n",
+ " 65%|████████████████████████████ | 30/46 [00:01<00:00, 18.06it/s]\u001b[A\n",
+ " 70%|█████████████████████████████▉ | 32/46 [00:01<00:00, 18.33it/s]\u001b[A\n",
+ " 74%|███████████████████████████████▊ | 34/46 [00:01<00:00, 18.31it/s]\u001b[A\n",
+ " 78%|█████████████████████████████████▋ | 36/46 [00:01<00:00, 18.47it/s]\u001b[A\n",
+ " 83%|███████████████████████████████████▌ | 38/46 [00:01<00:00, 18.61it/s]\u001b[A\n",
+ " 87%|█████████████████████████████████████▍ | 40/46 [00:02<00:00, 18.26it/s]\u001b[A\n",
+ " 91%|███████████████████████████████████████▎ | 42/46 [00:02<00:00, 18.35it/s]\u001b[A\n",
+ " 96%|█████████████████████████████████████████▏ | 44/46 [00:02<00:00, 18.73it/s]\u001b[A\n",
+ " \u001b[A\n",
+ "\u001b[A{'eval_loss': 2.468099594116211, 'eval_runtime': 2.4745, 'eval_samples_per_second': 18.59, 'eval_steps_per_second': 18.59, 'epoch': 6.0}\n",
+ " 60%|█████████████████████ | 3360/5600 [1:33:59<1:02:06, 1.66s/it]\n",
+ "100%|███████████████████████████████████████████| 46/46 [00:02<00:00, 18.73it/s]\u001b[A\n",
+ " \u001b[A[INFO|trainer.py:3478] 2024-06-30 07:50:34,996 >> Saving model checkpoint to saves/qwen2-1.5b/lora/sft/checkpoint-3360\n",
+ "[INFO|configuration_utils.py:733] 2024-06-30 07:50:35,897 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/config.json\n",
+ "[INFO|configuration_utils.py:800] 2024-06-30 07:50:35,897 >> Model config Qwen2Config {\n",
+ " \"architectures\": [\n",
+ " \"Qwen2ForCausalLM\"\n",
+ " ],\n",
+ " \"attention_dropout\": 0.0,\n",
+ " \"bos_token_id\": 151643,\n",
+ " \"eos_token_id\": 151645,\n",
+ " \"hidden_act\": \"silu\",\n",
+ " \"hidden_size\": 1536,\n",
+ " \"initializer_range\": 0.02,\n",
+ " \"intermediate_size\": 8960,\n",
+ " \"max_position_embeddings\": 32768,\n",
+ " \"max_window_layers\": 28,\n",
+ " \"model_type\": \"qwen2\",\n",
+ " \"num_attention_heads\": 12,\n",
+ " \"num_hidden_layers\": 28,\n",
+ " \"num_key_value_heads\": 2,\n",
+ " \"rms_norm_eps\": 1e-06,\n",
+ " \"rope_theta\": 1000000.0,\n",
+ " \"sliding_window\": 32768,\n",
+ " \"tie_word_embeddings\": true,\n",
+ " \"torch_dtype\": \"bfloat16\",\n",
+ " \"transformers_version\": \"4.42.3\",\n",
+ " \"use_cache\": true,\n",
+ " \"use_sliding_window\": false,\n",
+ " \"vocab_size\": 151936\n",
+ "}\n",
+ "\n",
+ "[INFO|tokenization_utils_base.py:2574] 2024-06-30 07:50:35,949 >> tokenizer config file saved in saves/qwen2-1.5b/lora/sft/checkpoint-3360/tokenizer_config.json\n",
+ "[INFO|tokenization_utils_base.py:2583] 2024-06-30 07:50:35,949 >> Special tokens file saved in saves/qwen2-1.5b/lora/sft/checkpoint-3360/special_tokens_map.json\n",
+ "{'loss': 0.3304, 'grad_norm': 3.7002038955688477, 'learning_rate': 4.101083074604737e-05, 'epoch': 6.02}\n",
+ "{'loss': 0.2771, 'grad_norm': 4.872511863708496, 'learning_rate': 4.0704419641854274e-05, 'epoch': 6.03}\n",
+ "{'loss': 0.297, 'grad_norm': 3.524137020111084, 'learning_rate': 4.03983697094132e-05, 'epoch': 6.05}\n",
+ "{'loss': 0.3305, 'grad_norm': 1.7784379720687866, 'learning_rate': 4.0092692840030134e-05, 'epoch': 6.07}\n",
+ "{'loss': 0.3112, 'grad_norm': 4.346820831298828, 'learning_rate': 3.978740091051599e-05, 'epoch': 6.09}\n",
+ "{'loss': 0.349, 'grad_norm': 3.382376194000244, 'learning_rate': 3.9482505782725224e-05, 'epoch': 6.1}\n",
+ "{'loss': 0.256, 'grad_norm': 5.693515300750732, 'learning_rate': 3.917801930309486e-05, 'epoch': 6.12}\n",
+ "{'loss': 0.3141, 'grad_norm': 4.295924186706543, 'learning_rate': 3.887395330218429e-05, 'epoch': 6.14}\n",
+ "{'loss': 0.3338, 'grad_norm': 6.968880653381348, 'learning_rate': 3.857031959421553e-05, 'epoch': 6.16}\n",
+ "{'loss': 0.3056, 'grad_norm': 5.941532135009766, 'learning_rate': 3.8267129976614254e-05, 'epoch': 6.18}\n",
+ "{'loss': 0.3486, 'grad_norm': 3.909396171569824, 'learning_rate': 3.7964396229551364e-05, 'epoch': 6.19}\n",
+ "{'loss': 0.3718, 'grad_norm': 3.2766306400299072, 'learning_rate': 3.7662130115485314e-05, 'epoch': 6.21}\n",
+ "{'loss': 0.3308, 'grad_norm': 2.8996589183807373, 'learning_rate': 3.7360343378705124e-05, 'epoch': 6.23}\n",
+ "{'loss': 0.3928, 'grad_norm': 3.5176424980163574, 'learning_rate': 3.705904774487396e-05, 'epoch': 6.25}\n",
+ "{'loss': 0.2392, 'grad_norm': 10.349823951721191, 'learning_rate': 3.675825492057364e-05, 'epoch': 6.27}\n",
+ "{'loss': 0.3343, 'grad_norm': 7.081973552703857, 'learning_rate': 3.6457976592849754e-05, 'epoch': 6.28}\n",
+ "{'loss': 0.3097, 'grad_norm': 4.772485733032227, 'learning_rate': 3.6158224428757535e-05, 'epoch': 6.3}\n",
+ "{'loss': 0.3416, 'grad_norm': 3.539324998855591, 'learning_rate': 3.585901007490863e-05, 'epoch': 6.32}\n",
+ "{'loss': 0.34, 'grad_norm': 3.7091081142425537, 'learning_rate': 3.556034515701852e-05, 'epoch': 6.34}\n",
+ "{'loss': 0.3241, 'grad_norm': 5.218664646148682, 'learning_rate': 3.5262241279454785e-05, 'epoch': 6.35}\n",
+ "{'loss': 0.297, 'grad_norm': 3.1589152812957764, 'learning_rate': 3.4964710024786354e-05, 'epoch': 6.37}\n",
+ "{'loss': 0.3073, 'grad_norm': 2.8222711086273193, 'learning_rate': 3.4667762953333295e-05, 'epoch': 6.39}\n",
+ "{'loss': 0.2632, 'grad_norm': 5.614787578582764, 'learning_rate': 3.4371411602717784e-05, 'epoch': 6.41}\n",
+ "{'loss': 0.3375, 'grad_norm': 3.49419903755188, 'learning_rate': 3.4075667487415785e-05, 'epoch': 6.43}\n",
+ "{'loss': 0.323, 'grad_norm': 3.6888363361358643, 'learning_rate': 3.3780542098309654e-05, 'epoch': 6.44}\n",
+ "{'loss': 0.384, 'grad_norm': 2.8714163303375244, 'learning_rate': 3.3486046902241664e-05, 'epoch': 6.46}\n",
+ "{'loss': 0.36, 'grad_norm': 3.664397716522217, 'learning_rate': 3.319219334156847e-05, 'epoch': 6.48}\n",
+ "{'loss': 0.3239, 'grad_norm': 6.702901840209961, 'learning_rate': 3.289899283371657e-05, 'epoch': 6.5}\n",
+ "{'loss': 0.4023, 'grad_norm': 4.371044158935547, 'learning_rate': 3.2606456770738636e-05, 'epoch': 6.51}\n",
+ "{'loss': 0.2967, 'grad_norm': 7.265868663787842, 'learning_rate': 3.231459651887093e-05, 'epoch': 6.53}\n",
+ "{'loss': 0.3192, 'grad_norm': 4.020201683044434, 'learning_rate': 3.2023423418091626e-05, 'epoch': 6.55}\n",
+ "{'loss': 0.2575, 'grad_norm': 4.1831374168396, 'learning_rate': 3.173294878168025e-05, 'epoch': 6.57}\n",
+ "{'loss': 0.4101, 'grad_norm': 3.9656155109405518, 'learning_rate': 3.1443183895778105e-05, 'epoch': 6.59}\n",
+ "{'loss': 0.3168, 'grad_norm': 4.220931053161621, 'learning_rate': 3.115414001894974e-05, 'epoch': 6.6}\n",
+ "{'loss': 0.3144, 'grad_norm': 5.018192768096924, 'learning_rate': 3.086582838174551e-05, 'epoch': 6.62}\n",
+ "{'loss': 0.3115, 'grad_norm': 5.038303852081299, 'learning_rate': 3.0578260186265265e-05, 'epoch': 6.64}\n",
+ "{'loss': 0.2704, 'grad_norm': 2.8466811180114746, 'learning_rate': 3.029144660572304e-05, 'epoch': 6.66}\n",
+ "{'loss': 0.3401, 'grad_norm': 6.789051055908203, 'learning_rate': 3.000539878401296e-05, 'epoch': 6.68}\n",
+ "{'loss': 0.3225, 'grad_norm': 4.522548198699951, 'learning_rate': 2.9720127835276256e-05, 'epoch': 6.69}\n",
+ "{'loss': 0.3306, 'grad_norm': 3.3021113872528076, 'learning_rate': 2.9435644843469436e-05, 'epoch': 6.71}\n",
+ "{'loss': 0.3113, 'grad_norm': 6.549985885620117, 'learning_rate': 2.9151960861933614e-05, 'epoch': 6.73}\n",
+ "{'loss': 0.3258, 'grad_norm': 5.234971523284912, 'learning_rate': 2.886908691296504e-05, 'epoch': 6.75}\n",
+ "{'loss': 0.3151, 'grad_norm': 11.139360427856445, 'learning_rate': 2.858703398738686e-05, 'epoch': 6.76}\n",
+ "{'loss': 0.2697, 'grad_norm': 4.061713695526123, 'learning_rate': 2.8305813044122097e-05, 'epoch': 6.78}\n",
+ "{'loss': 0.2933, 'grad_norm': 5.231247425079346, 'learning_rate': 2.8025435009767747e-05, 'epoch': 6.8}\n",
+ "{'loss': 0.3019, 'grad_norm': 4.183421611785889, 'learning_rate': 2.774591077817038e-05, 'epoch': 6.82}\n",
+ "{'loss': 0.3599, 'grad_norm': 3.7946386337280273, 'learning_rate': 2.746725121000273e-05, 'epoch': 6.84}\n",
+ "{'loss': 0.2529, 'grad_norm': 4.079517364501953, 'learning_rate': 2.718946713234185e-05, 'epoch': 6.85}\n",
+ "{'loss': 0.3049, 'grad_norm': 4.9091033935546875, 'learning_rate': 2.6912569338248315e-05, 'epoch': 6.87}\n",
+ "{'loss': 0.3128, 'grad_norm': 4.097899436950684, 'learning_rate': 2.66365685863469e-05, 'epoch': 6.89}\n",
+ "{'loss': 0.3901, 'grad_norm': 5.156275749206543, 'learning_rate': 2.636147560040866e-05, 'epoch': 6.91}\n",
+ "{'loss': 0.2444, 'grad_norm': 4.529239177703857, 'learning_rate': 2.6087301068934106e-05, 'epoch': 6.93}\n",
+ "{'loss': 0.3085, 'grad_norm': 2.783036470413208, 'learning_rate': 2.581405564473801e-05, 'epoch': 6.94}\n",
+ "{'loss': 0.3202, 'grad_norm': 5.854332447052002, 'learning_rate': 2.5541749944535554e-05, 'epoch': 6.96}\n",
+ "{'loss': 0.3932, 'grad_norm': 5.028692722320557, 'learning_rate': 2.527039454852963e-05, 'epoch': 6.98}\n",
+ "{'loss': 0.2907, 'grad_norm': 7.342068672180176, 'learning_rate': 2.500000000000001e-05, 'epoch': 7.0}\n",
+ " 70%|█████████████████████████▉ | 3920/5600 [1:49:46<47:02, 1.68s/it][INFO|trainer.py:3788] 2024-06-30 08:06:21,782 >> \n",
+ "***** Running Evaluation *****\n",
+ "[INFO|trainer.py:3790] 2024-06-30 08:06:21,782 >> Num examples = 46\n",
+ "[INFO|trainer.py:3793] 2024-06-30 08:06:21,782 >> Batch size = 1\n",
+ "\n",
+ " 0%| | 0/46 [00:00, ?it/s]\u001b[A\n",
+ " 7%|██▊ | 3/46 [00:00<00:01, 28.22it/s]\u001b[A\n",
+ " 13%|█████▋ | 6/46 [00:00<00:01, 20.48it/s]\u001b[A\n",
+ " 20%|████████▌ | 9/46 [00:00<00:01, 19.88it/s]\u001b[A\n",
+ " 26%|███████████▏ | 12/46 [00:00<00:01, 19.66it/s]\u001b[A\n",
+ " 33%|██████████████ | 15/46 [00:00<00:01, 19.39it/s]\u001b[A\n",
+ " 37%|███████████████▉ | 17/46 [00:00<00:01, 19.14it/s]\u001b[A\n",
+ " 41%|█████████████████▊ | 19/46 [00:00<00:01, 18.76it/s]\u001b[A\n",
+ " 46%|███████████████████▋ | 21/46 [00:01<00:01, 18.05it/s]\u001b[A\n",
+ " 50%|█████████████████████▌ | 23/46 [00:01<00:01, 18.18it/s]\u001b[A\n",
+ " 54%|███████████████████████▎ | 25/46 [00:01<00:01, 17.78it/s]\u001b[A\n",
+ " 59%|█████████████████████████▏ | 27/46 [00:01<00:01, 17.79it/s]\u001b[A\n",
+ " 63%|███████████████████████████ | 29/46 [00:01<00:00, 18.33it/s]\u001b[A\n",
+ " 67%|████████████████████████████▉ | 31/46 [00:01<00:00, 17.54it/s]\u001b[A\n",
+ " 72%|██████████████████████████████▊ | 33/46 [00:01<00:00, 17.87it/s]\u001b[A\n",
+ " 76%|████████████████████████████████▋ | 35/46 [00:01<00:00, 17.98it/s]\u001b[A\n",
+ " 80%|██████████████████████████████████▌ | 37/46 [00:01<00:00, 18.37it/s]\u001b[A\n",
+ " 85%|████████████████████████████████████▍ | 39/46 [00:02<00:00, 17.48it/s]\u001b[A\n",
+ " 89%|██████████████████████████████████████▎ | 41/46 [00:02<00:00, 17.09it/s]\u001b[A\n",
+ " 93%|████████████████████████████████████████▏ | 43/46 [00:02<00:00, 17.19it/s]\u001b[A\n",
+ " \u001b[A\n",
+ "\u001b[A{'eval_loss': 2.79950213432312, 'eval_runtime': 2.5687, 'eval_samples_per_second': 17.908, 'eval_steps_per_second': 17.908, 'epoch': 7.0}\n",
+ " 70%|█████████████████████████▉ | 3920/5600 [1:49:49<47:02, 1.68s/it]\n",
+ "100%|███████████████████████████████████████████| 46/46 [00:02<00:00, 17.22it/s]\u001b[A\n",
+ " \u001b[A[INFO|trainer.py:3478] 2024-06-30 08:06:24,351 >> Saving model checkpoint to saves/qwen2-1.5b/lora/sft/checkpoint-3920\n",
+ "[INFO|configuration_utils.py:733] 2024-06-30 08:06:24,921 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/config.json\n",
+ "[INFO|configuration_utils.py:800] 2024-06-30 08:06:24,921 >> Model config Qwen2Config {\n",
+ " \"architectures\": [\n",
+ " \"Qwen2ForCausalLM\"\n",
+ " ],\n",
+ " \"attention_dropout\": 0.0,\n",
+ " \"bos_token_id\": 151643,\n",
+ " \"eos_token_id\": 151645,\n",
+ " \"hidden_act\": \"silu\",\n",
+ " \"hidden_size\": 1536,\n",
+ " \"initializer_range\": 0.02,\n",
+ " \"intermediate_size\": 8960,\n",
+ " \"max_position_embeddings\": 32768,\n",
+ " \"max_window_layers\": 28,\n",
+ " \"model_type\": \"qwen2\",\n",
+ " \"num_attention_heads\": 12,\n",
+ " \"num_hidden_layers\": 28,\n",
+ " \"num_key_value_heads\": 2,\n",
+ " \"rms_norm_eps\": 1e-06,\n",
+ " \"rope_theta\": 1000000.0,\n",
+ " \"sliding_window\": 32768,\n",
+ " \"tie_word_embeddings\": true,\n",
+ " \"torch_dtype\": \"bfloat16\",\n",
+ " \"transformers_version\": \"4.42.3\",\n",
+ " \"use_cache\": true,\n",
+ " \"use_sliding_window\": false,\n",
+ " \"vocab_size\": 151936\n",
+ "}\n",
+ "\n",
+ "[INFO|tokenization_utils_base.py:2574] 2024-06-30 08:06:24,969 >> tokenizer config file saved in saves/qwen2-1.5b/lora/sft/checkpoint-3920/tokenizer_config.json\n",
+ "[INFO|tokenization_utils_base.py:2583] 2024-06-30 08:06:24,969 >> Special tokens file saved in saves/qwen2-1.5b/lora/sft/checkpoint-3920/special_tokens_map.json\n",
+ "{'loss': 0.173, 'grad_norm': 2.5581254959106445, 'learning_rate': 2.473057680489348e-05, 'epoch': 7.01}\n",
+ "{'loss': 0.155, 'grad_norm': 1.571964144706726, 'learning_rate': 2.4462135431415733e-05, 'epoch': 7.03}\n",
+ "{'loss': 0.1707, 'grad_norm': 4.100607872009277, 'learning_rate': 2.4194686309624663e-05, 'epoch': 7.05}\n",
+ "{'loss': 0.2566, 'grad_norm': 3.124080181121826, 'learning_rate': 2.39282398310251e-05, 'epoch': 7.07}\n",
+ "{'loss': 0.1927, 'grad_norm': 2.3269150257110596, 'learning_rate': 2.366280634816496e-05, 'epoch': 7.09}\n",
+ "{'loss': 0.2814, 'grad_norm': 2.976987838745117, 'learning_rate': 2.3398396174233178e-05, 'epoch': 7.1}\n",
+ "{'loss': 0.1816, 'grad_norm': 7.45166015625, 'learning_rate': 2.3135019582658802e-05, 'epoch': 7.12}\n",
+ "{'loss': 0.2094, 'grad_norm': 3.6462252140045166, 'learning_rate': 2.2872686806712035e-05, 'epoch': 7.14}\n",
+ "{'loss': 0.1964, 'grad_norm': 4.667559623718262, 'learning_rate': 2.261140803910644e-05, 'epoch': 7.16}\n",
+ "{'loss': 0.2011, 'grad_norm': 1.900875449180603, 'learning_rate': 2.235119343160303e-05, 'epoch': 7.18}\n",
+ "{'loss': 0.1984, 'grad_norm': 3.4627413749694824, 'learning_rate': 2.2092053094615813e-05, 'epoch': 7.19}\n",
+ "{'loss': 0.2214, 'grad_norm': 3.783445358276367, 'learning_rate': 2.1833997096818898e-05, 'epoch': 7.21}\n",
+ "{'loss': 0.2047, 'grad_norm': 3.812368154525757, 'learning_rate': 2.157703546475539e-05, 'epoch': 7.23}\n",
+ "{'loss': 0.2822, 'grad_norm': 4.192997455596924, 'learning_rate': 2.132117818244771e-05, 'epoch': 7.25}\n",
+ "{'loss': 0.1728, 'grad_norm': 2.5023624897003174, 'learning_rate': 2.1066435191009715e-05, 'epoch': 7.26}\n",
+ "{'loss': 0.257, 'grad_norm': 7.863531589508057, 'learning_rate': 2.0812816388260518e-05, 'epoch': 7.28}\n",
+ "{'loss': 0.1866, 'grad_norm': 3.7875170707702637, 'learning_rate': 2.056033162833977e-05, 'epoch': 7.3}\n",
+ "{'loss': 0.1975, 'grad_norm': 1.9177666902542114, 'learning_rate': 2.0308990721324927e-05, 'epoch': 7.32}\n",
+ "{'loss': 0.2784, 'grad_norm': 3.343583345413208, 'learning_rate': 2.0058803432849987e-05, 'epoch': 7.34}\n",
+ "{'loss': 0.1964, 'grad_norm': 4.66720724105835, 'learning_rate': 1.980977948372612e-05, 'epoch': 7.35}\n",
+ "{'loss': 0.2273, 'grad_norm': 4.310459136962891, 'learning_rate': 1.9561928549563968e-05, 'epoch': 7.37}\n",
+ "{'loss': 0.2671, 'grad_norm': 3.40097975730896, 'learning_rate': 1.931526026039764e-05, 'epoch': 7.39}\n",
+ "{'loss': 0.1796, 'grad_norm': 4.316131591796875, 'learning_rate': 1.906978420031059e-05, 'epoch': 7.41}\n",
+ "{'loss': 0.2422, 'grad_norm': 3.5017640590667725, 'learning_rate': 1.8825509907063327e-05, 'epoch': 7.43}\n",
+ "{'loss': 0.2054, 'grad_norm': 3.1226840019226074, 'learning_rate': 1.8582446871722636e-05, 'epoch': 7.44}\n",
+ "{'loss': 0.2701, 'grad_norm': 3.492358684539795, 'learning_rate': 1.8340604538293015e-05, 'epoch': 7.46}\n",
+ "{'loss': 0.2063, 'grad_norm': 3.7369136810302734, 'learning_rate': 1.8099992303349577e-05, 'epoch': 7.48}\n",
+ "{'loss': 0.2125, 'grad_norm': 2.5832254886627197, 'learning_rate': 1.7860619515673033e-05, 'epoch': 7.5}\n",
+ "{'loss': 0.2224, 'grad_norm': 4.129978179931641, 'learning_rate': 1.7622495475886487e-05, 'epoch': 7.51}\n",
+ "{'loss': 0.2496, 'grad_norm': 3.395150661468506, 'learning_rate': 1.738562943609396e-05, 'epoch': 7.53}\n",
+ "{'loss': 0.2337, 'grad_norm': 3.5454814434051514, 'learning_rate': 1.7150030599520984e-05, 'epoch': 7.55}\n",
+ "{'loss': 0.166, 'grad_norm': 4.08375883102417, 'learning_rate': 1.691570812015704e-05, 'epoch': 7.57}\n",
+ "{'loss': 0.1812, 'grad_norm': 2.692218542098999, 'learning_rate': 1.6682671102399805e-05, 'epoch': 7.59}\n",
+ "{'loss': 0.1598, 'grad_norm': 4.0160231590271, 'learning_rate': 1.6450928600701504e-05, 'epoch': 7.6}\n",
+ "{'loss': 0.1428, 'grad_norm': 3.891842842102051, 'learning_rate': 1.622048961921699e-05, 'epoch': 7.62}\n",
+ "{'loss': 0.2096, 'grad_norm': 4.69844913482666, 'learning_rate': 1.599136311145402e-05, 'epoch': 7.64}\n",
+ "{'loss': 0.3205, 'grad_norm': 3.1264488697052, 'learning_rate': 1.5763557979925324e-05, 'epoch': 7.66}\n",
+ "{'loss': 0.2708, 'grad_norm': 4.53989315032959, 'learning_rate': 1.553708307580265e-05, 'epoch': 7.68}\n",
+ "{'loss': 0.2095, 'grad_norm': 2.3542563915252686, 'learning_rate': 1.531194719857292e-05, 'epoch': 7.69}\n",
+ "{'loss': 0.1645, 'grad_norm': 5.031445503234863, 'learning_rate': 1.5088159095696363e-05, 'epoch': 7.71}\n",
+ "{'loss': 0.2475, 'grad_norm': 5.543506145477295, 'learning_rate': 1.4865727462266543e-05, 'epoch': 7.73}\n",
+ "{'loss': 0.2831, 'grad_norm': 3.0280065536499023, 'learning_rate': 1.4644660940672627e-05, 'epoch': 7.75}\n",
+ "{'loss': 0.2326, 'grad_norm': 3.493994951248169, 'learning_rate': 1.4424968120263504e-05, 'epoch': 7.76}\n",
+ "{'loss': 0.2206, 'grad_norm': 3.2602357864379883, 'learning_rate': 1.4206657537014079e-05, 'epoch': 7.78}\n",
+ "{'loss': 0.2249, 'grad_norm': 5.095388412475586, 'learning_rate': 1.398973767319368e-05, 'epoch': 7.8}\n",
+ "{'loss': 0.2642, 'grad_norm': 13.810650825500488, 'learning_rate': 1.3774216957036367e-05, 'epoch': 7.82}\n",
+ "{'loss': 0.1592, 'grad_norm': 11.616129875183105, 'learning_rate': 1.3560103762413584e-05, 'epoch': 7.84}\n",
+ "{'loss': 0.1754, 'grad_norm': 4.300165176391602, 'learning_rate': 1.3347406408508695e-05, 'epoch': 7.85}\n",
+ "{'loss': 0.2721, 'grad_norm': 3.653315782546997, 'learning_rate': 1.3136133159493802e-05, 'epoch': 7.87}\n",
+ "{'loss': 0.1998, 'grad_norm': 3.611405611038208, 'learning_rate': 1.2926292224208664e-05, 'epoch': 7.89}\n",
+ "{'loss': 0.2173, 'grad_norm': 12.638509750366211, 'learning_rate': 1.2717891755841722e-05, 'epoch': 7.91}\n",
+ "{'loss': 0.2315, 'grad_norm': 12.878602981567383, 'learning_rate': 1.2510939851613285e-05, 'epoch': 7.93}\n",
+ "{'loss': 0.2456, 'grad_norm': 3.8157997131347656, 'learning_rate': 1.230544455246101e-05, 'epoch': 7.94}\n",
+ "{'loss': 0.1785, 'grad_norm': 4.757344722747803, 'learning_rate': 1.2101413842727345e-05, 'epoch': 7.96}\n",
+ "{'loss': 0.2599, 'grad_norm': 9.757575035095215, 'learning_rate': 1.1898855649849461e-05, 'epoch': 7.98}\n",
+ "{'loss': 0.1662, 'grad_norm': 2.5260682106018066, 'learning_rate': 1.1697777844051105e-05, 'epoch': 8.0}\n",
+ " 80%|█████████████████████████████▌ | 4480/5600 [2:05:14<30:43, 1.65s/it][INFO|trainer.py:3788] 2024-06-30 08:21:50,010 >> \n",
+ "***** Running Evaluation *****\n",
+ "[INFO|trainer.py:3790] 2024-06-30 08:21:50,010 >> Num examples = 46\n",
+ "[INFO|trainer.py:3793] 2024-06-30 08:21:50,010 >> Batch size = 1\n",
+ "\n",
+ " 0%| | 0/46 [00:00, ?it/s]\u001b[A\n",
+ " 7%|██▊ | 3/46 [00:00<00:01, 26.94it/s]\u001b[A\n",
+ " 13%|█████▋ | 6/46 [00:00<00:01, 22.20it/s]\u001b[A\n",
+ " 20%|████████▌ | 9/46 [00:00<00:01, 21.22it/s]\u001b[A\n",
+ " 26%|███████████▏ | 12/46 [00:00<00:01, 20.62it/s]\u001b[A\n",
+ " 33%|██████████████ | 15/46 [00:00<00:01, 19.95it/s]\u001b[A\n",
+ " 39%|████████████████▊ | 18/46 [00:00<00:01, 20.12it/s]\u001b[A\n",
+ " 46%|███████████████████▋ | 21/46 [00:01<00:01, 20.20it/s]\u001b[A\n",
+ " 52%|██████████████████████▍ | 24/46 [00:01<00:01, 19.55it/s]\u001b[A\n",
+ " 57%|████████████████████████▎ | 26/46 [00:01<00:01, 19.49it/s]\u001b[A\n",
+ " 61%|██████████████████████████▏ | 28/46 [00:01<00:00, 19.50it/s]\u001b[A\n",
+ " 67%|████████████████████████████▉ | 31/46 [00:01<00:00, 19.86it/s]\u001b[A\n",
+ " 74%|███████████████████████████████▊ | 34/46 [00:01<00:00, 19.99it/s]\u001b[A\n",
+ " 78%|█████████████████████████████████▋ | 36/46 [00:01<00:00, 19.87it/s]\u001b[A\n",
+ " 83%|███████████████████████████████████▌ | 38/46 [00:01<00:00, 19.38it/s]\u001b[A\n",
+ " 87%|█████████████████████████████████████▍ | 40/46 [00:02<00:00, 19.02it/s]\u001b[A\n",
+ " 93%|████████████████████████████████████████▏ | 43/46 [00:02<00:00, 19.49it/s]\u001b[A\n",
+ " \u001b[A\n",
+ "\u001b[A{'eval_loss': 3.0680618286132812, 'eval_runtime': 2.3737, 'eval_samples_per_second': 19.379, 'eval_steps_per_second': 19.379, 'epoch': 8.0}\n",
+ " 80%|█████████████████████████████▌ | 4480/5600 [2:05:17<30:43, 1.65s/it]\n",
+ "100%|███████████████████████████████████████████| 46/46 [00:02<00:00, 18.92it/s]\u001b[A\n",
+ " \u001b[A[INFO|trainer.py:3478] 2024-06-30 08:21:52,385 >> Saving model checkpoint to saves/qwen2-1.5b/lora/sft/checkpoint-4480\n",
+ "[INFO|configuration_utils.py:733] 2024-06-30 08:21:52,924 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/config.json\n",
+ "[INFO|configuration_utils.py:800] 2024-06-30 08:21:52,925 >> Model config Qwen2Config {\n",
+ " \"architectures\": [\n",
+ " \"Qwen2ForCausalLM\"\n",
+ " ],\n",
+ " \"attention_dropout\": 0.0,\n",
+ " \"bos_token_id\": 151643,\n",
+ " \"eos_token_id\": 151645,\n",
+ " \"hidden_act\": \"silu\",\n",
+ " \"hidden_size\": 1536,\n",
+ " \"initializer_range\": 0.02,\n",
+ " \"intermediate_size\": 8960,\n",
+ " \"max_position_embeddings\": 32768,\n",
+ " \"max_window_layers\": 28,\n",
+ " \"model_type\": \"qwen2\",\n",
+ " \"num_attention_heads\": 12,\n",
+ " \"num_hidden_layers\": 28,\n",
+ " \"num_key_value_heads\": 2,\n",
+ " \"rms_norm_eps\": 1e-06,\n",
+ " \"rope_theta\": 1000000.0,\n",
+ " \"sliding_window\": 32768,\n",
+ " \"tie_word_embeddings\": true,\n",
+ " \"torch_dtype\": \"bfloat16\",\n",
+ " \"transformers_version\": \"4.42.3\",\n",
+ " \"use_cache\": true,\n",
+ " \"use_sliding_window\": false,\n",
+ " \"vocab_size\": 151936\n",
+ "}\n",
+ "\n",
+ "[INFO|tokenization_utils_base.py:2574] 2024-06-30 08:21:52,971 >> tokenizer config file saved in saves/qwen2-1.5b/lora/sft/checkpoint-4480/tokenizer_config.json\n",
+ "[INFO|tokenization_utils_base.py:2583] 2024-06-30 08:21:52,971 >> Special tokens file saved in saves/qwen2-1.5b/lora/sft/checkpoint-4480/special_tokens_map.json\n",
+ "{'loss': 0.1445, 'grad_norm': 1.7341903448104858, 'learning_rate': 1.1498188238036861e-05, 'epoch': 8.01}\n",
+ "{'loss': 0.113, 'grad_norm': 2.666015863418579, 'learning_rate': 1.130009458668863e-05, 'epoch': 8.03}\n",
+ "{'loss': 0.2103, 'grad_norm': 3.4498844146728516, 'learning_rate': 1.1103504586764263e-05, 'epoch': 8.05}\n",
+ "{'loss': 0.1438, 'grad_norm': 3.1626198291778564, 'learning_rate': 1.090842587659851e-05, 'epoch': 8.07}\n",
+ "{'loss': 0.1212, 'grad_norm': 2.0630691051483154, 'learning_rate': 1.0714866035806326e-05, 'epoch': 8.09}\n",
+ "{'loss': 0.1799, 'grad_norm': 3.088937282562256, 'learning_rate': 1.0522832584988234e-05, 'epoch': 8.1}\n",
+ "{'loss': 0.1107, 'grad_norm': 2.786867380142212, 'learning_rate': 1.0332332985438248e-05, 'epoch': 8.12}\n",
+ "{'loss': 0.1752, 'grad_norm': 1.7368676662445068, 'learning_rate': 1.0143374638853891e-05, 'epoch': 8.14}\n",
+ "{'loss': 0.1186, 'grad_norm': 2.039095878601074, 'learning_rate': 9.955964887048607e-06, 'epoch': 8.16}\n",
+ "{'loss': 0.133, 'grad_norm': 3.3849267959594727, 'learning_rate': 9.770111011666583e-06, 'epoch': 8.17}\n",
+ "{'loss': 0.1982, 'grad_norm': 3.557002305984497, 'learning_rate': 9.58582023389974e-06, 'epoch': 8.19}\n",
+ "{'loss': 0.1974, 'grad_norm': 2.3108747005462646, 'learning_rate': 9.403099714207175e-06, 'epoch': 8.21}\n",
+ "{'loss': 0.1912, 'grad_norm': 3.278822183609009, 'learning_rate': 9.221956552036992e-06, 'epoch': 8.23}\n",
+ "{'loss': 0.1803, 'grad_norm': 3.1523773670196533, 'learning_rate': 9.042397785550405e-06, 'epoch': 8.25}\n",
+ "{'loss': 0.1569, 'grad_norm': 4.3890862464904785, 'learning_rate': 8.864430391348332e-06, 'epoch': 8.26}\n",
+ "{'loss': 0.1246, 'grad_norm': 2.280132532119751, 'learning_rate': 8.688061284200266e-06, 'epoch': 8.28}\n",
+ "{'loss': 0.1357, 'grad_norm': 5.60243034362793, 'learning_rate': 8.513297316775625e-06, 'epoch': 8.3}\n",
+ "{'loss': 0.148, 'grad_norm': 4.402873992919922, 'learning_rate': 8.34014527937756e-06, 'epoch': 8.32}\n",
+ "{'loss': 0.1943, 'grad_norm': 3.0184195041656494, 'learning_rate': 8.168611899679013e-06, 'epoch': 8.34}\n",
+ "{'loss': 0.1651, 'grad_norm': 3.244899034500122, 'learning_rate': 7.998703842461431e-06, 'epoch': 8.35}\n",
+ "{'loss': 0.1725, 'grad_norm': 2.839618444442749, 'learning_rate': 7.830427709355725e-06, 'epoch': 8.37}\n",
+ "{'loss': 0.1764, 'grad_norm': 2.485934257507324, 'learning_rate': 7.663790038585793e-06, 'epoch': 8.39}\n",
+ "{'loss': 0.1308, 'grad_norm': 1.899274230003357, 'learning_rate': 7.498797304714544e-06, 'epoch': 8.41}\n",
+ "{'loss': 0.1754, 'grad_norm': 2.954799175262451, 'learning_rate': 7.33545591839222e-06, 'epoch': 8.42}\n",
+ "{'loss': 0.1637, 'grad_norm': 2.595350980758667, 'learning_rate': 7.173772226107434e-06, 'epoch': 8.44}\n",
+ "{'loss': 0.1736, 'grad_norm': 3.245035409927368, 'learning_rate': 7.013752509940485e-06, 'epoch': 8.46}\n",
+ "{'loss': 0.1975, 'grad_norm': 2.79209303855896, 'learning_rate': 6.855402987319348e-06, 'epoch': 8.48}\n",
+ "{'loss': 0.1608, 'grad_norm': 3.176992177963257, 'learning_rate': 6.698729810778065e-06, 'epoch': 8.5}\n",
+ "{'loss': 0.1425, 'grad_norm': 3.4199535846710205, 'learning_rate': 6.54373906771768e-06, 'epoch': 8.51}\n",
+ "{'loss': 0.1397, 'grad_norm': 2.7271015644073486, 'learning_rate': 6.390436780169734e-06, 'epoch': 8.53}\n",
+ "{'loss': 0.2058, 'grad_norm': 2.070603847503662, 'learning_rate': 6.238828904562316e-06, 'epoch': 8.55}\n",
+ "{'loss': 0.1612, 'grad_norm': 2.0566606521606445, 'learning_rate': 6.088921331488568e-06, 'epoch': 8.57}\n",
+ "{'loss': 0.0814, 'grad_norm': 2.8068063259124756, 'learning_rate': 5.94071988547788e-06, 'epoch': 8.59}\n",
+ "{'loss': 0.1446, 'grad_norm': 2.784498691558838, 'learning_rate': 5.794230324769517e-06, 'epoch': 8.6}\n",
+ "{'loss': 0.1159, 'grad_norm': 1.9032204151153564, 'learning_rate': 5.649458341088915e-06, 'epoch': 8.62}\n",
+ "{'loss': 0.1592, 'grad_norm': 3.4346718788146973, 'learning_rate': 5.506409559426573e-06, 'epoch': 8.64}\n",
+ "{'loss': 0.22, 'grad_norm': 1.5027986764907837, 'learning_rate': 5.365089537819434e-06, 'epoch': 8.66}\n",
+ "{'loss': 0.1448, 'grad_norm': 2.700094223022461, 'learning_rate': 5.2255037671349535e-06, 'epoch': 8.67}\n",
+ "{'loss': 0.173, 'grad_norm': 1.7306227684020996, 'learning_rate': 5.087657670857798e-06, 'epoch': 8.69}\n",
+ "{'loss': 0.1248, 'grad_norm': 2.1537575721740723, 'learning_rate': 4.951556604879048e-06, 'epoch': 8.71}\n",
+ "{'loss': 0.1908, 'grad_norm': 3.676980972290039, 'learning_rate': 4.8172058572881765e-06, 'epoch': 8.73}\n",
+ "{'loss': 0.1352, 'grad_norm': 3.5679290294647217, 'learning_rate': 4.684610648167503e-06, 'epoch': 8.75}\n",
+ "{'loss': 0.1403, 'grad_norm': 3.0709311962127686, 'learning_rate': 4.5537761293894535e-06, 'epoch': 8.76}\n",
+ "{'loss': 0.157, 'grad_norm': 4.296623706817627, 'learning_rate': 4.424707384416344e-06, 'epoch': 8.78}\n",
+ "{'loss': 0.1893, 'grad_norm': 2.2179601192474365, 'learning_rate': 4.29740942810285e-06, 'epoch': 8.8}\n",
+ "{'loss': 0.1651, 'grad_norm': 3.256356716156006, 'learning_rate': 4.1718872065011904e-06, 'epoch': 8.82}\n",
+ "{'loss': 0.1567, 'grad_norm': 2.9854118824005127, 'learning_rate': 4.048145596668967e-06, 'epoch': 8.84}\n",
+ "{'loss': 0.1463, 'grad_norm': 1.8327380418777466, 'learning_rate': 3.9261894064796135e-06, 'epoch': 8.85}\n",
+ "{'loss': 0.1511, 'grad_norm': 3.7827322483062744, 'learning_rate': 3.8060233744356633e-06, 'epoch': 8.87}\n",
+ "{'loss': 0.1404, 'grad_norm': 1.0582958459854126, 'learning_rate': 3.687652169484568e-06, 'epoch': 8.89}\n",
+ "{'loss': 0.1494, 'grad_norm': 2.8895883560180664, 'learning_rate': 3.5710803908373224e-06, 'epoch': 8.91}\n",
+ "{'loss': 0.1691, 'grad_norm': 4.131629467010498, 'learning_rate': 3.4563125677897932e-06, 'epoch': 8.92}\n",
+ "{'loss': 0.2159, 'grad_norm': 3.1218199729919434, 'learning_rate': 3.343353159546675e-06, 'epoch': 8.94}\n",
+ "{'loss': 0.1488, 'grad_norm': 1.3627033233642578, 'learning_rate': 3.2322065550483007e-06, 'epoch': 8.96}\n",
+ "{'loss': 0.1338, 'grad_norm': 0.7474280595779419, 'learning_rate': 3.1228770728000455e-06, 'epoch': 8.98}\n",
+ "{'loss': 0.1358, 'grad_norm': 7.197608947753906, 'learning_rate': 3.0153689607045845e-06, 'epoch': 9.0}\n",
+ " 90%|█████████████████████████████████▎ | 5040/5600 [2:20:27<14:49, 1.59s/it][INFO|trainer.py:3788] 2024-06-30 08:37:02,463 >> \n",
+ "***** Running Evaluation *****\n",
+ "[INFO|trainer.py:3790] 2024-06-30 08:37:02,463 >> Num examples = 46\n",
+ "[INFO|trainer.py:3793] 2024-06-30 08:37:02,463 >> Batch size = 1\n",
+ "\n",
+ " 0%| | 0/46 [00:00, ?it/s]\u001b[A\n",
+ " 7%|██▊ | 3/46 [00:00<00:01, 28.92it/s]\u001b[A\n",
+ " 13%|█████▋ | 6/46 [00:00<00:01, 22.11it/s]\u001b[A\n",
+ " 20%|████████▌ | 9/46 [00:00<00:01, 20.35it/s]\u001b[A\n",
+ " 26%|███████████▏ | 12/46 [00:00<00:01, 19.98it/s]\u001b[A\n",
+ " 33%|██████████████ | 15/46 [00:00<00:01, 19.91it/s]\u001b[A\n",
+ " 39%|████████████████▊ | 18/46 [00:00<00:01, 20.28it/s]\u001b[A\n",
+ " 46%|███████████████████▋ | 21/46 [00:01<00:01, 20.37it/s]\u001b[A\n",
+ " 52%|██████████████████████▍ | 24/46 [00:01<00:01, 20.46it/s]\u001b[A\n",
+ " 59%|█████████████████████████▏ | 27/46 [00:01<00:00, 20.56it/s]\u001b[A\n",
+ " 65%|████████████████████████████ | 30/46 [00:01<00:00, 20.24it/s]\u001b[A\n",
+ " 72%|██████████████████████████████▊ | 33/46 [00:01<00:00, 19.81it/s]\u001b[A\n",
+ " 76%|████████████████████████████████▋ | 35/46 [00:01<00:00, 19.77it/s]\u001b[A\n",
+ " 83%|███████████████████████████████████▌ | 38/46 [00:01<00:00, 20.14it/s]\u001b[A\n",
+ " 89%|██████████████████████████████████████▎ | 41/46 [00:02<00:00, 20.31it/s]\u001b[A\n",
+ " \u001b[A\n",
+ "\u001b[A{'eval_loss': 3.3791232109069824, 'eval_runtime': 2.3157, 'eval_samples_per_second': 19.864, 'eval_steps_per_second': 19.864, 'epoch': 9.0}\n",
+ " 90%|█████████████████████████████████▎ | 5040/5600 [2:20:29<14:49, 1.59s/it]\n",
+ "100%|███████████████████████████████████████████| 46/46 [00:02<00:00, 20.63it/s]\u001b[A\n",
+ " \u001b[A[INFO|trainer.py:3478] 2024-06-30 08:37:04,779 >> Saving model checkpoint to saves/qwen2-1.5b/lora/sft/checkpoint-5040\n",
+ "[INFO|configuration_utils.py:733] 2024-06-30 08:37:05,334 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/config.json\n",
+ "[INFO|configuration_utils.py:800] 2024-06-30 08:37:05,334 >> Model config Qwen2Config {\n",
+ " \"architectures\": [\n",
+ " \"Qwen2ForCausalLM\"\n",
+ " ],\n",
+ " \"attention_dropout\": 0.0,\n",
+ " \"bos_token_id\": 151643,\n",
+ " \"eos_token_id\": 151645,\n",
+ " \"hidden_act\": \"silu\",\n",
+ " \"hidden_size\": 1536,\n",
+ " \"initializer_range\": 0.02,\n",
+ " \"intermediate_size\": 8960,\n",
+ " \"max_position_embeddings\": 32768,\n",
+ " \"max_window_layers\": 28,\n",
+ " \"model_type\": \"qwen2\",\n",
+ " \"num_attention_heads\": 12,\n",
+ " \"num_hidden_layers\": 28,\n",
+ " \"num_key_value_heads\": 2,\n",
+ " \"rms_norm_eps\": 1e-06,\n",
+ " \"rope_theta\": 1000000.0,\n",
+ " \"sliding_window\": 32768,\n",
+ " \"tie_word_embeddings\": true,\n",
+ " \"torch_dtype\": \"bfloat16\",\n",
+ " \"transformers_version\": \"4.42.3\",\n",
+ " \"use_cache\": true,\n",
+ " \"use_sliding_window\": false,\n",
+ " \"vocab_size\": 151936\n",
+ "}\n",
+ "\n",
+ "[INFO|tokenization_utils_base.py:2574] 2024-06-30 08:37:05,376 >> tokenizer config file saved in saves/qwen2-1.5b/lora/sft/checkpoint-5040/tokenizer_config.json\n",
+ "[INFO|tokenization_utils_base.py:2583] 2024-06-30 08:37:05,376 >> Special tokens file saved in saves/qwen2-1.5b/lora/sft/checkpoint-5040/special_tokens_map.json\n",
+ "{'loss': 0.127, 'grad_norm': 2.002713441848755, 'learning_rate': 2.9096863958968268e-06, 'epoch': 9.01}\n",
+ "{'loss': 0.1209, 'grad_norm': 1.9945132732391357, 'learning_rate': 2.8058334845816213e-06, 'epoch': 9.03}\n",
+ "{'loss': 0.1432, 'grad_norm': 1.8608288764953613, 'learning_rate': 2.7038142618741992e-06, 'epoch': 9.05}\n",
+ "{'loss': 0.1707, 'grad_norm': 3.0099308490753174, 'learning_rate': 2.603632691643415e-06, 'epoch': 9.07}\n",
+ "{'loss': 0.1253, 'grad_norm': 2.0166702270507812, 'learning_rate': 2.5052926663577e-06, 'epoch': 9.09}\n",
+ "{'loss': 0.1316, 'grad_norm': 2.971637487411499, 'learning_rate': 2.408798006933882e-06, 'epoch': 9.1}\n",
+ "{'loss': 0.0958, 'grad_norm': 3.2099649906158447, 'learning_rate': 2.314152462588659e-06, 'epoch': 9.12}\n",
+ "{'loss': 0.1209, 'grad_norm': 3.63370418548584, 'learning_rate': 2.221359710692961e-06, 'epoch': 9.14}\n",
+ "{'loss': 0.0965, 'grad_norm': 1.7885162830352783, 'learning_rate': 2.1304233566290964e-06, 'epoch': 9.16}\n",
+ "{'loss': 0.0858, 'grad_norm': 1.2611875534057617, 'learning_rate': 2.041346933650612e-06, 'epoch': 9.17}\n",
+ "{'loss': 0.1183, 'grad_norm': 1.9412541389465332, 'learning_rate': 1.9541339027450256e-06, 'epoch': 9.19}\n",
+ "{'loss': 0.1395, 'grad_norm': 1.758565068244934, 'learning_rate': 1.8687876524993987e-06, 'epoch': 9.21}\n",
+ "{'loss': 0.0943, 'grad_norm': 2.854973316192627, 'learning_rate': 1.785311498968617e-06, 'epoch': 9.23}\n",
+ "{'loss': 0.1427, 'grad_norm': 6.644575119018555, 'learning_rate': 1.70370868554659e-06, 'epoch': 9.25}\n",
+ "{'loss': 0.1434, 'grad_norm': 3.3001134395599365, 'learning_rate': 1.6239823828401945e-06, 'epoch': 9.26}\n",
+ "{'loss': 0.1218, 'grad_norm': 0.8132327795028687, 'learning_rate': 1.5461356885461075e-06, 'epoch': 9.28}\n",
+ "{'loss': 0.1723, 'grad_norm': 1.5899766683578491, 'learning_rate': 1.4701716273304521e-06, 'epoch': 9.3}\n",
+ "{'loss': 0.1067, 'grad_norm': 1.619358777999878, 'learning_rate': 1.3960931507112752e-06, 'epoch': 9.32}\n",
+ "{'loss': 0.1708, 'grad_norm': 1.985873818397522, 'learning_rate': 1.3239031369438326e-06, 'epoch': 9.34}\n",
+ "{'loss': 0.1221, 'grad_norm': 2.568528175354004, 'learning_rate': 1.2536043909088191e-06, 'epoch': 9.35}\n",
+ "{'loss': 0.1285, 'grad_norm': 2.3798413276672363, 'learning_rate': 1.1851996440033319e-06, 'epoch': 9.37}\n",
+ "{'loss': 0.1118, 'grad_norm': 0.7661011815071106, 'learning_rate': 1.118691554034773e-06, 'epoch': 9.39}\n",
+ "{'loss': 0.1178, 'grad_norm': 2.5488016605377197, 'learning_rate': 1.0540827051175818e-06, 'epoch': 9.41}\n",
+ "{'loss': 0.1357, 'grad_norm': 3.0472471714019775, 'learning_rate': 9.913756075728087e-07, 'epoch': 9.42}\n",
+ "{'loss': 0.0948, 'grad_norm': 0.8541691899299622, 'learning_rate': 9.305726978306173e-07, 'epoch': 9.44}\n",
+ "{'loss': 0.1502, 'grad_norm': 0.9478998780250549, 'learning_rate': 8.716763383355864e-07, 'epoch': 9.46}\n",
+ "{'loss': 0.1266, 'grad_norm': 1.7219117879867554, 'learning_rate': 8.146888174549339e-07, 'epoch': 9.48}\n",
+ "{'loss': 0.0963, 'grad_norm': 2.673491954803467, 'learning_rate': 7.596123493895991e-07, 'epoch': 9.5}\n",
+ "{'loss': 0.186, 'grad_norm': 2.8655078411102295, 'learning_rate': 7.064490740882057e-07, 'epoch': 9.51}\n",
+ "{'loss': 0.1359, 'grad_norm': 2.7357897758483887, 'learning_rate': 6.552010571639456e-07, 'epoch': 9.53}\n",
+ "{'loss': 0.1508, 'grad_norm': 2.8306162357330322, 'learning_rate': 6.058702898142643e-07, 'epoch': 9.55}\n",
+ "{'loss': 0.1036, 'grad_norm': 3.270542621612549, 'learning_rate': 5.584586887435739e-07, 'epoch': 9.57}\n",
+ "{'loss': 0.1504, 'grad_norm': 2.821152925491333, 'learning_rate': 5.129680960887007e-07, 'epoch': 9.59}\n",
+ "{'loss': 0.1388, 'grad_norm': 1.7769047021865845, 'learning_rate': 4.6940027934735954e-07, 'epoch': 9.6}\n",
+ "{'loss': 0.1101, 'grad_norm': 2.468860387802124, 'learning_rate': 4.277569313094809e-07, 'epoch': 9.62}\n",
+ "{'loss': 0.1552, 'grad_norm': 2.018123149871826, 'learning_rate': 3.8803966999139684e-07, 'epoch': 9.64}\n",
+ "{'loss': 0.124, 'grad_norm': 1.8176459074020386, 'learning_rate': 3.50250038573019e-07, 'epoch': 9.66}\n",
+ "{'loss': 0.1084, 'grad_norm': 1.4483444690704346, 'learning_rate': 3.143895053378698e-07, 'epoch': 9.67}\n",
+ "{'loss': 0.1122, 'grad_norm': 2.107964038848877, 'learning_rate': 2.8045946361601183e-07, 'epoch': 9.69}\n",
+ "{'loss': 0.167, 'grad_norm': 3.6413228511810303, 'learning_rate': 2.4846123172992954e-07, 'epoch': 9.71}\n",
+ "{'loss': 0.1457, 'grad_norm': 3.200455904006958, 'learning_rate': 2.1839605294330933e-07, 'epoch': 9.73}\n",
+ "{'loss': 0.1466, 'grad_norm': 2.517279863357544, 'learning_rate': 1.9026509541272275e-07, 'epoch': 9.75}\n",
+ "{'loss': 0.1311, 'grad_norm': 1.4828776121139526, 'learning_rate': 1.640694521422459e-07, 'epoch': 9.76}\n",
+ "{'loss': 0.1393, 'grad_norm': 2.981771945953369, 'learning_rate': 1.3981014094099353e-07, 'epoch': 9.78}\n",
+ "{'loss': 0.0979, 'grad_norm': 1.9358062744140625, 'learning_rate': 1.1748810438355628e-07, 'epoch': 9.8}\n",
+ "{'loss': 0.1863, 'grad_norm': 1.4688208103179932, 'learning_rate': 9.710420977340762e-08, 'epoch': 9.82}\n",
+ "{'loss': 0.1427, 'grad_norm': 2.011298418045044, 'learning_rate': 7.865924910916977e-08, 'epoch': 9.83}\n",
+ "{'loss': 0.0873, 'grad_norm': 3.8186533451080322, 'learning_rate': 6.215393905388278e-08, 'epoch': 9.85}\n",
+ "{'loss': 0.099, 'grad_norm': 2.1099774837493896, 'learning_rate': 4.7588920907110094e-08, 'epoch': 9.87}\n",
+ "{'loss': 0.1162, 'grad_norm': 2.2421796321868896, 'learning_rate': 3.496476058006959e-08, 'epoch': 9.89}\n",
+ "{'loss': 0.1642, 'grad_norm': 3.2422990798950195, 'learning_rate': 2.4281948573617874e-08, 'epoch': 9.91}\n",
+ "{'loss': 0.1631, 'grad_norm': 2.7475242614746094, 'learning_rate': 1.5540899959187727e-08, 'epoch': 9.92}\n",
+ "{'loss': 0.145, 'grad_norm': 2.781863212585449, 'learning_rate': 8.741954362678772e-09, 'epoch': 9.94}\n",
+ "{'loss': 0.1242, 'grad_norm': 3.5185129642486572, 'learning_rate': 3.885375951256931e-09, 'epoch': 9.96}\n",
+ "{'loss': 0.1676, 'grad_norm': 2.617418050765991, 'learning_rate': 9.713534230904041e-10, 'epoch': 9.98}\n",
+ "{'loss': 0.1304, 'grad_norm': 2.882068395614624, 'learning_rate': 0.0, 'epoch': 10.0}\n",
+ "100%|█████████████████████████████████████| 5600/5600 [2:35:33<00:00, 1.59s/it][INFO|trainer.py:3788] 2024-06-30 08:52:08,248 >> \n",
+ "***** Running Evaluation *****\n",
+ "[INFO|trainer.py:3790] 2024-06-30 08:52:08,248 >> Num examples = 46\n",
+ "[INFO|trainer.py:3793] 2024-06-30 08:52:08,248 >> Batch size = 1\n",
+ "\n",
+ " 0%| | 0/46 [00:00, ?it/s]\u001b[A\n",
+ " 7%|██▊ | 3/46 [00:00<00:01, 27.21it/s]\u001b[A\n",
+ " 13%|█████▋ | 6/46 [00:00<00:01, 22.26it/s]\u001b[A\n",
+ " 20%|████████▌ | 9/46 [00:00<00:01, 21.37it/s]\u001b[A\n",
+ " 26%|███████████▏ | 12/46 [00:00<00:01, 21.20it/s]\u001b[A\n",
+ " 33%|██████████████ | 15/46 [00:00<00:01, 20.31it/s]\u001b[A\n",
+ " 39%|████████████████▊ | 18/46 [00:00<00:01, 19.25it/s]\u001b[A\n",
+ " 43%|██████████████████▋ | 20/46 [00:00<00:01, 19.27it/s]\u001b[A\n",
+ " 48%|████████████████████▌ | 22/46 [00:01<00:01, 19.44it/s]\u001b[A\n",
+ " 52%|██████████████████████▍ | 24/46 [00:01<00:01, 18.62it/s]\u001b[A\n",
+ " 57%|████████████████████████▎ | 26/46 [00:01<00:01, 18.46it/s]\u001b[A\n",
+ " 61%|██████████████████████████▏ | 28/46 [00:01<00:00, 18.63it/s]\u001b[A\n",
+ " 65%|████████████████████████████ | 30/46 [00:01<00:00, 18.93it/s]\u001b[A\n",
+ " 72%|██████████████████████████████▊ | 33/46 [00:01<00:00, 18.94it/s]\u001b[A\n",
+ " 78%|█████████████████████████████████▋ | 36/46 [00:01<00:00, 19.35it/s]\u001b[A\n",
+ " 83%|███████████████████████████████████▌ | 38/46 [00:01<00:00, 19.17it/s]\u001b[A\n",
+ " 87%|█████████████████████████████████████▍ | 40/46 [00:02<00:00, 19.01it/s]\u001b[A\n",
+ " 91%|███████████████████████████████████████▎ | 42/46 [00:02<00:00, 19.01it/s]\u001b[A\n",
+ " \u001b[A\n",
+ "\u001b[A{'eval_loss': 3.5356574058532715, 'eval_runtime': 2.4075, 'eval_samples_per_second': 19.107, 'eval_steps_per_second': 19.107, 'epoch': 10.0}\n",
+ "100%|█████████████████████████████████████| 5600/5600 [2:35:35<00:00, 1.59s/it]\n",
+ "100%|███████████████████████████████████████████| 46/46 [00:02<00:00, 19.46it/s]\u001b[A\n",
+ " \u001b[A[INFO|trainer.py:3478] 2024-06-30 08:52:10,656 >> Saving model checkpoint to saves/qwen2-1.5b/lora/sft/checkpoint-5600\n",
+ "[INFO|configuration_utils.py:733] 2024-06-30 08:52:11,263 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/config.json\n",
+ "[INFO|configuration_utils.py:800] 2024-06-30 08:52:11,263 >> Model config Qwen2Config {\n",
+ " \"architectures\": [\n",
+ " \"Qwen2ForCausalLM\"\n",
+ " ],\n",
+ " \"attention_dropout\": 0.0,\n",
+ " \"bos_token_id\": 151643,\n",
+ " \"eos_token_id\": 151645,\n",
+ " \"hidden_act\": \"silu\",\n",
+ " \"hidden_size\": 1536,\n",
+ " \"initializer_range\": 0.02,\n",
+ " \"intermediate_size\": 8960,\n",
+ " \"max_position_embeddings\": 32768,\n",
+ " \"max_window_layers\": 28,\n",
+ " \"model_type\": \"qwen2\",\n",
+ " \"num_attention_heads\": 12,\n",
+ " \"num_hidden_layers\": 28,\n",
+ " \"num_key_value_heads\": 2,\n",
+ " \"rms_norm_eps\": 1e-06,\n",
+ " \"rope_theta\": 1000000.0,\n",
+ " \"sliding_window\": 32768,\n",
+ " \"tie_word_embeddings\": true,\n",
+ " \"torch_dtype\": \"bfloat16\",\n",
+ " \"transformers_version\": \"4.42.3\",\n",
+ " \"use_cache\": true,\n",
+ " \"use_sliding_window\": false,\n",
+ " \"vocab_size\": 151936\n",
+ "}\n",
+ "\n",
+ "[INFO|tokenization_utils_base.py:2574] 2024-06-30 08:52:11,334 >> tokenizer config file saved in saves/qwen2-1.5b/lora/sft/checkpoint-5600/tokenizer_config.json\n",
+ "[INFO|tokenization_utils_base.py:2583] 2024-06-30 08:52:11,334 >> Special tokens file saved in saves/qwen2-1.5b/lora/sft/checkpoint-5600/special_tokens_map.json\n",
+ "[INFO|trainer.py:2383] 2024-06-30 08:52:11,559 >> \n",
+ "\n",
+ "Training completed. Do not forget to share your model on huggingface.co/models =)\n",
+ "\n",
+ "\n",
+ "{'train_runtime': 9336.476, 'train_samples_per_second': 4.801, 'train_steps_per_second': 0.6, 'train_loss': 0.7698830796884639, 'epoch': 10.0}\n",
+ "100%|█████████████████████████████████████| 5600/5600 [2:35:36<00:00, 1.67s/it]\n",
+ "[INFO|trainer.py:3478] 2024-06-30 08:52:11,560 >> Saving model checkpoint to saves/qwen2-1.5b/lora/sft\n",
+ "[INFO|configuration_utils.py:733] 2024-06-30 08:52:12,070 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/config.json\n",
+ "[INFO|configuration_utils.py:800] 2024-06-30 08:52:12,070 >> Model config Qwen2Config {\n",
+ " \"architectures\": [\n",
+ " \"Qwen2ForCausalLM\"\n",
+ " ],\n",
+ " \"attention_dropout\": 0.0,\n",
+ " \"bos_token_id\": 151643,\n",
+ " \"eos_token_id\": 151645,\n",
+ " \"hidden_act\": \"silu\",\n",
+ " \"hidden_size\": 1536,\n",
+ " \"initializer_range\": 0.02,\n",
+ " \"intermediate_size\": 8960,\n",
+ " \"max_position_embeddings\": 32768,\n",
+ " \"max_window_layers\": 28,\n",
+ " \"model_type\": \"qwen2\",\n",
+ " \"num_attention_heads\": 12,\n",
+ " \"num_hidden_layers\": 28,\n",
+ " \"num_key_value_heads\": 2,\n",
+ " \"rms_norm_eps\": 1e-06,\n",
+ " \"rope_theta\": 1000000.0,\n",
+ " \"sliding_window\": 32768,\n",
+ " \"tie_word_embeddings\": true,\n",
+ " \"torch_dtype\": \"bfloat16\",\n",
+ " \"transformers_version\": \"4.42.3\",\n",
+ " \"use_cache\": true,\n",
+ " \"use_sliding_window\": false,\n",
+ " \"vocab_size\": 151936\n",
+ "}\n",
+ "\n",
+ "[INFO|tokenization_utils_base.py:2574] 2024-06-30 08:52:12,110 >> tokenizer config file saved in saves/qwen2-1.5b/lora/sft/tokenizer_config.json\n",
+ "[INFO|tokenization_utils_base.py:2583] 2024-06-30 08:52:12,110 >> Special tokens file saved in saves/qwen2-1.5b/lora/sft/special_tokens_map.json\n",
+ "***** train metrics *****\n",
+ " epoch = 9.9955\n",
+ " total_flos = 27888647GF\n",
+ " train_loss = 0.7699\n",
+ " train_runtime = 2:35:36.47\n",
+ " train_samples_per_second = 4.801\n",
+ " train_steps_per_second = 0.6\n",
+ "Figure saved at: saves/qwen2-1.5b/lora/sft/training_loss.png\n",
+ "Figure saved at: saves/qwen2-1.5b/lora/sft/training_eval_loss.png\n",
+ "[INFO|trainer.py:3788] 2024-06-30 08:52:12,411 >> \n",
+ "***** Running Evaluation *****\n",
+ "[INFO|trainer.py:3790] 2024-06-30 08:52:12,411 >> Num examples = 46\n",
+ "[INFO|trainer.py:3793] 2024-06-30 08:52:12,411 >> Batch size = 1\n",
+ "100%|███████████████████████████████████████████| 46/46 [00:02<00:00, 19.87it/s]\n",
+ "***** eval metrics *****\n",
+ " epoch = 9.9955\n",
+ " eval_loss = 3.5357\n",
+ " eval_runtime = 0:00:02.39\n",
+ " eval_samples_per_second = 19.224\n",
+ " eval_steps_per_second = 19.224\n",
+ "[INFO|modelcard.py:449] 2024-06-30 08:52:14,805 >> Dropping the following result as it does not have all the necessary fields:\n",
+ "{'task': {'name': 'Causal Language Modeling', 'type': 'text-generation'}}\n",
+ "CPU times: user 2min 11s, sys: 44.5 s, total: 2min 56s\n",
+ "Wall time: 2h 37min 48s\n"
+ ]
+ }
+ ],
+ "source": [
+ "%%time\n",
+ "\n",
+ "!./scripts/tune-lf.sh config/qwen2_1.5b_lora_sft.yaml"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%%time\n",
+ "\n",
+ "!./scripts/tune-lf.sh config/qwen2_7b_lora_sft.yaml"
+ ]
+ }
+ ],
+ "metadata": {
+ "accelerator": "GPU",
+ "application/vnd.databricks.v1+notebook": {
+ "dashboards": [],
+ "environmentMetadata": null,
+ "language": "python",
+ "notebookMetadata": {
+ "pythonIndentUnit": 4
+ },
+ "notebookName": "07_MAC_+_Qwen2-7B-Instructi_Unsloth_train",
+ "widgets": {}
+ },
+ "colab": {
+ "gpuType": "T4",
+ "provenance": []
+ },
+ "kernelspec": {
+ "display_name": "Python 3",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.11.9"
+ },
+ "widgets": {
+ "application/vnd.jupyter.widget-state+json": {
+ "036fc5746f43416db18c19ad8fd36677": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "06e806c82c7b4cbea31c5358dd9c3434": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "087b76a8b7514269b1f0ab29b062e444": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_a069d2ab23824f29aa320ac256e2cfe9",
+ "placeholder": "",
+ "style": "IPY_MODEL_06e806c82c7b4cbea31c5358dd9c3434",
+ "value": "Map (num_proc=2): 100%"
+ }
+ },
+ "09b76013aa9e45efb6deb23a7a0d0925": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_dea41c5260884aa6879b5e1d1697b14f",
+ "placeholder": "",
+ "style": "IPY_MODEL_89965917796a4f81b899fdc7685f33df",
+ "value": "config.json: 100%"
+ }
+ },
+ "0a92c56bfa134ef583220d7ef0b13e17": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "0c34be936c8145d3ab41282f30a70713": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "0f8b6bfe16894500838793f2491d403f": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "177c78fce95d4b4ab33057c5a048d693": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "1f44c9ce1adf470cbb19784493ed209f": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_0c34be936c8145d3ab41282f30a70713",
+ "placeholder": "",
+ "style": "IPY_MODEL_0a92c56bfa134ef583220d7ef0b13e17",
+ "value": "model.safetensors: 100%"
+ }
+ },
+ "201b59ccd9f845e197029b57e424aefc": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "2157f01726d748f8a9ae4a00664430da": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "21db8a77b00d4a4e82fdfa608657531f": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "26e4202cca81496a90d15a0dd4ca9cf1": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_ba90fdb8822d47dab7ba203bee297f37",
+ "IPY_MODEL_61560ff6a36b44f4a9dfdae5c52791d4",
+ "IPY_MODEL_95fbe66647904c06a20f640630d6dc0e"
+ ],
+ "layout": "IPY_MODEL_57182a263d324a3dbf1471c74290a0d5"
+ }
+ },
+ "27155728b6b84cb199c91c940095d0a8": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_6b91feeed5464877991ac2c207aebe7c",
+ "IPY_MODEL_cca8113c54c0495daedce1327bf9c68b",
+ "IPY_MODEL_2e63a29e2f7247bba5beede9a568c99f"
+ ],
+ "layout": "IPY_MODEL_5c9d781c28944f3eb86e2a6d44efdf18"
+ }
+ },
+ "271ddaa553a042d09b6db7b450643d8f": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "2a58d04b428c46f4b3dbadd3bc6cd529": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "2d18ddf6482c4d97829ac0e5a7b9868f": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_9f679ad3ec7f4fe8ad0510ffb57bc2ab",
+ "IPY_MODEL_f2df530d22c74977b249dd9fb5f4829b",
+ "IPY_MODEL_89b2ef0dbfea47ab8e6f8d659e3351d1"
+ ],
+ "layout": "IPY_MODEL_3056b148aa9f4e6e8aa3b61d26886255"
+ }
+ },
+ "2e5087c76f98437cb5dc729230358cba": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "2e63a29e2f7247bba5beede9a568c99f": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_b993eaec6b224440bf80c0958c6fb536",
+ "placeholder": "",
+ "style": "IPY_MODEL_de868e26e7154f62aa86223a539ad421",
+ "value": " 464/464 [00:00<00:00, 27.1kB/s]"
+ }
+ },
+ "2f6c70dd266c4816bfad3fd3d192929a": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "30307300bc4e4baf96560e30969a82b6": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_e36a3f9eff0e4cf68834d66b0213ae96",
+ "placeholder": "",
+ "style": "IPY_MODEL_a0037bdccf254159becde630bee3d1db",
+ "value": "generation_config.json: 100%"
+ }
+ },
+ "3056b148aa9f4e6e8aa3b61d26886255": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "30cdc32298134cb0be4d41615b9e5774": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "3572201bd4d74a58b7a665f9bdfdcdba": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "35b0e8c26d6640e9bd0ed7b242a423d8": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_2e5087c76f98437cb5dc729230358cba",
+ "max": 51760,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_036fc5746f43416db18c19ad8fd36677",
+ "value": 51760
+ }
+ },
+ "36166c7bcb854b34aca1f41a5d6ea50b": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "370692d819df41828b48c4ad446f977b": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "39b29a75374b45c0a22506010be2b84e": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_30cdc32298134cb0be4d41615b9e5774",
+ "max": 1179,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_47928317548c454bba6358ab132e8dee",
+ "value": 1179
+ }
+ },
+ "3cf2dd993b5e4d3daecf61e4bab5a404": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_087b76a8b7514269b1f0ab29b062e444",
+ "IPY_MODEL_35b0e8c26d6640e9bd0ed7b242a423d8",
+ "IPY_MODEL_54ad89e05fd74576b9b8b5b5a10eaf8d"
+ ],
+ "layout": "IPY_MODEL_a41dc44766444a998bec2d777f249d23"
+ }
+ },
+ "43dec2ede91341f5af60eb522e18e984": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "4463edd481c1467f914c7dcd6c6e6ffc": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "47928317548c454bba6358ab132e8dee": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "49277aeeac16434a865a4d12308b1abc": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "4ae7e449e4ea4c729b5f34607c18ebae": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "4b2061b8a73c43ffb0c2f83daf0d0183": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "4c4c88d4c701450692fa0f6b0c5764b0": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "4c666f4ace3943f8b80ecd20e7503236": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "4ccedf0d93094e63b57a0f8a434fba06": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_4463edd481c1467f914c7dcd6c6e6ffc",
+ "max": 44307561,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_6d3b9a05db0b4dadb638c686faa0c40a",
+ "value": 44307561
+ }
+ },
+ "4dcf6ff672d24983a1877a8431709aa9": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_5807d5fb827d490fb3bc698f801ffff5",
+ "placeholder": "",
+ "style": "IPY_MODEL_c4f2b06a82fd4987b8b659524a7b503b",
+ "value": "Generating train split: 100%"
+ }
+ },
+ "4ea63adfce694725bdba878aef709dd3": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "5234566b1bfc4655b8d582ea5b46ed9f": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "54ad89e05fd74576b9b8b5b5a10eaf8d": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_fdb1941405ed4e4aa06019933892deb3",
+ "placeholder": "",
+ "style": "IPY_MODEL_668d5377ca56426a99753867e6e24862",
+ "value": " 51760/51760 [01:02<00:00, 1131.51 examples/s]"
+ }
+ },
+ "56aee4853b7740e6a977254f5d1fa66d": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "57182a263d324a3dbf1471c74290a0d5": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "5807d5fb827d490fb3bc698f801ffff5": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "5c9d781c28944f3eb86e2a6d44efdf18": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "5f40db8173dd4d76b6ef5ed6d9ec8b6e": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "61560ff6a36b44f4a9dfdae5c52791d4": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_db19fc8d37db4e45a5790a876836d8c4",
+ "max": 11610,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_36166c7bcb854b34aca1f41a5d6ea50b",
+ "value": 11610
+ }
+ },
+ "6578fd7acdb54c4c93528ea431fd0144": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_370692d819df41828b48c4ad446f977b",
+ "placeholder": "",
+ "style": "IPY_MODEL_a0bf9160eb2647409b3200270914b90f",
+ "value": " 50.6k/50.6k [00:00<00:00, 2.71MB/s]"
+ }
+ },
+ "668d5377ca56426a99753867e6e24862": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "697f027529b54ee9956bae78a11e0611": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "69ac12aec0714318bf2c83d4f4e745f5": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "6b2012c3f88547af8884a9ea90e3164b": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_938f45f1b3e24118b815d96ae34ba86a",
+ "placeholder": "",
+ "style": "IPY_MODEL_9367047a800747f79c6b225d92397846",
+ "value": " 44.3M/44.3M [00:01<00:00, 31.0MB/s]"
+ }
+ },
+ "6b91feeed5464877991ac2c207aebe7c": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_4b2061b8a73c43ffb0c2f83daf0d0183",
+ "placeholder": "",
+ "style": "IPY_MODEL_69ac12aec0714318bf2c83d4f4e745f5",
+ "value": "special_tokens_map.json: 100%"
+ }
+ },
+ "6d3b9a05db0b4dadb638c686faa0c40a": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "6dbbedeca9314e66ae50e44ffa31a414": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "6e34619b45934040b6092e6fb01ea7fe": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "71ce208e20d6483abb9ed923510c86d7": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_d69dc491b3ab44d7852b21873ed7bb7f",
+ "placeholder": "",
+ "style": "IPY_MODEL_f401d53bf28e44eb906bce6c05412662",
+ "value": " 51760/51760 [00:01<00:00, 45512.81 examples/s]"
+ }
+ },
+ "7358cdad832342c983e31efb8754ab78": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "73e352a3404f4c7dad0737f57d29e92f": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_988a0e8c1f89446086858da0a891a79c",
+ "IPY_MODEL_4ccedf0d93094e63b57a0f8a434fba06",
+ "IPY_MODEL_6b2012c3f88547af8884a9ea90e3164b"
+ ],
+ "layout": "IPY_MODEL_7e29cb8dd4df4d5b94407cd8fd3f2011"
+ }
+ },
+ "74501720ac7e4dbb911a4a99b3633bc6": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "78e5400bff924a92a4cc61c4ff18b182": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_b9b313fd861948f5aba25b24b1518d30",
+ "placeholder": "",
+ "style": "IPY_MODEL_4c666f4ace3943f8b80ecd20e7503236",
+ "value": " 1.18k/1.18k [00:00<00:00, 31.3kB/s]"
+ }
+ },
+ "7975adbc2ec5489ea7fa0167e620d85c": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_6e34619b45934040b6092e6fb01ea7fe",
+ "max": 51760,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_271ddaa553a042d09b6db7b450643d8f",
+ "value": 51760
+ }
+ },
+ "7e29cb8dd4df4d5b94407cd8fd3f2011": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "810ff6c0e17d4fa09a30fef27eacff90": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "89965917796a4f81b899fdc7685f33df": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "89b2ef0dbfea47ab8e6f8d659e3351d1": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_b8908fa0df3743ecb9d12983a739104f",
+ "placeholder": "",
+ "style": "IPY_MODEL_177c78fce95d4b4ab33057c5a048d693",
+ "value": " 9.09M/9.09M [00:00<00:00, 32.6MB/s]"
+ }
+ },
+ "8b3505352a5a42bf910428c40ce40465": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_49277aeeac16434a865a4d12308b1abc",
+ "placeholder": "",
+ "style": "IPY_MODEL_2157f01726d748f8a9ae4a00664430da",
+ "value": " 5.70G/5.70G [01:02<00:00, 30.1MB/s]"
+ }
+ },
+ "8fc142b628fb40568730234de1cafde2": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_4ae7e449e4ea4c729b5f34607c18ebae",
+ "max": 172,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_3572201bd4d74a58b7a665f9bdfdcdba",
+ "value": 172
+ }
+ },
+ "9367047a800747f79c6b225d92397846": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "938f45f1b3e24118b815d96ae34ba86a": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "95fbe66647904c06a20f640630d6dc0e": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_b0a370dc20654b279b9680692e34418e",
+ "placeholder": "",
+ "style": "IPY_MODEL_cfeb365ddf7548d58b2557f22737fcf5",
+ "value": " 11.6k/11.6k [00:00<00:00, 716kB/s]"
+ }
+ },
+ "988a0e8c1f89446086858da0a891a79c": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_ad2be500fc164c0f86f33e914ef8e6a0",
+ "placeholder": "",
+ "style": "IPY_MODEL_5234566b1bfc4655b8d582ea5b46ed9f",
+ "value": "Downloading data: 100%"
+ }
+ },
+ "98c58f23f4d549518832cb2d18f796e8": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_09b76013aa9e45efb6deb23a7a0d0925",
+ "IPY_MODEL_39b29a75374b45c0a22506010be2b84e",
+ "IPY_MODEL_78e5400bff924a92a4cc61c4ff18b182"
+ ],
+ "layout": "IPY_MODEL_2a58d04b428c46f4b3dbadd3bc6cd529"
+ }
+ },
+ "99fdbb0300c14c139d1937c646f0cfe7": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_7358cdad832342c983e31efb8754ab78",
+ "placeholder": "",
+ "style": "IPY_MODEL_e9adf418296e436fb48bb9f78885598b",
+ "value": " 51760/51760 [00:01<00:00, 38665.95 examples/s]"
+ }
+ },
+ "9f679ad3ec7f4fe8ad0510ffb57bc2ab": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_4ea63adfce694725bdba878aef709dd3",
+ "placeholder": "",
+ "style": "IPY_MODEL_74501720ac7e4dbb911a4a99b3633bc6",
+ "value": "tokenizer.json: 100%"
+ }
+ },
+ "a0037bdccf254159becde630bee3d1db": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "a069d2ab23824f29aa320ac256e2cfe9": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "a0bf9160eb2647409b3200270914b90f": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "a41dc44766444a998bec2d777f249d23": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "a8464a4c711e4e00aafdfc919b60d07e": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_fb995c740590427b882572c81d4e848c",
+ "placeholder": "",
+ "style": "IPY_MODEL_201b59ccd9f845e197029b57e424aefc",
+ "value": " 172/172 [00:00<00:00, 12.0kB/s]"
+ }
+ },
+ "a9f0cc51fc3d4d7b874c32dcf1c5bdf2": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "ad2be500fc164c0f86f33e914ef8e6a0": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "b0240cd9a4554b29ae11f8051984a1c6": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_edaf890370314a218f138015faa0b05d",
+ "placeholder": "",
+ "style": "IPY_MODEL_697f027529b54ee9956bae78a11e0611",
+ "value": "Map: 100%"
+ }
+ },
+ "b0a370dc20654b279b9680692e34418e": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "b518dcee69074b87be73957cd810e7ed": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_d891f8d0b1fc462f8008d02bb2a15692",
+ "placeholder": "",
+ "style": "IPY_MODEL_cced8fd7e998472794f3f3e3018956a5",
+ "value": "tokenizer_config.json: 100%"
+ }
+ },
+ "b8908fa0df3743ecb9d12983a739104f": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "b993eaec6b224440bf80c0958c6fb536": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "b9b313fd861948f5aba25b24b1518d30": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "ba90fdb8822d47dab7ba203bee297f37": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_0f8b6bfe16894500838793f2491d403f",
+ "placeholder": "",
+ "style": "IPY_MODEL_bb19f6c747754682a514373a3a0535ba",
+ "value": "Downloading readme: 100%"
+ }
+ },
+ "bb19f6c747754682a514373a3a0535ba": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "bc883d4cf13e4f8b8a4fe5f410cb6efd": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_e9159e03e61f4f56978ece9c3bca49b2",
+ "max": 51760,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_810ff6c0e17d4fa09a30fef27eacff90",
+ "value": 51760
+ }
+ },
+ "c161d94df0f04feba9542237e0856c22": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "c22f71b1f85843209d7e5321506b9cb9": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_1f44c9ce1adf470cbb19784493ed209f",
+ "IPY_MODEL_f1addc4479d849879e743cf9089e6540",
+ "IPY_MODEL_8b3505352a5a42bf910428c40ce40465"
+ ],
+ "layout": "IPY_MODEL_4c4c88d4c701450692fa0f6b0c5764b0"
+ }
+ },
+ "c4f2b06a82fd4987b8b659524a7b503b": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "cca8113c54c0495daedce1327bf9c68b": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_e02f9b7849c64531835eb77b860d1c93",
+ "max": 464,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_56aee4853b7740e6a977254f5d1fa66d",
+ "value": 464
+ }
+ },
+ "cced8fd7e998472794f3f3e3018956a5": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "cf245afeb1c04f29a24d291608c3d157": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_b518dcee69074b87be73957cd810e7ed",
+ "IPY_MODEL_e29104486d594b2992d7285e0ef77371",
+ "IPY_MODEL_6578fd7acdb54c4c93528ea431fd0144"
+ ],
+ "layout": "IPY_MODEL_d35db8148a354c56aaac56dbae22536f"
+ }
+ },
+ "cfe8cae0e22b495bafa221a63d13b283": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "cfeb365ddf7548d58b2557f22737fcf5": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "d1b47d39450d4019ae85c9b2f943eeaf": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_4dcf6ff672d24983a1877a8431709aa9",
+ "IPY_MODEL_7975adbc2ec5489ea7fa0167e620d85c",
+ "IPY_MODEL_71ce208e20d6483abb9ed923510c86d7"
+ ],
+ "layout": "IPY_MODEL_cfe8cae0e22b495bafa221a63d13b283"
+ }
+ },
+ "d35db8148a354c56aaac56dbae22536f": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "d69dc491b3ab44d7852b21873ed7bb7f": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "d891f8d0b1fc462f8008d02bb2a15692": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "d8e5318cead340c4adbeaccc05d39225": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "daf4cd890b35422683d22fd30bc71e83": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_b0240cd9a4554b29ae11f8051984a1c6",
+ "IPY_MODEL_bc883d4cf13e4f8b8a4fe5f410cb6efd",
+ "IPY_MODEL_99fdbb0300c14c139d1937c646f0cfe7"
+ ],
+ "layout": "IPY_MODEL_c161d94df0f04feba9542237e0856c22"
+ }
+ },
+ "db19fc8d37db4e45a5790a876836d8c4": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "de868e26e7154f62aa86223a539ad421": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "dea41c5260884aa6879b5e1d1697b14f": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "e02f9b7849c64531835eb77b860d1c93": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "e29104486d594b2992d7285e0ef77371": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_a9f0cc51fc3d4d7b874c32dcf1c5bdf2",
+ "max": 50641,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_2f6c70dd266c4816bfad3fd3d192929a",
+ "value": 50641
+ }
+ },
+ "e36a3f9eff0e4cf68834d66b0213ae96": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "e9159e03e61f4f56978ece9c3bca49b2": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "e9adf418296e436fb48bb9f78885598b": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "edaf890370314a218f138015faa0b05d": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "f1addc4479d849879e743cf9089e6540": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_43dec2ede91341f5af60eb522e18e984",
+ "max": 5702746405,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_d8e5318cead340c4adbeaccc05d39225",
+ "value": 5702746405
+ }
+ },
+ "f2df530d22c74977b249dd9fb5f4829b": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_21db8a77b00d4a4e82fdfa608657531f",
+ "max": 9085698,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_6dbbedeca9314e66ae50e44ffa31a414",
+ "value": 9085698
+ }
+ },
+ "f401d53bf28e44eb906bce6c05412662": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "fb995c740590427b882572c81d4e848c": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "fce7a61c25ec4390af43d92b7c473a45": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_30307300bc4e4baf96560e30969a82b6",
+ "IPY_MODEL_8fc142b628fb40568730234de1cafde2",
+ "IPY_MODEL_a8464a4c711e4e00aafdfc919b60d07e"
+ ],
+ "layout": "IPY_MODEL_5f40db8173dd4d76b6ef5ed6d9ec8b6e"
+ }
+ },
+ "fdb1941405ed4e4aa06019933892deb3": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ }
+ }
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
+}
diff --git a/notebooks/07r2_tune-lf-py3.11.ipynb b/notebooks/07r2_tune-lf-py3.11.ipynb
new file mode 100644
index 0000000000000000000000000000000000000000..7d1c5ea9813aade98e46209dc20f2008610ac75e
--- /dev/null
+++ b/notebooks/07r2_tune-lf-py3.11.ipynb
@@ -0,0 +1,9938 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {
+ "application/vnd.databricks.v1+cell": {
+ "cellMetadata": {},
+ "inputWidgets": {},
+ "nuid": "0ea8b46b-839b-445b-8043-ccdf4e920ace",
+ "showTitle": false,
+ "title": ""
+ }
+ },
+ "outputs": [],
+ "source": [
+ "%load_ext autoreload\n",
+ "%autoreload 2"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {
+ "application/vnd.databricks.v1+cell": {
+ "cellMetadata": {},
+ "inputWidgets": {},
+ "nuid": "6d394937-6c99-4a7c-9d32-7600a280032f",
+ "showTitle": false,
+ "title": ""
+ }
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "workding dir: /home/inflaton/code/projects/courses/llm-finetuning\n"
+ ]
+ }
+ ],
+ "source": [
+ "import os\n",
+ "import sys\n",
+ "from pathlib import Path\n",
+ "\n",
+ "workding_dir = str(Path.cwd().parent)\n",
+ "os.chdir(workding_dir)\n",
+ "sys.path.append(workding_dir)\n",
+ "print(\"workding dir:\", workding_dir)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {
+ "application/vnd.databricks.v1+cell": {
+ "cellMetadata": {},
+ "inputWidgets": {},
+ "nuid": "9f67ec60-2f24-411c-84eb-0dd664b44775",
+ "showTitle": false,
+ "title": ""
+ }
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "loading env vars from: /home/inflaton/code/projects/courses/llm-finetuning/.env\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "True"
+ ]
+ },
+ "execution_count": 3,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "from dotenv import find_dotenv, load_dotenv\n",
+ "\n",
+ "found_dotenv = find_dotenv(\".env\")\n",
+ "\n",
+ "if len(found_dotenv) == 0:\n",
+ " found_dotenv = find_dotenv(\".env.example\")\n",
+ "print(f\"loading env vars from: {found_dotenv}\")\n",
+ "load_dotenv(found_dotenv, override=True)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {
+ "application/vnd.databricks.v1+cell": {
+ "cellMetadata": {},
+ "inputWidgets": {},
+ "nuid": "f1597656-8042-4878-9d3b-9ebfb8dd86dc",
+ "showTitle": false,
+ "title": ""
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "('unsloth/Qwen2-0.5B-Instruct',\n",
+ " True,\n",
+ " None,\n",
+ " None,\n",
+ " 2048,\n",
+ " 6,\n",
+ " None,\n",
+ " 'datasets/mac/mac.tsv',\n",
+ " 'results/mac-results_lf-r2.csv')"
+ ]
+ },
+ "execution_count": 4,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "import os\n",
+ "\n",
+ "model_name = os.getenv(\"MODEL_NAME\")\n",
+ "token = os.getenv(\"HF_TOKEN\") or None\n",
+ "load_in_4bit = os.getenv(\"LOAD_IN_4BIT\") == \"true\"\n",
+ "local_model = os.getenv(\"LOCAL_MODEL\")\n",
+ "hub_model = os.getenv(\"HUB_MODEL\")\n",
+ "num_train_epochs = int(os.getenv(\"NUM_TRAIN_EPOCHS\") or 0)\n",
+ "data_path = os.getenv(\"DATA_PATH\")\n",
+ "results_path = os.getenv(\"RESULTS_PATH\")\n",
+ "\n",
+ "max_seq_length = 2048 # Choose any! We auto support RoPE Scaling internally!\n",
+ "dtype = (\n",
+ " None # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+\n",
+ ")\n",
+ "\n",
+ "model_name, load_in_4bit, local_model, hub_model, max_seq_length, num_train_epochs, dtype, data_path, results_path"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Thu Jul 4 11:06:16 2024 \n",
+ "+---------------------------------------------------------------------------------------+\n",
+ "| NVIDIA-SMI 545.23.07 Driver Version: 546.12 CUDA Version: 12.3 |\n",
+ "|-----------------------------------------+----------------------+----------------------+\n",
+ "| GPU Name Persistence-M | Bus-Id Disp.A | Volatile Uncorr. ECC |\n",
+ "| Fan Temp Perf Pwr:Usage/Cap | Memory-Usage | GPU-Util Compute M. |\n",
+ "| | | MIG M. |\n",
+ "|=========================================+======================+======================|\n",
+ "| 0 NVIDIA GeForce RTX 4080 ... On | 00000000:01:00.0 Off | N/A |\n",
+ "| N/A 52C P8 3W / 150W | 355MiB / 12282MiB | 0% Default |\n",
+ "| | | N/A |\n",
+ "+-----------------------------------------+----------------------+----------------------+\n",
+ " \n",
+ "+---------------------------------------------------------------------------------------+\n",
+ "| Processes: |\n",
+ "| GPU GI CI PID Type Process name GPU Memory |\n",
+ "| ID ID Usage |\n",
+ "|=======================================================================================|\n",
+ "| No running processes found |\n",
+ "+---------------------------------------------------------------------------------------+\n"
+ ]
+ }
+ ],
+ "source": [
+ "!nvidia-smi"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "[nltk_data] Downloading package wordnet to /home/inflaton/nltk_data...\n",
+ "[nltk_data] Package wordnet is already up-to-date!\n",
+ "[nltk_data] Downloading package punkt to /home/inflaton/nltk_data...\n",
+ "[nltk_data] Package punkt is already up-to-date!\n",
+ "[nltk_data] Downloading package omw-1.4 to /home/inflaton/nltk_data...\n",
+ "[nltk_data] Package omw-1.4 is already up-to-date!\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "loading /home/inflaton/code/projects/courses/llm-finetuning/llm_toolkit/translation_engine.py\n",
+ "loading train/test data files\n",
+ "DatasetDict({\n",
+ " train: Dataset({\n",
+ " features: ['chinese', 'english'],\n",
+ " num_rows: 4528\n",
+ " })\n",
+ " test: Dataset({\n",
+ " features: ['chinese', 'english'],\n",
+ " num_rows: 1133\n",
+ " })\n",
+ "})\n"
+ ]
+ }
+ ],
+ "source": [
+ "from llm_toolkit.translation_engine import load_translation_dataset\n",
+ "\n",
+ "dataset = load_translation_dataset(data_path)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df = dataset[\"train\"].to_pandas()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import pandas as pd\n",
+ "\n",
+ "df_alpaca = pd.DataFrame({\"instruction\": [\"Please translate the following Chinese text into English and provide only the translated content, nothing else.\"]*len(df)})"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " instruction \n",
+ " input \n",
+ " output \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 \n",
+ " Please translate the following Chinese text in... \n",
+ " 全仗着狐仙搭救。 \n",
+ " Because I was protected by a fox fairy. \n",
+ " \n",
+ " \n",
+ " 1 \n",
+ " Please translate the following Chinese text in... \n",
+ " 过后,表哥告诉她俩,这人是导演,在外国留过学的,还会编剧,今天拍的这戏,就是他自编自导的。 \n",
+ " He was the director, the cousin later told the... \n",
+ " \n",
+ " \n",
+ " 2 \n",
+ " Please translate the following Chinese text in... \n",
+ " 这凤姐忽然想起一件事来,便向窗外叫:“蓉儿回来!” \n",
+ " Xi-feng suddenly seemed to remember something,... \n",
+ " \n",
+ " \n",
+ " 3 \n",
+ " Please translate the following Chinese text in... \n",
+ " 三个老红卫兵走到叶文洁面前,面对着她站成了一排——当年,她们也是这样面对叶哲泰的——试图再现... \n",
+ " The three old Red Guards stood in front of Ye ... \n",
+ " \n",
+ " \n",
+ " 4 \n",
+ " Please translate the following Chinese text in... \n",
+ " 程先生照单全收,都是一个“谢”字,然后问王琦瑶有什么话说。 \n",
+ " Mr. Cheng accepted their toast with equanimity... \n",
+ " \n",
+ " \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " \n",
+ " \n",
+ " 4523 \n",
+ " Please translate the following Chinese text in... \n",
+ " 外边有两张腿歪面裂的八仙桌子,桌旁胡乱搡着几条狭窄的木凳。 \n",
+ " Two rickety tables with scarred tops and a few... \n",
+ " \n",
+ " \n",
+ " 4524 \n",
+ " Please translate the following Chinese text in... \n",
+ " 贾瑞听了,喜的抓耳挠腮。 \n",
+ " At this last remark Jia Rui positively scratch... \n",
+ " \n",
+ " \n",
+ " 4525 \n",
+ " Please translate the following Chinese text in... \n",
+ " 听了这样的评价,我们心情激动,和大家一起振臂高呼:打倒王二! \n",
+ " Hearing comments like this, our emotions were ... \n",
+ " \n",
+ " \n",
+ " 4526 \n",
+ " Please translate the following Chinese text in... \n",
+ " 海老公道:“记住了吗?” \n",
+ " 'Can you remember that?' \n",
+ " \n",
+ " \n",
+ " 4527 \n",
+ " Please translate the following Chinese text in... \n",
+ " 上面说,这样写缺少细节。 \n",
+ " This time the opinions from above said it need... \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
4528 rows × 3 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " instruction \\\n",
+ "0 Please translate the following Chinese text in... \n",
+ "1 Please translate the following Chinese text in... \n",
+ "2 Please translate the following Chinese text in... \n",
+ "3 Please translate the following Chinese text in... \n",
+ "4 Please translate the following Chinese text in... \n",
+ "... ... \n",
+ "4523 Please translate the following Chinese text in... \n",
+ "4524 Please translate the following Chinese text in... \n",
+ "4525 Please translate the following Chinese text in... \n",
+ "4526 Please translate the following Chinese text in... \n",
+ "4527 Please translate the following Chinese text in... \n",
+ "\n",
+ " input \\\n",
+ "0 全仗着狐仙搭救。 \n",
+ "1 过后,表哥告诉她俩,这人是导演,在外国留过学的,还会编剧,今天拍的这戏,就是他自编自导的。 \n",
+ "2 这凤姐忽然想起一件事来,便向窗外叫:“蓉儿回来!” \n",
+ "3 三个老红卫兵走到叶文洁面前,面对着她站成了一排——当年,她们也是这样面对叶哲泰的——试图再现... \n",
+ "4 程先生照单全收,都是一个“谢”字,然后问王琦瑶有什么话说。 \n",
+ "... ... \n",
+ "4523 外边有两张腿歪面裂的八仙桌子,桌旁胡乱搡着几条狭窄的木凳。 \n",
+ "4524 贾瑞听了,喜的抓耳挠腮。 \n",
+ "4525 听了这样的评价,我们心情激动,和大家一起振臂高呼:打倒王二! \n",
+ "4526 海老公道:“记住了吗?” \n",
+ "4527 上面说,这样写缺少细节。 \n",
+ "\n",
+ " output \n",
+ "0 Because I was protected by a fox fairy. \n",
+ "1 He was the director, the cousin later told the... \n",
+ "2 Xi-feng suddenly seemed to remember something,... \n",
+ "3 The three old Red Guards stood in front of Ye ... \n",
+ "4 Mr. Cheng accepted their toast with equanimity... \n",
+ "... ... \n",
+ "4523 Two rickety tables with scarred tops and a few... \n",
+ "4524 At this last remark Jia Rui positively scratch... \n",
+ "4525 Hearing comments like this, our emotions were ... \n",
+ "4526 'Can you remember that?' \n",
+ "4527 This time the opinions from above said it need... \n",
+ "\n",
+ "[4528 rows x 3 columns]"
+ ]
+ },
+ "execution_count": 9,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df_alpaca[\"input\"] = df[\"chinese\"]\n",
+ "df_alpaca[\"output\"] = df[\"english\"]\n",
+ "df_alpaca"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df_alpaca.to_json(\n",
+ " \"llama-factory/data/alpaca_mac.json\", orient=\"records\", lines=False, indent=2\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df = pd.read_json(\"llama-factory/data/alpaca_mac.json\", orient=\"records\", lines=False)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " instruction \n",
+ " input \n",
+ " output \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 \n",
+ " Please translate the following Chinese text in... \n",
+ " 全仗着狐仙搭救。 \n",
+ " Because I was protected by a fox fairy. \n",
+ " \n",
+ " \n",
+ " 1 \n",
+ " Please translate the following Chinese text in... \n",
+ " 过后,表哥告诉她俩,这人是导演,在外国留过学的,还会编剧,今天拍的这戏,就是他自编自导的。 \n",
+ " He was the director, the cousin later told the... \n",
+ " \n",
+ " \n",
+ " 2 \n",
+ " Please translate the following Chinese text in... \n",
+ " 这凤姐忽然想起一件事来,便向窗外叫:“蓉儿回来!” \n",
+ " Xi-feng suddenly seemed to remember something,... \n",
+ " \n",
+ " \n",
+ " 3 \n",
+ " Please translate the following Chinese text in... \n",
+ " 三个老红卫兵走到叶文洁面前,面对着她站成了一排——当年,她们也是这样面对叶哲泰的——试图再现... \n",
+ " The three old Red Guards stood in front of Ye ... \n",
+ " \n",
+ " \n",
+ " 4 \n",
+ " Please translate the following Chinese text in... \n",
+ " 程先生照单全收,都是一个“谢”字,然后问王琦瑶有什么话说。 \n",
+ " Mr. Cheng accepted their toast with equanimity... \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " instruction \\\n",
+ "0 Please translate the following Chinese text in... \n",
+ "1 Please translate the following Chinese text in... \n",
+ "2 Please translate the following Chinese text in... \n",
+ "3 Please translate the following Chinese text in... \n",
+ "4 Please translate the following Chinese text in... \n",
+ "\n",
+ " input \\\n",
+ "0 全仗着狐仙搭救。 \n",
+ "1 过后,表哥告诉她俩,这人是导演,在外国留过学的,还会编剧,今天拍的这戏,就是他自编自导的。 \n",
+ "2 这凤姐忽然想起一件事来,便向窗外叫:“蓉儿回来!” \n",
+ "3 三个老红卫兵走到叶文洁面前,面对着她站成了一排——当年,她们也是这样面对叶哲泰的——试图再现... \n",
+ "4 程先生照单全收,都是一个“谢”字,然后问王琦瑶有什么话说。 \n",
+ "\n",
+ " output \n",
+ "0 Because I was protected by a fox fairy. \n",
+ "1 He was the director, the cousin later told the... \n",
+ "2 Xi-feng suddenly seemed to remember something,... \n",
+ "3 The three old Red Guards stood in front of Ye ... \n",
+ "4 Mr. Cheng accepted their toast with equanimity... "
+ ]
+ },
+ "execution_count": 12,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Python 3.11.9\n",
+ "\u001b[33mWARNING: Package(s) not found: flash-attn\u001b[0m\u001b[33m\n",
+ "\u001b[0mCPU times: user 5.39 ms, sys: 19.5 ms, total: 24.9 ms\n",
+ "Wall time: 527 ms\n"
+ ]
+ }
+ ],
+ "source": [
+ "%%time\n",
+ "!python --version\n",
+ "!pip show flash-attn"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 16,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Current Directory:\n",
+ "/home/inflaton/code/projects/courses/llm-finetuning/llama-factory\n",
+ "07/04/2024 11:09:05 - INFO - llamafactory.hparams.parser - Process rank: 0, device: cuda:0, n_gpu: 1, distributed training: False, compute dtype: torch.bfloat16\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-07-04 11:09:06,545 >> loading file vocab.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/vocab.json\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-07-04 11:09:06,545 >> loading file merges.txt from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/merges.txt\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-07-04 11:09:06,545 >> loading file tokenizer.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/tokenizer.json\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-07-04 11:09:06,545 >> loading file added_tokens.json from cache at None\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-07-04 11:09:06,545 >> loading file special_tokens_map.json from cache at None\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-07-04 11:09:06,545 >> loading file tokenizer_config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/tokenizer_config.json\n",
+ "[WARNING|logging.py:313] 2024-07-04 11:09:06,662 >> Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n",
+ "07/04/2024 11:09:06 - INFO - llamafactory.data.template - Replace eos token: <|im_end|>\n",
+ "07/04/2024 11:09:06 - INFO - llamafactory.data.template - Add <|im_start|> to stop words.\n",
+ "07/04/2024 11:09:06 - INFO - llamafactory.data.loader - Loading dataset alpaca_mac.json...\n",
+ "Converting format of dataset (num_proc=16): 100%|█| 4528/4528 [00:00<00:00, 1685\n",
+ "Running tokenizer on dataset (num_proc=16): 100%|█| 4528/4528 [00:01<00:00, 3476\n",
+ "input_ids:\n",
+ "[151644, 872, 198, 5501, 14683, 279, 2701, 8453, 1467, 1119, 6364, 323, 3410, 1172, 279, 24531, 2213, 11, 4302, 770, 624, 35987, 102895, 99164, 100324, 100717, 100095, 99509, 1773, 151645, 198, 151644, 77091, 198, 17949, 358, 572, 2617, 553, 264, 38835, 44486, 13, 151645]\n",
+ "inputs:\n",
+ "<|im_start|>user\n",
+ "Please translate the following Chinese text into English and provide only the translated content, nothing else.\n",
+ "全仗着狐仙搭救。<|im_end|>\n",
+ "<|im_start|>assistant\n",
+ "Because I was protected by a fox fairy.<|im_end|>\n",
+ "label_ids:\n",
+ "[-100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, 17949, 358, 572, 2617, 553, 264, 38835, 44486, 13, 151645]\n",
+ "labels:\n",
+ "Because I was protected by a fox fairy.<|im_end|>\n",
+ "[INFO|configuration_utils.py:733] 2024-07-04 11:09:09,749 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/config.json\n",
+ "[INFO|configuration_utils.py:800] 2024-07-04 11:09:09,750 >> Model config Qwen2Config {\n",
+ " \"_name_or_path\": \"Qwen/Qwen2-0.5B-Instruct\",\n",
+ " \"architectures\": [\n",
+ " \"Qwen2ForCausalLM\"\n",
+ " ],\n",
+ " \"attention_dropout\": 0.0,\n",
+ " \"bos_token_id\": 151643,\n",
+ " \"eos_token_id\": 151645,\n",
+ " \"hidden_act\": \"silu\",\n",
+ " \"hidden_size\": 896,\n",
+ " \"initializer_range\": 0.02,\n",
+ " \"intermediate_size\": 4864,\n",
+ " \"max_position_embeddings\": 32768,\n",
+ " \"max_window_layers\": 24,\n",
+ " \"model_type\": \"qwen2\",\n",
+ " \"num_attention_heads\": 14,\n",
+ " \"num_hidden_layers\": 24,\n",
+ " \"num_key_value_heads\": 2,\n",
+ " \"rms_norm_eps\": 1e-06,\n",
+ " \"rope_theta\": 1000000.0,\n",
+ " \"sliding_window\": 32768,\n",
+ " \"tie_word_embeddings\": true,\n",
+ " \"torch_dtype\": \"bfloat16\",\n",
+ " \"transformers_version\": \"4.42.3\",\n",
+ " \"use_cache\": true,\n",
+ " \"use_sliding_window\": false,\n",
+ " \"vocab_size\": 151936\n",
+ "}\n",
+ "\n",
+ "[INFO|modeling_utils.py:3556] 2024-07-04 11:09:09,841 >> loading weights file model.safetensors from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/model.safetensors\n",
+ "[INFO|modeling_utils.py:1531] 2024-07-04 11:09:13,066 >> Instantiating Qwen2ForCausalLM model under default dtype torch.bfloat16.\n",
+ "[INFO|configuration_utils.py:1000] 2024-07-04 11:09:13,069 >> Generate config GenerationConfig {\n",
+ " \"bos_token_id\": 151643,\n",
+ " \"eos_token_id\": 151645\n",
+ "}\n",
+ "\n",
+ "[INFO|modeling_utils.py:4364] 2024-07-04 11:10:03,269 >> All model checkpoint weights were used when initializing Qwen2ForCausalLM.\n",
+ "\n",
+ "[INFO|modeling_utils.py:4372] 2024-07-04 11:10:03,270 >> All the weights of Qwen2ForCausalLM were initialized from the model checkpoint at Qwen/Qwen2-0.5B-Instruct.\n",
+ "If your task is similar to the task the model of the checkpoint was trained on, you can already use Qwen2ForCausalLM for predictions without further training.\n",
+ "[INFO|configuration_utils.py:955] 2024-07-04 11:10:03,578 >> loading configuration file generation_config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/generation_config.json\n",
+ "[INFO|configuration_utils.py:1000] 2024-07-04 11:10:03,578 >> Generate config GenerationConfig {\n",
+ " \"bos_token_id\": 151643,\n",
+ " \"do_sample\": true,\n",
+ " \"eos_token_id\": [\n",
+ " 151645,\n",
+ " 151643\n",
+ " ],\n",
+ " \"pad_token_id\": 151643,\n",
+ " \"repetition_penalty\": 1.1,\n",
+ " \"temperature\": 0.7,\n",
+ " \"top_k\": 20,\n",
+ " \"top_p\": 0.8\n",
+ "}\n",
+ "\n",
+ "07/04/2024 11:10:03 - INFO - llamafactory.model.model_utils.checkpointing - Gradient checkpointing enabled.\n",
+ "07/04/2024 11:10:03 - INFO - llamafactory.model.model_utils.attention - Using torch SDPA for faster training and inference.\n",
+ "07/04/2024 11:10:03 - INFO - llamafactory.model.adapter - Upcasting trainable params to float32.\n",
+ "07/04/2024 11:10:03 - INFO - llamafactory.model.adapter - Fine-tuning method: LoRA\n",
+ "07/04/2024 11:10:03 - INFO - llamafactory.model.model_utils.misc - Found linear modules: up_proj,down_proj,k_proj,q_proj,v_proj,o_proj,gate_proj\n",
+ "07/04/2024 11:10:04 - INFO - llamafactory.model.loader - trainable params: 4,399,104 || all params: 498,431,872 || trainable%: 0.8826\n",
+ "[INFO|trainer.py:642] 2024-07-04 11:10:04,049 >> Using auto half precision backend\n",
+ "07/04/2024 11:10:04 - WARNING - llamafactory.train.callbacks - Previous trainer log in this folder will be deleted.\n",
+ "[INFO|trainer.py:2128] 2024-07-04 11:10:04,194 >> ***** Running training *****\n",
+ "[INFO|trainer.py:2129] 2024-07-04 11:10:04,194 >> Num examples = 4,482\n",
+ "[INFO|trainer.py:2130] 2024-07-04 11:10:04,194 >> Num Epochs = 6\n",
+ "[INFO|trainer.py:2131] 2024-07-04 11:10:04,194 >> Instantaneous batch size per device = 1\n",
+ "[INFO|trainer.py:2134] 2024-07-04 11:10:04,194 >> Total train batch size (w. parallel, distributed & accumulation) = 8\n",
+ "[INFO|trainer.py:2135] 2024-07-04 11:10:04,194 >> Gradient Accumulation steps = 8\n",
+ "[INFO|trainer.py:2136] 2024-07-04 11:10:04,195 >> Total optimization steps = 3,360\n",
+ "[INFO|trainer.py:2137] 2024-07-04 11:10:04,196 >> Number of trainable parameters = 4,399,104\n",
+ "[INFO|integration_utils.py:750] 2024-07-04 11:10:04,198 >> Automatic Weights & Biases logging enabled, to disable set os.environ[\"WANDB_DISABLED\"] = \"true\"\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33minflaton-sg\u001b[0m (\u001b[33minflaton-ai\u001b[0m). Use \u001b[1m`wandb login --relogin`\u001b[0m to force relogin\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: Tracking run with wandb version 0.17.4\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: Run data is saved locally in \u001b[35m\u001b[1m/home/inflaton/code/projects/courses/llm-finetuning/llama-factory/wandb/run-20240704_111005-u8sqhi0x\u001b[0m\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: Run \u001b[1m`wandb offline`\u001b[0m to turn off syncing.\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: Syncing run \u001b[33mqwen2_0.5b_lora_sft\u001b[0m\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: ⭐️ View project at \u001b[34m\u001b[4mhttps://wandb.ai/inflaton-ai/huggingface\u001b[0m\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: 🚀 View run at \u001b[34m\u001b[4mhttps://wandb.ai/inflaton-ai/huggingface/runs/u8sqhi0x\u001b[0m\n",
+ "{'loss': 2.581, 'grad_norm': 2.9743993282318115, 'learning_rate': 2.9761904761904763e-06, 'epoch': 0.02}\n",
+ "{'loss': 2.704, 'grad_norm': 3.803558826446533, 'learning_rate': 5.9523809523809525e-06, 'epoch': 0.04}\n",
+ "{'loss': 2.5764, 'grad_norm': 2.419433116912842, 'learning_rate': 8.92857142857143e-06, 'epoch': 0.05}\n",
+ "{'loss': 2.4994, 'grad_norm': 4.8528876304626465, 'learning_rate': 1.1904761904761905e-05, 'epoch': 0.07}\n",
+ "{'loss': 2.6881, 'grad_norm': 2.5375239849090576, 'learning_rate': 1.4880952380952381e-05, 'epoch': 0.09}\n",
+ "{'loss': 2.3869, 'grad_norm': 2.810744524002075, 'learning_rate': 1.785714285714286e-05, 'epoch': 0.11}\n",
+ "{'loss': 2.5728, 'grad_norm': 2.6387815475463867, 'learning_rate': 2.0833333333333336e-05, 'epoch': 0.12}\n",
+ "{'loss': 2.3077, 'grad_norm': 2.4742910861968994, 'learning_rate': 2.380952380952381e-05, 'epoch': 0.14}\n",
+ "{'loss': 2.4318, 'grad_norm': 3.0079479217529297, 'learning_rate': 2.6785714285714288e-05, 'epoch': 0.16}\n",
+ "{'loss': 2.29, 'grad_norm': 2.584622859954834, 'learning_rate': 2.9761904761904762e-05, 'epoch': 0.18}\n",
+ "{'loss': 2.3407, 'grad_norm': 3.3264784812927246, 'learning_rate': 3.273809523809524e-05, 'epoch': 0.2}\n",
+ "{'loss': 2.3577, 'grad_norm': 2.667269468307495, 'learning_rate': 3.571428571428572e-05, 'epoch': 0.21}\n",
+ "{'loss': 2.2612, 'grad_norm': 2.8811182975769043, 'learning_rate': 3.8690476190476195e-05, 'epoch': 0.23}\n",
+ "{'loss': 2.3096, 'grad_norm': 3.249279499053955, 'learning_rate': 4.166666666666667e-05, 'epoch': 0.25}\n",
+ "{'loss': 2.183, 'grad_norm': 2.5008630752563477, 'learning_rate': 4.464285714285715e-05, 'epoch': 0.27}\n",
+ "{'loss': 2.23, 'grad_norm': 2.457791328430176, 'learning_rate': 4.761904761904762e-05, 'epoch': 0.29}\n",
+ "{'loss': 2.3025, 'grad_norm': 2.4453022480010986, 'learning_rate': 5.05952380952381e-05, 'epoch': 0.3}\n",
+ "{'loss': 2.0884, 'grad_norm': 2.7773451805114746, 'learning_rate': 5.3571428571428575e-05, 'epoch': 0.32}\n",
+ "{'loss': 2.2048, 'grad_norm': 3.600346565246582, 'learning_rate': 5.6547619047619046e-05, 'epoch': 0.34}\n",
+ "{'loss': 2.3676, 'grad_norm': 2.939140796661377, 'learning_rate': 5.9523809523809524e-05, 'epoch': 0.36}\n",
+ "{'loss': 2.2684, 'grad_norm': 2.7832212448120117, 'learning_rate': 6.25e-05, 'epoch': 0.37}\n",
+ "{'loss': 2.2021, 'grad_norm': 3.7691140174865723, 'learning_rate': 6.547619047619048e-05, 'epoch': 0.39}\n",
+ "{'loss': 2.1625, 'grad_norm': 3.3338756561279297, 'learning_rate': 6.845238095238096e-05, 'epoch': 0.41}\n",
+ "{'loss': 2.3564, 'grad_norm': 4.061848163604736, 'learning_rate': 7.142857142857143e-05, 'epoch': 0.43}\n",
+ "{'loss': 2.2266, 'grad_norm': 3.3382863998413086, 'learning_rate': 7.440476190476191e-05, 'epoch': 0.45}\n",
+ "{'loss': 2.1837, 'grad_norm': 3.208007335662842, 'learning_rate': 7.738095238095239e-05, 'epoch': 0.46}\n",
+ "{'loss': 2.1765, 'grad_norm': 4.045449733734131, 'learning_rate': 8.035714285714287e-05, 'epoch': 0.48}\n",
+ "{'loss': 2.2863, 'grad_norm': 4.37124490737915, 'learning_rate': 8.333333333333334e-05, 'epoch': 0.5}\n",
+ "{'loss': 2.0807, 'grad_norm': 2.6629326343536377, 'learning_rate': 8.630952380952382e-05, 'epoch': 0.52}\n",
+ "{'loss': 2.2086, 'grad_norm': 3.6005942821502686, 'learning_rate': 8.92857142857143e-05, 'epoch': 0.54}\n",
+ "{'loss': 2.2231, 'grad_norm': 4.065690040588379, 'learning_rate': 9.226190476190478e-05, 'epoch': 0.55}\n",
+ "{'loss': 1.9875, 'grad_norm': 6.6260294914245605, 'learning_rate': 9.523809523809524e-05, 'epoch': 0.57}\n",
+ "{'loss': 2.0721, 'grad_norm': 4.6804656982421875, 'learning_rate': 9.821428571428572e-05, 'epoch': 0.59}\n",
+ "{'loss': 2.1194, 'grad_norm': 4.226340293884277, 'learning_rate': 9.999956828659095e-05, 'epoch': 0.61}\n",
+ "{'loss': 2.1256, 'grad_norm': 4.530922889709473, 'learning_rate': 9.999471159635539e-05, 'epoch': 0.62}\n",
+ "{'loss': 2.0243, 'grad_norm': 3.235328197479248, 'learning_rate': 9.998445910004082e-05, 'epoch': 0.64}\n",
+ "{'loss': 2.2819, 'grad_norm': 4.247537136077881, 'learning_rate': 9.996881190417393e-05, 'epoch': 0.66}\n",
+ "{'loss': 2.1964, 'grad_norm': 3.339164972305298, 'learning_rate': 9.994777169751806e-05, 'epoch': 0.68}\n",
+ "{'loss': 1.9102, 'grad_norm': 2.744009494781494, 'learning_rate': 9.992134075089084e-05, 'epoch': 0.7}\n",
+ "{'loss': 2.0751, 'grad_norm': 3.513111114501953, 'learning_rate': 9.988952191691925e-05, 'epoch': 0.71}\n",
+ "{'loss': 2.1697, 'grad_norm': 3.301513433456421, 'learning_rate': 9.985231862973168e-05, 'epoch': 0.73}\n",
+ "{'loss': 2.1742, 'grad_norm': 2.8456363677978516, 'learning_rate': 9.980973490458728e-05, 'epoch': 0.75}\n",
+ "{'loss': 2.1497, 'grad_norm': 3.499181032180786, 'learning_rate': 9.976177533744261e-05, 'epoch': 0.77}\n",
+ "{'loss': 2.0643, 'grad_norm': 4.2905964851379395, 'learning_rate': 9.97084451044556e-05, 'epoch': 0.79}\n",
+ "{'loss': 1.9934, 'grad_norm': 2.706711769104004, 'learning_rate': 9.964974996142698e-05, 'epoch': 0.8}\n",
+ "{'loss': 2.0795, 'grad_norm': 3.038059949874878, 'learning_rate': 9.958569624317893e-05, 'epoch': 0.82}\n",
+ "{'loss': 2.0908, 'grad_norm': 4.291042804718018, 'learning_rate': 9.951629086287151e-05, 'epoch': 0.84}\n",
+ "{'loss': 2.105, 'grad_norm': 3.027702808380127, 'learning_rate': 9.944154131125642e-05, 'epoch': 0.86}\n",
+ "{'loss': 2.112, 'grad_norm': 2.6875832080841064, 'learning_rate': 9.936145565586871e-05, 'epoch': 0.87}\n",
+ "{'loss': 2.2824, 'grad_norm': 2.8110086917877197, 'learning_rate': 9.927604254015585e-05, 'epoch': 0.89}\n",
+ "{'loss': 2.2181, 'grad_norm': 3.3072471618652344, 'learning_rate': 9.918531118254507e-05, 'epoch': 0.91}\n",
+ "{'loss': 2.1132, 'grad_norm': 3.8883237838745117, 'learning_rate': 9.90892713754483e-05, 'epoch': 0.93}\n",
+ "{'loss': 2.1513, 'grad_norm': 3.775455951690674, 'learning_rate': 9.898793348420536e-05, 'epoch': 0.95}\n",
+ "{'loss': 2.1119, 'grad_norm': 3.0280404090881348, 'learning_rate': 9.888130844596524e-05, 'epoch': 0.96}\n",
+ "{'loss': 2.1126, 'grad_norm': 3.2323291301727295, 'learning_rate': 9.876940776850569e-05, 'epoch': 0.98}\n",
+ "{'loss': 2.1328, 'grad_norm': 2.91339373588562, 'learning_rate': 9.865224352899119e-05, 'epoch': 1.0}\n",
+ " 17%|██████▎ | 560/3360 [12:30<1:02:53, 1.35s/it][INFO|trainer.py:3788] 2024-07-04 11:22:39,524 >> \n",
+ "***** Running Evaluation *****\n",
+ "[INFO|trainer.py:3790] 2024-07-04 11:22:39,524 >> Num examples = 46\n",
+ "[INFO|trainer.py:3793] 2024-07-04 11:22:39,524 >> Batch size = 1\n",
+ "\n",
+ " 0%| | 0/46 [00:00, ?it/s]\u001b[A\n",
+ " 9%|███▊ | 4/46 [00:00<00:01, 33.16it/s]\u001b[A\n",
+ " 17%|███████▋ | 8/46 [00:00<00:01, 29.60it/s]\u001b[A\n",
+ " 24%|██████████▎ | 11/46 [00:00<00:01, 27.75it/s]\u001b[A\n",
+ " 30%|█████████████ | 14/46 [00:00<00:01, 27.78it/s]\u001b[A\n",
+ " 37%|███████████████▉ | 17/46 [00:00<00:01, 27.00it/s]\u001b[A\n",
+ " 43%|██████████████████▋ | 20/46 [00:00<00:01, 25.65it/s]\u001b[A\n",
+ " 50%|█████████████████████▌ | 23/46 [00:00<00:00, 23.31it/s]\u001b[A\n",
+ " 57%|████████████████████████▎ | 26/46 [00:01<00:00, 22.47it/s]\u001b[A\n",
+ " 63%|███████████████████████████ | 29/46 [00:01<00:00, 22.29it/s]\u001b[A\n",
+ " 70%|█████████████████████████████▉ | 32/46 [00:01<00:00, 22.68it/s]\u001b[A\n",
+ " 76%|████████████████████████████████▋ | 35/46 [00:01<00:00, 23.12it/s]\u001b[A\n",
+ " 83%|███████████████████████████████████▌ | 38/46 [00:01<00:00, 24.24it/s]\u001b[A\n",
+ " 89%|██████████████████████████████████████▎ | 41/46 [00:01<00:00, 24.55it/s]\u001b[A\n",
+ " \u001b[A\n",
+ "\u001b[A{'eval_loss': 2.056835651397705, 'eval_runtime': 1.9007, 'eval_samples_per_second': 24.201, 'eval_steps_per_second': 24.201, 'epoch': 1.0}\n",
+ " 17%|██████▎ | 560/3360 [12:31<1:02:53, 1.35s/it]\n",
+ "100%|███████████████████████████████████████████| 46/46 [00:01<00:00, 24.32it/s]\u001b[A\n",
+ " \u001b[A[INFO|trainer.py:3478] 2024-07-04 11:22:41,427 >> Saving model checkpoint to saves/qwen2-0.5b/lora/sft/checkpoint-560\n",
+ "[INFO|configuration_utils.py:733] 2024-07-04 11:22:42,026 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/config.json\n",
+ "[INFO|configuration_utils.py:800] 2024-07-04 11:22:42,027 >> Model config Qwen2Config {\n",
+ " \"architectures\": [\n",
+ " \"Qwen2ForCausalLM\"\n",
+ " ],\n",
+ " \"attention_dropout\": 0.0,\n",
+ " \"bos_token_id\": 151643,\n",
+ " \"eos_token_id\": 151645,\n",
+ " \"hidden_act\": \"silu\",\n",
+ " \"hidden_size\": 896,\n",
+ " \"initializer_range\": 0.02,\n",
+ " \"intermediate_size\": 4864,\n",
+ " \"max_position_embeddings\": 32768,\n",
+ " \"max_window_layers\": 24,\n",
+ " \"model_type\": \"qwen2\",\n",
+ " \"num_attention_heads\": 14,\n",
+ " \"num_hidden_layers\": 24,\n",
+ " \"num_key_value_heads\": 2,\n",
+ " \"rms_norm_eps\": 1e-06,\n",
+ " \"rope_theta\": 1000000.0,\n",
+ " \"sliding_window\": 32768,\n",
+ " \"tie_word_embeddings\": true,\n",
+ " \"torch_dtype\": \"bfloat16\",\n",
+ " \"transformers_version\": \"4.42.3\",\n",
+ " \"use_cache\": true,\n",
+ " \"use_sliding_window\": false,\n",
+ " \"vocab_size\": 151936\n",
+ "}\n",
+ "\n",
+ "[INFO|tokenization_utils_base.py:2574] 2024-07-04 11:22:42,060 >> tokenizer config file saved in saves/qwen2-0.5b/lora/sft/checkpoint-560/tokenizer_config.json\n",
+ "[INFO|tokenization_utils_base.py:2583] 2024-07-04 11:22:42,060 >> Special tokens file saved in saves/qwen2-0.5b/lora/sft/checkpoint-560/special_tokens_map.json\n",
+ "{'loss': 1.996, 'grad_norm': 2.9073429107666016, 'learning_rate': 9.852982837266955e-05, 'epoch': 1.02}\n",
+ "{'loss': 1.7941, 'grad_norm': 3.4045894145965576, 'learning_rate': 9.840217551150706e-05, 'epoch': 1.04}\n",
+ "{'loss': 1.9779, 'grad_norm': 2.8464860916137695, 'learning_rate': 9.826929872276255e-05, 'epoch': 1.05}\n",
+ "{'loss': 1.92, 'grad_norm': 3.770984411239624, 'learning_rate': 9.81312123475006e-05, 'epoch': 1.07}\n",
+ "{'loss': 1.8683, 'grad_norm': 3.4236226081848145, 'learning_rate': 9.798793128904356e-05, 'epoch': 1.09}\n",
+ "{'loss': 1.9201, 'grad_norm': 4.08709716796875, 'learning_rate': 9.78394710113631e-05, 'epoch': 1.11}\n",
+ "{'loss': 1.8563, 'grad_norm': 3.362687349319458, 'learning_rate': 9.768584753741134e-05, 'epoch': 1.12}\n",
+ "{'loss': 1.913, 'grad_norm': 5.210264682769775, 'learning_rate': 9.752707744739145e-05, 'epoch': 1.14}\n",
+ "{'loss': 1.9273, 'grad_norm': 3.515490770339966, 'learning_rate': 9.736317787696816e-05, 'epoch': 1.16}\n",
+ "{'loss': 1.8016, 'grad_norm': 3.4942610263824463, 'learning_rate': 9.719416651541839e-05, 'epoch': 1.18}\n",
+ "{'loss': 1.7993, 'grad_norm': 2.7268266677856445, 'learning_rate': 9.702006160372209e-05, 'epoch': 1.2}\n",
+ "{'loss': 1.9155, 'grad_norm': 3.6193785667419434, 'learning_rate': 9.684088193259355e-05, 'epoch': 1.21}\n",
+ "{'loss': 1.8261, 'grad_norm': 4.29509973526001, 'learning_rate': 9.665664684045333e-05, 'epoch': 1.23}\n",
+ "{'loss': 1.9301, 'grad_norm': 4.692563056945801, 'learning_rate': 9.646737621134112e-05, 'epoch': 1.25}\n",
+ "{'loss': 1.8418, 'grad_norm': 4.545106410980225, 'learning_rate': 9.627309047276974e-05, 'epoch': 1.27}\n",
+ "{'loss': 2.0611, 'grad_norm': 4.3200860023498535, 'learning_rate': 9.607381059352038e-05, 'epoch': 1.29}\n",
+ "{'loss': 1.9531, 'grad_norm': 3.2151238918304443, 'learning_rate': 9.586955808137958e-05, 'epoch': 1.3}\n",
+ "{'loss': 1.9447, 'grad_norm': 3.385021209716797, 'learning_rate': 9.566035498081784e-05, 'epoch': 1.32}\n",
+ "{'loss': 1.9424, 'grad_norm': 8.94682502746582, 'learning_rate': 9.544622387061055e-05, 'epoch': 1.34}\n",
+ "{'loss': 1.706, 'grad_norm': 4.064428806304932, 'learning_rate': 9.522718786140097e-05, 'epoch': 1.36}\n",
+ "{'loss': 1.9165, 'grad_norm': 4.604166507720947, 'learning_rate': 9.500327059320606e-05, 'epoch': 1.37}\n",
+ "{'loss': 1.7816, 'grad_norm': 5.32956600189209, 'learning_rate': 9.477449623286505e-05, 'epoch': 1.39}\n",
+ "{'loss': 1.6637, 'grad_norm': 3.613009214401245, 'learning_rate': 9.454088947143116e-05, 'epoch': 1.41}\n",
+ "{'loss': 1.9416, 'grad_norm': 4.8296799659729, 'learning_rate': 9.430247552150673e-05, 'epoch': 1.43}\n",
+ "{'loss': 1.8371, 'grad_norm': 4.565757751464844, 'learning_rate': 9.405928011452211e-05, 'epoch': 1.45}\n",
+ "{'loss': 1.846, 'grad_norm': 3.5512914657592773, 'learning_rate': 9.381132949795861e-05, 'epoch': 1.46}\n",
+ "{'loss': 2.0069, 'grad_norm': 3.9040660858154297, 'learning_rate': 9.35586504325155e-05, 'epoch': 1.48}\n",
+ "{'loss': 1.8083, 'grad_norm': 3.609498977661133, 'learning_rate': 9.330127018922194e-05, 'epoch': 1.5}\n",
+ "{'loss': 1.7487, 'grad_norm': 3.3245325088500977, 'learning_rate': 9.303921654649362e-05, 'epoch': 1.52}\n",
+ "{'loss': 1.8764, 'grad_norm': 4.417221546173096, 'learning_rate': 9.277251778713474e-05, 'epoch': 1.54}\n",
+ "{'loss': 1.8843, 'grad_norm': 4.959105014801025, 'learning_rate': 9.250120269528546e-05, 'epoch': 1.55}\n",
+ "{'loss': 1.793, 'grad_norm': 3.7974698543548584, 'learning_rate': 9.22253005533154e-05, 'epoch': 1.57}\n",
+ "{'loss': 1.9039, 'grad_norm': 3.882502555847168, 'learning_rate': 9.194484113866313e-05, 'epoch': 1.59}\n",
+ "{'loss': 1.9854, 'grad_norm': 3.416905164718628, 'learning_rate': 9.165985472062246e-05, 'epoch': 1.61}\n",
+ "{'loss': 1.7529, 'grad_norm': 3.456245183944702, 'learning_rate': 9.137037205707552e-05, 'epoch': 1.62}\n",
+ "{'loss': 1.8017, 'grad_norm': 3.490054130554199, 'learning_rate': 9.107642439117321e-05, 'epoch': 1.64}\n",
+ "{'loss': 1.8225, 'grad_norm': 3.2115142345428467, 'learning_rate': 9.077804344796302e-05, 'epoch': 1.66}\n",
+ "{'loss': 1.8333, 'grad_norm': 3.5726113319396973, 'learning_rate': 9.04752614309652e-05, 'epoch': 1.68}\n",
+ "{'loss': 1.7861, 'grad_norm': 3.9323503971099854, 'learning_rate': 9.01681110186971e-05, 'epoch': 1.7}\n",
+ "{'loss': 1.8067, 'grad_norm': 4.4842352867126465, 'learning_rate': 8.985662536114613e-05, 'epoch': 1.71}\n",
+ "{'loss': 1.8397, 'grad_norm': 3.1608762741088867, 'learning_rate': 8.954083807619208e-05, 'epoch': 1.73}\n",
+ "{'loss': 1.9411, 'grad_norm': 3.920475959777832, 'learning_rate': 8.922078324597879e-05, 'epoch': 1.75}\n",
+ "{'loss': 1.8974, 'grad_norm': 3.438220739364624, 'learning_rate': 8.889649541323574e-05, 'epoch': 1.77}\n",
+ "{'loss': 1.8202, 'grad_norm': 4.780834674835205, 'learning_rate': 8.856800957755e-05, 'epoch': 1.78}\n",
+ "{'loss': 1.8528, 'grad_norm': 3.768432378768921, 'learning_rate': 8.823536119158864e-05, 'epoch': 1.8}\n",
+ "{'loss': 1.753, 'grad_norm': 4.07826042175293, 'learning_rate': 8.789858615727265e-05, 'epoch': 1.82}\n",
+ "{'loss': 1.7389, 'grad_norm': 3.5676631927490234, 'learning_rate': 8.755772082190194e-05, 'epoch': 1.84}\n",
+ "{'loss': 1.9198, 'grad_norm': 3.463003635406494, 'learning_rate': 8.721280197423258e-05, 'epoch': 1.86}\n",
+ "{'loss': 1.7722, 'grad_norm': 4.634316921234131, 'learning_rate': 8.68638668405062e-05, 'epoch': 1.87}\n",
+ "{'loss': 1.8237, 'grad_norm': 4.284477710723877, 'learning_rate': 8.651095308043232e-05, 'epoch': 1.89}\n",
+ "{'loss': 2.0051, 'grad_norm': 4.610734462738037, 'learning_rate': 8.61540987831238e-05, 'epoch': 1.91}\n",
+ "{'loss': 1.9493, 'grad_norm': 4.1395392417907715, 'learning_rate': 8.579334246298593e-05, 'epoch': 1.93}\n",
+ "{'loss': 1.8477, 'grad_norm': 3.6301958560943604, 'learning_rate': 8.542872305555978e-05, 'epoch': 1.95}\n",
+ "{'loss': 1.7, 'grad_norm': 3.8048858642578125, 'learning_rate': 8.50602799133199e-05, 'epoch': 1.96}\n",
+ "{'loss': 1.8371, 'grad_norm': 3.2337429523468018, 'learning_rate': 8.468805280142709e-05, 'epoch': 1.98}\n",
+ "{'loss': 1.8531, 'grad_norm': 4.216500282287598, 'learning_rate': 8.43120818934367e-05, 'epoch': 2.0}\n",
+ " 33%|█████████████ | 1120/3360 [25:00<49:13, 1.32s/it][INFO|trainer.py:3788] 2024-07-04 11:35:10,200 >> \n",
+ "***** Running Evaluation *****\n",
+ "[INFO|trainer.py:3790] 2024-07-04 11:35:10,200 >> Num examples = 46\n",
+ "[INFO|trainer.py:3793] 2024-07-04 11:35:10,200 >> Batch size = 1\n",
+ "\n",
+ " 0%| | 0/46 [00:00, ?it/s]\u001b[A\n",
+ " 9%|███▊ | 4/46 [00:00<00:01, 29.06it/s]\u001b[A\n",
+ " 15%|██████▋ | 7/46 [00:00<00:01, 26.71it/s]\u001b[A\n",
+ " 22%|█████████▎ | 10/46 [00:00<00:01, 25.85it/s]\u001b[A\n",
+ " 28%|████████████▏ | 13/46 [00:00<00:01, 24.50it/s]\u001b[A\n",
+ " 35%|██████████████▉ | 16/46 [00:00<00:01, 24.61it/s]\u001b[A\n",
+ " 41%|█████████████████▊ | 19/46 [00:00<00:01, 25.00it/s]\u001b[A\n",
+ " 48%|████████████████████▌ | 22/46 [00:00<00:00, 24.89it/s]\u001b[A\n",
+ " 54%|███████████████████████▎ | 25/46 [00:00<00:00, 24.70it/s]\u001b[A\n",
+ " 61%|██████████████████████████▏ | 28/46 [00:01<00:00, 24.56it/s]\u001b[A\n",
+ " 67%|████████████████████████████▉ | 31/46 [00:01<00:00, 24.07it/s]\u001b[A\n",
+ " 74%|███████████████████████████████▊ | 34/46 [00:01<00:00, 24.64it/s]\u001b[A\n",
+ " 80%|██████████████████████████████████▌ | 37/46 [00:01<00:00, 24.64it/s]\u001b[A\n",
+ " 87%|█████████████████████████████████████▍ | 40/46 [00:01<00:00, 23.45it/s]\u001b[A\n",
+ " 93%|████████████████████████████████████████▏ | 43/46 [00:01<00:00, 24.22it/s]\u001b[A\n",
+ " \u001b[A\n",
+ "\u001b[A{'eval_loss': 2.01658296585083, 'eval_runtime': 1.9249, 'eval_samples_per_second': 23.898, 'eval_steps_per_second': 23.898, 'epoch': 2.0}\n",
+ " 33%|█████████████ | 1120/3360 [25:02<49:13, 1.32s/it]\n",
+ "100%|███████████████████████████████████████████| 46/46 [00:01<00:00, 23.38it/s]\u001b[A\n",
+ " \u001b[A[INFO|trainer.py:3478] 2024-07-04 11:35:12,127 >> Saving model checkpoint to saves/qwen2-0.5b/lora/sft/checkpoint-1120\n",
+ "[INFO|configuration_utils.py:733] 2024-07-04 11:35:13,176 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/config.json\n",
+ "[INFO|configuration_utils.py:800] 2024-07-04 11:35:13,177 >> Model config Qwen2Config {\n",
+ " \"architectures\": [\n",
+ " \"Qwen2ForCausalLM\"\n",
+ " ],\n",
+ " \"attention_dropout\": 0.0,\n",
+ " \"bos_token_id\": 151643,\n",
+ " \"eos_token_id\": 151645,\n",
+ " \"hidden_act\": \"silu\",\n",
+ " \"hidden_size\": 896,\n",
+ " \"initializer_range\": 0.02,\n",
+ " \"intermediate_size\": 4864,\n",
+ " \"max_position_embeddings\": 32768,\n",
+ " \"max_window_layers\": 24,\n",
+ " \"model_type\": \"qwen2\",\n",
+ " \"num_attention_heads\": 14,\n",
+ " \"num_hidden_layers\": 24,\n",
+ " \"num_key_value_heads\": 2,\n",
+ " \"rms_norm_eps\": 1e-06,\n",
+ " \"rope_theta\": 1000000.0,\n",
+ " \"sliding_window\": 32768,\n",
+ " \"tie_word_embeddings\": true,\n",
+ " \"torch_dtype\": \"bfloat16\",\n",
+ " \"transformers_version\": \"4.42.3\",\n",
+ " \"use_cache\": true,\n",
+ " \"use_sliding_window\": false,\n",
+ " \"vocab_size\": 151936\n",
+ "}\n",
+ "\n",
+ "[INFO|tokenization_utils_base.py:2574] 2024-07-04 11:35:13,210 >> tokenizer config file saved in saves/qwen2-0.5b/lora/sft/checkpoint-1120/tokenizer_config.json\n",
+ "[INFO|tokenization_utils_base.py:2583] 2024-07-04 11:35:13,211 >> Special tokens file saved in saves/qwen2-0.5b/lora/sft/checkpoint-1120/special_tokens_map.json\n",
+ "{'loss': 1.5674, 'grad_norm': 4.559268474578857, 'learning_rate': 8.393240776696274e-05, 'epoch': 2.02}\n",
+ "{'loss': 1.4393, 'grad_norm': 3.3662822246551514, 'learning_rate': 8.354907139929851e-05, 'epoch': 2.03}\n",
+ "{'loss': 1.5166, 'grad_norm': 4.587384223937988, 'learning_rate': 8.316211416299397e-05, 'epoch': 2.05}\n",
+ "{'loss': 1.4818, 'grad_norm': 5.713983535766602, 'learning_rate': 8.27715778213905e-05, 'epoch': 2.07}\n",
+ "{'loss': 1.3679, 'grad_norm': 3.7478792667388916, 'learning_rate': 8.237750452411353e-05, 'epoch': 2.09}\n",
+ "{'loss': 1.4682, 'grad_norm': 3.7805116176605225, 'learning_rate': 8.197993680252334e-05, 'epoch': 2.11}\n",
+ "{'loss': 1.6848, 'grad_norm': 4.318390846252441, 'learning_rate': 8.157891756512488e-05, 'epoch': 2.12}\n",
+ "{'loss': 1.447, 'grad_norm': 4.625955581665039, 'learning_rate': 8.117449009293668e-05, 'epoch': 2.14}\n",
+ "{'loss': 1.4888, 'grad_norm': 4.70202112197876, 'learning_rate': 8.076669803481965e-05, 'epoch': 2.16}\n",
+ "{'loss': 1.5405, 'grad_norm': 6.126914978027344, 'learning_rate': 8.035558540276618e-05, 'epoch': 2.18}\n",
+ "{'loss': 1.4751, 'grad_norm': 3.867528200149536, 'learning_rate': 7.994119656715002e-05, 'epoch': 2.2}\n",
+ "{'loss': 1.5175, 'grad_norm': 4.935867786407471, 'learning_rate': 7.952357625193749e-05, 'epoch': 2.21}\n",
+ "{'loss': 1.5586, 'grad_norm': 5.28302001953125, 'learning_rate': 7.91027695298606e-05, 'epoch': 2.23}\n",
+ "{'loss': 1.5798, 'grad_norm': 4.9564738273620605, 'learning_rate': 7.86788218175523e-05, 'epoch': 2.25}\n",
+ "{'loss': 1.4184, 'grad_norm': 4.7498779296875, 'learning_rate': 7.8251778870645e-05, 'epoch': 2.27}\n",
+ "{'loss': 1.4736, 'grad_norm': 5.780045032501221, 'learning_rate': 7.782168677883206e-05, 'epoch': 2.28}\n",
+ "{'loss': 1.5192, 'grad_norm': 3.647230625152588, 'learning_rate': 7.738859196089358e-05, 'epoch': 2.3}\n",
+ "{'loss': 1.5836, 'grad_norm': 4.818410396575928, 'learning_rate': 7.695254115968648e-05, 'epoch': 2.32}\n",
+ "{'loss': 1.6111, 'grad_norm': 4.5074286460876465, 'learning_rate': 7.651358143709972e-05, 'epoch': 2.34}\n",
+ "{'loss': 1.6122, 'grad_norm': 4.6216816902160645, 'learning_rate': 7.60717601689749e-05, 'epoch': 2.36}\n",
+ "{'loss': 1.5633, 'grad_norm': 9.873260498046875, 'learning_rate': 7.562712503999327e-05, 'epoch': 2.37}\n",
+ "{'loss': 1.7444, 'grad_norm': 4.795359134674072, 'learning_rate': 7.517972403852905e-05, 'epoch': 2.39}\n",
+ "{'loss': 1.5804, 'grad_norm': 4.818080902099609, 'learning_rate': 7.472960545147038e-05, 'epoch': 2.41}\n",
+ "{'loss': 1.4748, 'grad_norm': 5.576250076293945, 'learning_rate': 7.427681785900761e-05, 'epoch': 2.43}\n",
+ "{'loss': 1.5531, 'grad_norm': 4.261260509490967, 'learning_rate': 7.382141012939034e-05, 'epoch': 2.45}\n",
+ "{'loss': 1.4554, 'grad_norm': 4.23293399810791, 'learning_rate': 7.33634314136531e-05, 'epoch': 2.46}\n",
+ "{'loss': 1.5272, 'grad_norm': 4.627878665924072, 'learning_rate': 7.290293114031061e-05, 'epoch': 2.48}\n",
+ "{'loss': 1.6616, 'grad_norm': 4.36018705368042, 'learning_rate': 7.243995901002312e-05, 'epoch': 2.5}\n",
+ "{'loss': 1.5503, 'grad_norm': 5.698966026306152, 'learning_rate': 7.197456499023225e-05, 'epoch': 2.52}\n",
+ "{'loss': 1.5043, 'grad_norm': 4.486359119415283, 'learning_rate': 7.150679930976825e-05, 'epoch': 2.53}\n",
+ "{'loss': 1.5796, 'grad_norm': 8.031678199768066, 'learning_rate': 7.103671245342887e-05, 'epoch': 2.55}\n",
+ "{'loss': 1.4317, 'grad_norm': 5.806405544281006, 'learning_rate': 7.056435515653059e-05, 'epoch': 2.57}\n",
+ "{'loss': 1.696, 'grad_norm': 6.584068298339844, 'learning_rate': 7.008977839943299e-05, 'epoch': 2.59}\n",
+ "{'loss': 1.4768, 'grad_norm': 4.871330261230469, 'learning_rate': 6.961303340203653e-05, 'epoch': 2.61}\n",
+ "{'loss': 1.587, 'grad_norm': 3.9512643814086914, 'learning_rate': 6.91341716182545e-05, 'epoch': 2.62}\n",
+ "{'loss': 1.4991, 'grad_norm': 3.4907033443450928, 'learning_rate': 6.86532447304597e-05, 'epoch': 2.64}\n",
+ "{'loss': 1.4822, 'grad_norm': 4.603860855102539, 'learning_rate': 6.817030464390656e-05, 'epoch': 2.66}\n",
+ "{'loss': 1.6408, 'grad_norm': 5.737949371337891, 'learning_rate': 6.768540348112907e-05, 'epoch': 2.68}\n",
+ "{'loss': 1.4316, 'grad_norm': 5.838085174560547, 'learning_rate': 6.719859357631535e-05, 'epoch': 2.7}\n",
+ "{'loss': 1.414, 'grad_norm': 5.460419654846191, 'learning_rate': 6.670992746965938e-05, 'epoch': 2.71}\n",
+ "{'loss': 1.6858, 'grad_norm': 5.311679363250732, 'learning_rate': 6.621945790169036e-05, 'epoch': 2.73}\n",
+ "{'loss': 1.5802, 'grad_norm': 4.987999439239502, 'learning_rate': 6.572723780758069e-05, 'epoch': 2.75}\n",
+ "{'loss': 1.5672, 'grad_norm': 5.01920223236084, 'learning_rate': 6.523332031143272e-05, 'epoch': 2.77}\n",
+ "{'loss': 1.5914, 'grad_norm': 4.382671356201172, 'learning_rate': 6.473775872054521e-05, 'epoch': 2.78}\n",
+ "{'loss': 1.4284, 'grad_norm': 3.818115711212158, 'learning_rate': 6.424060651966007e-05, 'epoch': 2.8}\n",
+ "{'loss': 1.499, 'grad_norm': 4.427730560302734, 'learning_rate': 6.374191736518974e-05, 'epoch': 2.82}\n",
+ "{'loss': 1.4914, 'grad_norm': 4.508190631866455, 'learning_rate': 6.324174507942637e-05, 'epoch': 2.84}\n",
+ "{'loss': 1.4629, 'grad_norm': 6.055968284606934, 'learning_rate': 6.274014364473274e-05, 'epoch': 2.86}\n",
+ "{'loss': 1.717, 'grad_norm': 4.5250678062438965, 'learning_rate': 6.22371671977162e-05, 'epoch': 2.87}\n",
+ "{'loss': 1.5103, 'grad_norm': 4.378949165344238, 'learning_rate': 6.173287002338577e-05, 'epoch': 2.89}\n",
+ "{'loss': 1.511, 'grad_norm': 5.3176751136779785, 'learning_rate': 6.122730654929334e-05, 'epoch': 2.91}\n",
+ "{'loss': 1.4656, 'grad_norm': 4.5037994384765625, 'learning_rate': 6.072053133965938e-05, 'epoch': 2.93}\n",
+ "{'loss': 1.6443, 'grad_norm': 4.189935684204102, 'learning_rate': 6.021259908948402e-05, 'epoch': 2.95}\n",
+ "{'loss': 1.6633, 'grad_norm': 4.525129795074463, 'learning_rate': 5.970356461864391e-05, 'epoch': 2.96}\n",
+ "{'loss': 1.4935, 'grad_norm': 5.440227508544922, 'learning_rate': 5.919348286597569e-05, 'epoch': 2.98}\n",
+ "{'loss': 1.6304, 'grad_norm': 4.765013694763184, 'learning_rate': 5.868240888334653e-05, 'epoch': 3.0}\n",
+ " 50%|███████████████████▌ | 1680/3360 [37:13<35:24, 1.26s/it][INFO|trainer.py:3788] 2024-07-04 11:47:23,337 >> \n",
+ "***** Running Evaluation *****\n",
+ "[INFO|trainer.py:3790] 2024-07-04 11:47:23,337 >> Num examples = 46\n",
+ "[INFO|trainer.py:3793] 2024-07-04 11:47:23,337 >> Batch size = 1\n",
+ "\n",
+ " 0%| | 0/46 [00:00, ?it/s]\u001b[A\n",
+ " 9%|███▊ | 4/46 [00:00<00:01, 36.22it/s]\u001b[A\n",
+ " 17%|███████▋ | 8/46 [00:00<00:01, 30.18it/s]\u001b[A\n",
+ " 26%|███████████▏ | 12/46 [00:00<00:01, 28.29it/s]\u001b[A\n",
+ " 33%|██████████████ | 15/46 [00:00<00:01, 27.11it/s]\u001b[A\n",
+ " 39%|████████████████▊ | 18/46 [00:00<00:01, 25.76it/s]\u001b[A\n",
+ " 46%|███████████████████▋ | 21/46 [00:00<00:00, 26.14it/s]\u001b[A\n",
+ " 52%|██████████████████████▍ | 24/46 [00:00<00:00, 25.68it/s]\u001b[A\n",
+ " 59%|█████████████████████████▏ | 27/46 [00:01<00:00, 25.54it/s]\u001b[A\n",
+ " 65%|████████████████████████████ | 30/46 [00:01<00:00, 25.92it/s]\u001b[A\n",
+ " 72%|██████████████████████████████▊ | 33/46 [00:01<00:00, 25.97it/s]\u001b[A\n",
+ " 78%|█████████████████████████████████▋ | 36/46 [00:01<00:00, 26.05it/s]\u001b[A\n",
+ " 85%|████████████████████████████████████▍ | 39/46 [00:01<00:00, 26.26it/s]\u001b[A\n",
+ " 91%|███████████████████████████████████████▎ | 42/46 [00:01<00:00, 26.26it/s]\u001b[A\n",
+ " \u001b[A\n",
+ "\u001b[A{'eval_loss': 2.0813467502593994, 'eval_runtime': 1.8021, 'eval_samples_per_second': 25.525, 'eval_steps_per_second': 25.525, 'epoch': 3.0}\n",
+ " 50%|███████████████████▌ | 1680/3360 [37:15<35:24, 1.26s/it]\n",
+ "100%|███████████████████████████████████████████| 46/46 [00:01<00:00, 24.30it/s]\u001b[A\n",
+ " \u001b[A[INFO|trainer.py:3478] 2024-07-04 11:47:25,141 >> Saving model checkpoint to saves/qwen2-0.5b/lora/sft/checkpoint-1680\n",
+ "[INFO|configuration_utils.py:733] 2024-07-04 11:47:25,920 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/config.json\n",
+ "[INFO|configuration_utils.py:800] 2024-07-04 11:47:25,920 >> Model config Qwen2Config {\n",
+ " \"architectures\": [\n",
+ " \"Qwen2ForCausalLM\"\n",
+ " ],\n",
+ " \"attention_dropout\": 0.0,\n",
+ " \"bos_token_id\": 151643,\n",
+ " \"eos_token_id\": 151645,\n",
+ " \"hidden_act\": \"silu\",\n",
+ " \"hidden_size\": 896,\n",
+ " \"initializer_range\": 0.02,\n",
+ " \"intermediate_size\": 4864,\n",
+ " \"max_position_embeddings\": 32768,\n",
+ " \"max_window_layers\": 24,\n",
+ " \"model_type\": \"qwen2\",\n",
+ " \"num_attention_heads\": 14,\n",
+ " \"num_hidden_layers\": 24,\n",
+ " \"num_key_value_heads\": 2,\n",
+ " \"rms_norm_eps\": 1e-06,\n",
+ " \"rope_theta\": 1000000.0,\n",
+ " \"sliding_window\": 32768,\n",
+ " \"tie_word_embeddings\": true,\n",
+ " \"torch_dtype\": \"bfloat16\",\n",
+ " \"transformers_version\": \"4.42.3\",\n",
+ " \"use_cache\": true,\n",
+ " \"use_sliding_window\": false,\n",
+ " \"vocab_size\": 151936\n",
+ "}\n",
+ "\n",
+ "[INFO|tokenization_utils_base.py:2574] 2024-07-04 11:47:25,956 >> tokenizer config file saved in saves/qwen2-0.5b/lora/sft/checkpoint-1680/tokenizer_config.json\n",
+ "[INFO|tokenization_utils_base.py:2583] 2024-07-04 11:47:25,956 >> Special tokens file saved in saves/qwen2-0.5b/lora/sft/checkpoint-1680/special_tokens_map.json\n",
+ "{'loss': 1.4346, 'grad_norm': 4.2551727294921875, 'learning_rate': 5.8170397829712485e-05, 'epoch': 3.02}\n",
+ "{'loss': 1.1148, 'grad_norm': 4.442202568054199, 'learning_rate': 5.765750496516547e-05, 'epoch': 3.03}\n",
+ "{'loss': 1.2852, 'grad_norm': 5.140079021453857, 'learning_rate': 5.714378564496901e-05, 'epoch': 3.05}\n",
+ "{'loss': 1.2086, 'grad_norm': 5.270480632781982, 'learning_rate': 5.6629295313583974e-05, 'epoch': 3.07}\n",
+ "{'loss': 1.1824, 'grad_norm': 5.192230224609375, 'learning_rate': 5.611408949868457e-05, 'epoch': 3.09}\n",
+ "{'loss': 1.2875, 'grad_norm': 5.830446720123291, 'learning_rate': 5.559822380516539e-05, 'epoch': 3.11}\n",
+ "{'loss': 1.2162, 'grad_norm': 4.606627464294434, 'learning_rate': 5.5081753909140096e-05, 'epoch': 3.12}\n",
+ "{'loss': 1.3341, 'grad_norm': 5.547798156738281, 'learning_rate': 5.456473555193242e-05, 'epoch': 3.14}\n",
+ "{'loss': 1.2143, 'grad_norm': 5.579686641693115, 'learning_rate': 5.404722453406017e-05, 'epoch': 3.16}\n",
+ "{'loss': 1.2823, 'grad_norm': 6.129615783691406, 'learning_rate': 5.3529276709212816e-05, 'epoch': 3.18}\n",
+ "{'loss': 1.2582, 'grad_norm': 5.295398712158203, 'learning_rate': 5.30109479782233e-05, 'epoch': 3.2}\n",
+ "{'loss': 1.2438, 'grad_norm': 6.145551681518555, 'learning_rate': 5.249229428303486e-05, 'epoch': 3.21}\n",
+ "{'loss': 1.4372, 'grad_norm': 4.52131986618042, 'learning_rate': 5.197337160066331e-05, 'epoch': 3.23}\n",
+ "{'loss': 1.2346, 'grad_norm': 4.7772955894470215, 'learning_rate': 5.145423593715557e-05, 'epoch': 3.25}\n",
+ "{'loss': 1.1929, 'grad_norm': 5.328940391540527, 'learning_rate': 5.0934943321545115e-05, 'epoch': 3.27}\n",
+ "{'loss': 1.1731, 'grad_norm': 5.733246803283691, 'learning_rate': 5.041554979980486e-05, 'epoch': 3.28}\n",
+ "{'loss': 1.3325, 'grad_norm': 6.418582439422607, 'learning_rate': 4.9896111428798254e-05, 'epoch': 3.3}\n",
+ "{'loss': 1.3305, 'grad_norm': 4.787232398986816, 'learning_rate': 4.9376684270229254e-05, 'epoch': 3.32}\n",
+ "{'loss': 1.2982, 'grad_norm': 4.655210971832275, 'learning_rate': 4.8857324384591653e-05, 'epoch': 3.34}\n",
+ "{'loss': 1.2833, 'grad_norm': 4.85659122467041, 'learning_rate': 4.8338087825118675e-05, 'epoch': 3.36}\n",
+ "{'loss': 1.2835, 'grad_norm': 5.313413143157959, 'learning_rate': 4.781903063173321e-05, 'epoch': 3.37}\n",
+ "{'loss': 1.2001, 'grad_norm': 4.640489101409912, 'learning_rate': 4.730020882499964e-05, 'epoch': 3.39}\n",
+ "{'loss': 1.2597, 'grad_norm': 6.197988033294678, 'learning_rate': 4.678167840007767e-05, 'epoch': 3.41}\n",
+ "{'loss': 1.3514, 'grad_norm': 4.942805290222168, 'learning_rate': 4.626349532067879e-05, 'epoch': 3.43}\n",
+ "{'loss': 1.3118, 'grad_norm': 5.112833499908447, 'learning_rate': 4.574571551302647e-05, 'epoch': 3.44}\n",
+ "{'loss': 1.3232, 'grad_norm': 4.470940113067627, 'learning_rate': 4.522839485981994e-05, 'epoch': 3.46}\n",
+ "{'loss': 1.2533, 'grad_norm': 5.801645755767822, 'learning_rate': 4.471158919420312e-05, 'epoch': 3.48}\n",
+ "{'loss': 1.2343, 'grad_norm': 6.3296709060668945, 'learning_rate': 4.4195354293738484e-05, 'epoch': 3.5}\n",
+ "{'loss': 1.1995, 'grad_norm': 6.262467384338379, 'learning_rate': 4.367974587438733e-05, 'epoch': 3.52}\n",
+ "{'loss': 1.2744, 'grad_norm': 5.313882827758789, 'learning_rate': 4.316481958449634e-05, 'epoch': 3.53}\n",
+ "{'loss': 1.2366, 'grad_norm': 7.450092792510986, 'learning_rate': 4.2650630998791615e-05, 'epoch': 3.55}\n",
+ "{'loss': 1.3738, 'grad_norm': 4.7678680419921875, 'learning_rate': 4.213723561238074e-05, 'epoch': 3.57}\n",
+ "{'loss': 1.1538, 'grad_norm': 4.40903377532959, 'learning_rate': 4.162468883476319e-05, 'epoch': 3.59}\n",
+ "{'loss': 1.2502, 'grad_norm': 5.227618217468262, 'learning_rate': 4.111304598385018e-05, 'epoch': 3.61}\n",
+ "{'loss': 1.3061, 'grad_norm': 6.307828903198242, 'learning_rate': 4.060236227999441e-05, 'epoch': 3.62}\n",
+ "{'loss': 1.2667, 'grad_norm': 5.422544002532959, 'learning_rate': 4.0092692840030134e-05, 'epoch': 3.64}\n",
+ "{'loss': 1.2039, 'grad_norm': 7.9964141845703125, 'learning_rate': 3.9584092671324606e-05, 'epoch': 3.66}\n",
+ "{'loss': 1.3509, 'grad_norm': 7.364163875579834, 'learning_rate': 3.907661666584131e-05, 'epoch': 3.68}\n",
+ "{'loss': 1.3427, 'grad_norm': 6.175056457519531, 'learning_rate': 3.857031959421553e-05, 'epoch': 3.69}\n",
+ "{'loss': 1.345, 'grad_norm': 5.0636725425720215, 'learning_rate': 3.806525609984312e-05, 'epoch': 3.71}\n",
+ "{'loss': 1.1779, 'grad_norm': 5.742904186248779, 'learning_rate': 3.7561480692983006e-05, 'epoch': 3.73}\n",
+ "{'loss': 1.29, 'grad_norm': 5.6552276611328125, 'learning_rate': 3.705904774487396e-05, 'epoch': 3.75}\n",
+ "{'loss': 1.2704, 'grad_norm': 5.890940189361572, 'learning_rate': 3.655801148186655e-05, 'epoch': 3.77}\n",
+ "{'loss': 1.1811, 'grad_norm': 5.2217583656311035, 'learning_rate': 3.6058425979570485e-05, 'epoch': 3.78}\n",
+ "{'loss': 1.2768, 'grad_norm': 5.42200231552124, 'learning_rate': 3.556034515701852e-05, 'epoch': 3.8}\n",
+ "{'loss': 1.2891, 'grad_norm': 5.615239143371582, 'learning_rate': 3.506382277084696e-05, 'epoch': 3.82}\n",
+ "{'loss': 1.2401, 'grad_norm': 5.646175861358643, 'learning_rate': 3.4568912409493945e-05, 'epoch': 3.84}\n",
+ "{'loss': 1.0597, 'grad_norm': 5.7333197593688965, 'learning_rate': 3.4075667487415785e-05, 'epoch': 3.86}\n",
+ "{'loss': 1.1621, 'grad_norm': 5.321319580078125, 'learning_rate': 3.358414123932195e-05, 'epoch': 3.87}\n",
+ "{'loss': 1.2736, 'grad_norm': 4.852396011352539, 'learning_rate': 3.3094386714429724e-05, 'epoch': 3.89}\n",
+ "{'loss': 1.2597, 'grad_norm': 7.163392066955566, 'learning_rate': 3.2606456770738636e-05, 'epoch': 3.91}\n",
+ "{'loss': 1.1871, 'grad_norm': 5.611868381500244, 'learning_rate': 3.212040406932569e-05, 'epoch': 3.93}\n",
+ "{'loss': 1.0307, 'grad_norm': 4.783786296844482, 'learning_rate': 3.163628106866172e-05, 'epoch': 3.94}\n",
+ "{'loss': 1.4526, 'grad_norm': 5.691711902618408, 'learning_rate': 3.115414001894974e-05, 'epoch': 3.96}\n",
+ "{'loss': 1.3101, 'grad_norm': 5.280589580535889, 'learning_rate': 3.067403295648566e-05, 'epoch': 3.98}\n",
+ "{'loss': 1.0932, 'grad_norm': 4.22761869430542, 'learning_rate': 3.019601169804216e-05, 'epoch': 4.0}\n",
+ " 67%|██████████████████████████ | 2240/3360 [49:01<23:24, 1.25s/it][INFO|trainer.py:3788] 2024-07-04 11:59:10,533 >> \n",
+ "***** Running Evaluation *****\n",
+ "[INFO|trainer.py:3790] 2024-07-04 11:59:10,533 >> Num examples = 46\n",
+ "[INFO|trainer.py:3793] 2024-07-04 11:59:10,533 >> Batch size = 1\n",
+ "\n",
+ " 0%| | 0/46 [00:00, ?it/s]\u001b[A\n",
+ " 9%|███▊ | 4/46 [00:00<00:01, 37.50it/s]\u001b[A\n",
+ " 17%|███████▋ | 8/46 [00:00<00:01, 29.67it/s]\u001b[A\n",
+ " 26%|███████████▏ | 12/46 [00:00<00:01, 27.86it/s]\u001b[A\n",
+ " 33%|██████████████ | 15/46 [00:00<00:01, 27.79it/s]\u001b[A\n",
+ " 39%|████████████████▊ | 18/46 [00:00<00:01, 27.74it/s]\u001b[A\n",
+ " 46%|███████████████████▋ | 21/46 [00:00<00:00, 27.55it/s]\u001b[A\n",
+ " 52%|██████████████████████▍ | 24/46 [00:00<00:00, 26.89it/s]\u001b[A\n",
+ " 59%|█████████████████████████▏ | 27/46 [00:00<00:00, 26.15it/s]\u001b[A\n",
+ " 65%|████████████████████████████ | 30/46 [00:01<00:00, 26.12it/s]\u001b[A\n",
+ " 72%|██████████████████████████████▊ | 33/46 [00:01<00:00, 25.32it/s]\u001b[A\n",
+ " 78%|█████████████████████████████████▋ | 36/46 [00:01<00:00, 26.12it/s]\u001b[A\n",
+ " 85%|████████████████████████████████████▍ | 39/46 [00:01<00:00, 26.56it/s]\u001b[A\n",
+ " 91%|███████████████████████████████████████▎ | 42/46 [00:01<00:00, 26.71it/s]\u001b[A\n",
+ " \u001b[A\n",
+ "\u001b[A{'eval_loss': 2.2238643169403076, 'eval_runtime': 1.7413, 'eval_samples_per_second': 26.417, 'eval_steps_per_second': 26.417, 'epoch': 4.0}\n",
+ " 67%|██████████████████████████ | 2240/3360 [49:02<23:24, 1.25s/it]\n",
+ "100%|███████████████████████████████████████████| 46/46 [00:01<00:00, 26.68it/s]\u001b[A\n",
+ " \u001b[A[INFO|trainer.py:3478] 2024-07-04 11:59:12,277 >> Saving model checkpoint to saves/qwen2-0.5b/lora/sft/checkpoint-2240\n",
+ "[INFO|configuration_utils.py:733] 2024-07-04 11:59:13,447 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/config.json\n",
+ "[INFO|configuration_utils.py:800] 2024-07-04 11:59:13,448 >> Model config Qwen2Config {\n",
+ " \"architectures\": [\n",
+ " \"Qwen2ForCausalLM\"\n",
+ " ],\n",
+ " \"attention_dropout\": 0.0,\n",
+ " \"bos_token_id\": 151643,\n",
+ " \"eos_token_id\": 151645,\n",
+ " \"hidden_act\": \"silu\",\n",
+ " \"hidden_size\": 896,\n",
+ " \"initializer_range\": 0.02,\n",
+ " \"intermediate_size\": 4864,\n",
+ " \"max_position_embeddings\": 32768,\n",
+ " \"max_window_layers\": 24,\n",
+ " \"model_type\": \"qwen2\",\n",
+ " \"num_attention_heads\": 14,\n",
+ " \"num_hidden_layers\": 24,\n",
+ " \"num_key_value_heads\": 2,\n",
+ " \"rms_norm_eps\": 1e-06,\n",
+ " \"rope_theta\": 1000000.0,\n",
+ " \"sliding_window\": 32768,\n",
+ " \"tie_word_embeddings\": true,\n",
+ " \"torch_dtype\": \"bfloat16\",\n",
+ " \"transformers_version\": \"4.42.3\",\n",
+ " \"use_cache\": true,\n",
+ " \"use_sliding_window\": false,\n",
+ " \"vocab_size\": 151936\n",
+ "}\n",
+ "\n",
+ "[INFO|tokenization_utils_base.py:2574] 2024-07-04 11:59:13,487 >> tokenizer config file saved in saves/qwen2-0.5b/lora/sft/checkpoint-2240/tokenizer_config.json\n",
+ "[INFO|tokenization_utils_base.py:2583] 2024-07-04 11:59:13,487 >> Special tokens file saved in saves/qwen2-0.5b/lora/sft/checkpoint-2240/special_tokens_map.json\n",
+ "{'loss': 1.1326, 'grad_norm': 3.8860654830932617, 'learning_rate': 2.9720127835276256e-05, 'epoch': 4.02}\n",
+ "{'loss': 1.0202, 'grad_norm': 5.21559476852417, 'learning_rate': 2.9246432729161055e-05, 'epoch': 4.03}\n",
+ "{'loss': 1.0934, 'grad_norm': 5.658751964569092, 'learning_rate': 2.8774977504442647e-05, 'epoch': 4.05}\n",
+ "{'loss': 0.9934, 'grad_norm': 5.090124130249023, 'learning_rate': 2.8305813044122097e-05, 'epoch': 4.07}\n",
+ "{'loss': 1.0309, 'grad_norm': 5.851395606994629, 'learning_rate': 2.7838989983964065e-05, 'epoch': 4.09}\n",
+ "{'loss': 1.187, 'grad_norm': 4.703646659851074, 'learning_rate': 2.737455870703155e-05, 'epoch': 4.11}\n",
+ "{'loss': 0.9171, 'grad_norm': 4.95070219039917, 'learning_rate': 2.6912569338248315e-05, 'epoch': 4.12}\n",
+ "{'loss': 1.0232, 'grad_norm': 4.933461666107178, 'learning_rate': 2.645307173898901e-05, 'epoch': 4.14}\n",
+ "{'loss': 1.1005, 'grad_norm': 5.395535469055176, 'learning_rate': 2.5996115501697694e-05, 'epoch': 4.16}\n",
+ "{'loss': 0.9827, 'grad_norm': 4.670980453491211, 'learning_rate': 2.5541749944535554e-05, 'epoch': 4.18}\n",
+ "{'loss': 0.9969, 'grad_norm': 9.713501930236816, 'learning_rate': 2.5090024106057962e-05, 'epoch': 4.19}\n",
+ "{'loss': 1.0557, 'grad_norm': 5.423773288726807, 'learning_rate': 2.464098673992205e-05, 'epoch': 4.21}\n",
+ "{'loss': 0.9903, 'grad_norm': 5.628043174743652, 'learning_rate': 2.4194686309624663e-05, 'epoch': 4.23}\n",
+ "{'loss': 1.1397, 'grad_norm': 5.057712554931641, 'learning_rate': 2.3751170983272e-05, 'epoch': 4.25}\n",
+ "{'loss': 0.962, 'grad_norm': 4.844544410705566, 'learning_rate': 2.3310488628380757e-05, 'epoch': 4.27}\n",
+ "{'loss': 1.0187, 'grad_norm': 7.445083141326904, 'learning_rate': 2.2872686806712035e-05, 'epoch': 4.28}\n",
+ "{'loss': 1.0618, 'grad_norm': 4.87847900390625, 'learning_rate': 2.243781276913811e-05, 'epoch': 4.3}\n",
+ "{'loss': 0.9125, 'grad_norm': 5.181140899658203, 'learning_rate': 2.200591345054267e-05, 'epoch': 4.32}\n",
+ "{'loss': 1.125, 'grad_norm': 8.97202205657959, 'learning_rate': 2.157703546475539e-05, 'epoch': 4.34}\n",
+ "{'loss': 0.9747, 'grad_norm': 6.134432792663574, 'learning_rate': 2.115122509952085e-05, 'epoch': 4.36}\n",
+ "{'loss': 0.9803, 'grad_norm': 9.630309104919434, 'learning_rate': 2.0728528311502976e-05, 'epoch': 4.37}\n",
+ "{'loss': 1.0843, 'grad_norm': 5.363273620605469, 'learning_rate': 2.0308990721324927e-05, 'epoch': 4.39}\n",
+ "{'loss': 1.0764, 'grad_norm': 7.712973117828369, 'learning_rate': 1.989265760864542e-05, 'epoch': 4.41}\n",
+ "{'loss': 1.1397, 'grad_norm': 5.690403938293457, 'learning_rate': 1.947957390727185e-05, 'epoch': 4.43}\n",
+ "{'loss': 1.1258, 'grad_norm': 5.744186878204346, 'learning_rate': 1.906978420031059e-05, 'epoch': 4.44}\n",
+ "{'loss': 0.9438, 'grad_norm': 8.820874214172363, 'learning_rate': 1.8663332715355396e-05, 'epoch': 4.46}\n",
+ "{'loss': 1.132, 'grad_norm': 4.420164108276367, 'learning_rate': 1.8260263319713844e-05, 'epoch': 4.48}\n",
+ "{'loss': 1.0819, 'grad_norm': 5.586333751678467, 'learning_rate': 1.7860619515673033e-05, 'epoch': 4.5}\n",
+ "{'loss': 1.0571, 'grad_norm': 5.625140190124512, 'learning_rate': 1.746444443580433e-05, 'epoch': 4.52}\n",
+ "{'loss': 1.1021, 'grad_norm': 5.7560577392578125, 'learning_rate': 1.7071780838308288e-05, 'epoch': 4.53}\n",
+ "{'loss': 1.0531, 'grad_norm': 5.3450727462768555, 'learning_rate': 1.6682671102399805e-05, 'epoch': 4.55}\n",
+ "{'loss': 1.017, 'grad_norm': 6.27817440032959, 'learning_rate': 1.629715722373423e-05, 'epoch': 4.57}\n",
+ "{'loss': 1.0471, 'grad_norm': 5.72844934463501, 'learning_rate': 1.5915280809874932e-05, 'epoch': 4.59}\n",
+ "{'loss': 0.9309, 'grad_norm': 5.988643169403076, 'learning_rate': 1.553708307580265e-05, 'epoch': 4.61}\n",
+ "{'loss': 1.0538, 'grad_norm': 5.950584411621094, 'learning_rate': 1.5162604839467265e-05, 'epoch': 4.62}\n",
+ "{'loss': 1.0554, 'grad_norm': 4.944731712341309, 'learning_rate': 1.4791886517382413e-05, 'epoch': 4.64}\n",
+ "{'loss': 1.0857, 'grad_norm': 6.031637191772461, 'learning_rate': 1.4424968120263504e-05, 'epoch': 4.66}\n",
+ "{'loss': 1.0667, 'grad_norm': 5.933581352233887, 'learning_rate': 1.4061889248709343e-05, 'epoch': 4.68}\n",
+ "{'loss': 0.9942, 'grad_norm': 6.697149276733398, 'learning_rate': 1.370268908892825e-05, 'epoch': 4.69}\n",
+ "{'loss': 1.0146, 'grad_norm': 7.122743129730225, 'learning_rate': 1.3347406408508695e-05, 'epoch': 4.71}\n",
+ "{'loss': 0.9921, 'grad_norm': 4.69237756729126, 'learning_rate': 1.2996079552235263e-05, 'epoch': 4.73}\n",
+ "{'loss': 1.0017, 'grad_norm': 5.421998977661133, 'learning_rate': 1.264874643795021e-05, 'epoch': 4.75}\n",
+ "{'loss': 1.0102, 'grad_norm': 6.121133804321289, 'learning_rate': 1.230544455246101e-05, 'epoch': 4.77}\n",
+ "{'loss': 1.062, 'grad_norm': 5.060891151428223, 'learning_rate': 1.1966210947494583e-05, 'epoch': 4.78}\n",
+ "{'loss': 1.125, 'grad_norm': 4.1661529541015625, 'learning_rate': 1.1631082235698316e-05, 'epoch': 4.8}\n",
+ "{'loss': 0.9848, 'grad_norm': 6.484502792358398, 'learning_rate': 1.130009458668863e-05, 'epoch': 4.82}\n",
+ "{'loss': 0.9632, 'grad_norm': 5.1096086502075195, 'learning_rate': 1.097328372314721e-05, 'epoch': 4.84}\n",
+ "{'loss': 1.0435, 'grad_norm': 7.69472074508667, 'learning_rate': 1.0650684916965559e-05, 'epoch': 4.85}\n",
+ "{'loss': 1.1122, 'grad_norm': 6.654355525970459, 'learning_rate': 1.0332332985438248e-05, 'epoch': 4.87}\n",
+ "{'loss': 1.0846, 'grad_norm': 6.456166744232178, 'learning_rate': 1.0018262287505086e-05, 'epoch': 4.89}\n",
+ "{'loss': 1.1491, 'grad_norm': 5.556300163269043, 'learning_rate': 9.708506720042932e-06, 'epoch': 4.91}\n",
+ "{'loss': 1.0227, 'grad_norm': 5.921450614929199, 'learning_rate': 9.403099714207175e-06, 'epoch': 4.93}\n",
+ "{'loss': 1.13, 'grad_norm': 5.2472052574157715, 'learning_rate': 9.102074231823727e-06, 'epoch': 4.94}\n",
+ "{'loss': 1.1184, 'grad_norm': 6.798206806182861, 'learning_rate': 8.805462761831418e-06, 'epoch': 4.96}\n",
+ "{'loss': 1.1483, 'grad_norm': 6.1544647216796875, 'learning_rate': 8.513297316775625e-06, 'epoch': 4.98}\n",
+ "{'loss': 1.0966, 'grad_norm': 5.619192600250244, 'learning_rate': 8.225609429353187e-06, 'epoch': 5.0}\n",
+ " 83%|██████████████████████████████▊ | 2800/3360 [1:00:45<11:43, 1.26s/it][INFO|trainer.py:3788] 2024-07-04 12:10:55,158 >> \n",
+ "***** Running Evaluation *****\n",
+ "[INFO|trainer.py:3790] 2024-07-04 12:10:55,158 >> Num examples = 46\n",
+ "[INFO|trainer.py:3793] 2024-07-04 12:10:55,158 >> Batch size = 1\n",
+ "\n",
+ " 0%| | 0/46 [00:00, ?it/s]\u001b[A\n",
+ " 9%|███▊ | 4/46 [00:00<00:01, 31.69it/s]\u001b[A\n",
+ " 17%|███████▋ | 8/46 [00:00<00:01, 26.12it/s]\u001b[A\n",
+ " 24%|██████████▎ | 11/46 [00:00<00:01, 23.11it/s]\u001b[A\n",
+ " 30%|█████████████ | 14/46 [00:00<00:01, 23.84it/s]\u001b[A\n",
+ " 37%|███████████████▉ | 17/46 [00:00<00:01, 24.23it/s]\u001b[A\n",
+ " 43%|██████████████████▋ | 20/46 [00:00<00:01, 24.91it/s]\u001b[A\n",
+ " 50%|█████████████████████▌ | 23/46 [00:00<00:00, 25.15it/s]\u001b[A\n",
+ " 57%|████████████████████████▎ | 26/46 [00:01<00:00, 25.17it/s]\u001b[A\n",
+ " 63%|███████████████████████████ | 29/46 [00:01<00:00, 23.97it/s]\u001b[A\n",
+ " 70%|█████████████████████████████▉ | 32/46 [00:01<00:00, 24.14it/s]\u001b[A\n",
+ " 76%|████████████████████████████████▋ | 35/46 [00:01<00:00, 24.94it/s]\u001b[A\n",
+ " 83%|███████████████████████████████████▌ | 38/46 [00:01<00:00, 25.29it/s]\u001b[A\n",
+ " 89%|██████████████████████████████████████▎ | 41/46 [00:01<00:00, 25.29it/s]\u001b[A\n",
+ " \u001b[A\n",
+ "\u001b[A{'eval_loss': 2.439286708831787, 'eval_runtime': 1.9084, 'eval_samples_per_second': 24.104, 'eval_steps_per_second': 24.104, 'epoch': 5.0}\n",
+ " 83%|██████████████████████████████▊ | 2800/3360 [1:00:47<11:43, 1.26s/it]\n",
+ "100%|███████████████████████████████████████████| 46/46 [00:01<00:00, 25.52it/s]\u001b[A\n",
+ " \u001b[A[INFO|trainer.py:3478] 2024-07-04 12:10:57,069 >> Saving model checkpoint to saves/qwen2-0.5b/lora/sft/checkpoint-2800\n",
+ "[INFO|configuration_utils.py:733] 2024-07-04 12:10:57,881 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/config.json\n",
+ "[INFO|configuration_utils.py:800] 2024-07-04 12:10:57,882 >> Model config Qwen2Config {\n",
+ " \"architectures\": [\n",
+ " \"Qwen2ForCausalLM\"\n",
+ " ],\n",
+ " \"attention_dropout\": 0.0,\n",
+ " \"bos_token_id\": 151643,\n",
+ " \"eos_token_id\": 151645,\n",
+ " \"hidden_act\": \"silu\",\n",
+ " \"hidden_size\": 896,\n",
+ " \"initializer_range\": 0.02,\n",
+ " \"intermediate_size\": 4864,\n",
+ " \"max_position_embeddings\": 32768,\n",
+ " \"max_window_layers\": 24,\n",
+ " \"model_type\": \"qwen2\",\n",
+ " \"num_attention_heads\": 14,\n",
+ " \"num_hidden_layers\": 24,\n",
+ " \"num_key_value_heads\": 2,\n",
+ " \"rms_norm_eps\": 1e-06,\n",
+ " \"rope_theta\": 1000000.0,\n",
+ " \"sliding_window\": 32768,\n",
+ " \"tie_word_embeddings\": true,\n",
+ " \"torch_dtype\": \"bfloat16\",\n",
+ " \"transformers_version\": \"4.42.3\",\n",
+ " \"use_cache\": true,\n",
+ " \"use_sliding_window\": false,\n",
+ " \"vocab_size\": 151936\n",
+ "}\n",
+ "\n",
+ "[INFO|tokenization_utils_base.py:2574] 2024-07-04 12:10:57,908 >> tokenizer config file saved in saves/qwen2-0.5b/lora/sft/checkpoint-2800/tokenizer_config.json\n",
+ "[INFO|tokenization_utils_base.py:2583] 2024-07-04 12:10:57,908 >> Special tokens file saved in saves/qwen2-0.5b/lora/sft/checkpoint-2800/special_tokens_map.json\n",
+ "{'loss': 0.9401, 'grad_norm': 5.465145587921143, 'learning_rate': 7.942430149009161e-06, 'epoch': 5.02}\n",
+ "{'loss': 0.9053, 'grad_norm': 5.05084228515625, 'learning_rate': 7.663790038585793e-06, 'epoch': 5.03}\n",
+ "{'loss': 0.9659, 'grad_norm': 6.576834678649902, 'learning_rate': 7.389719171023857e-06, 'epoch': 5.05}\n",
+ "{'loss': 0.9701, 'grad_norm': 6.316474437713623, 'learning_rate': 7.1202471261170245e-06, 'epoch': 5.07}\n",
+ "{'loss': 0.9986, 'grad_norm': 5.003658771514893, 'learning_rate': 6.855402987319348e-06, 'epoch': 5.09}\n",
+ "{'loss': 1.0909, 'grad_norm': 5.66398286819458, 'learning_rate': 6.595215338606397e-06, 'epoch': 5.1}\n",
+ "{'loss': 0.8373, 'grad_norm': 5.239981174468994, 'learning_rate': 6.339712261390213e-06, 'epoch': 5.12}\n",
+ "{'loss': 1.0119, 'grad_norm': 5.830501079559326, 'learning_rate': 6.088921331488568e-06, 'epoch': 5.14}\n",
+ "{'loss': 0.89, 'grad_norm': 5.683416366577148, 'learning_rate': 5.8428696161488215e-06, 'epoch': 5.16}\n",
+ "{'loss': 0.8321, 'grad_norm': 5.024005889892578, 'learning_rate': 5.601583671126531e-06, 'epoch': 5.18}\n",
+ "{'loss': 0.9924, 'grad_norm': 5.65994930267334, 'learning_rate': 5.365089537819434e-06, 'epoch': 5.19}\n",
+ "{'loss': 0.982, 'grad_norm': 5.285236835479736, 'learning_rate': 5.133412740456806e-06, 'epoch': 5.21}\n",
+ "{'loss': 0.988, 'grad_norm': 6.087540149688721, 'learning_rate': 4.906578283344759e-06, 'epoch': 5.23}\n",
+ "{'loss': 1.0628, 'grad_norm': 5.564962863922119, 'learning_rate': 4.684610648167503e-06, 'epoch': 5.25}\n",
+ "{'loss': 0.9339, 'grad_norm': 5.311854362487793, 'learning_rate': 4.467533791345191e-06, 'epoch': 5.27}\n",
+ "{'loss': 0.9112, 'grad_norm': 6.383027076721191, 'learning_rate': 4.255371141448272e-06, 'epoch': 5.28}\n",
+ "{'loss': 0.8851, 'grad_norm': 5.323634147644043, 'learning_rate': 4.048145596668967e-06, 'epoch': 5.3}\n",
+ "{'loss': 0.9129, 'grad_norm': 5.474393844604492, 'learning_rate': 3.84587952234991e-06, 'epoch': 5.32}\n",
+ "{'loss': 0.8769, 'grad_norm': 6.140456676483154, 'learning_rate': 3.6485947485702832e-06, 'epoch': 5.34}\n",
+ "{'loss': 0.9177, 'grad_norm': 5.710687637329102, 'learning_rate': 3.4563125677897932e-06, 'epoch': 5.35}\n",
+ "{'loss': 0.9235, 'grad_norm': 6.829979419708252, 'learning_rate': 3.269053732550581e-06, 'epoch': 5.37}\n",
+ "{'loss': 0.9744, 'grad_norm': 6.83032751083374, 'learning_rate': 3.086838453237506e-06, 'epoch': 5.39}\n",
+ "{'loss': 0.7769, 'grad_norm': 5.491135597229004, 'learning_rate': 2.9096863958968268e-06, 'epoch': 5.41}\n",
+ "{'loss': 0.8412, 'grad_norm': 6.708963394165039, 'learning_rate': 2.737616680113758e-06, 'epoch': 5.43}\n",
+ "{'loss': 0.8511, 'grad_norm': 6.676459312438965, 'learning_rate': 2.570647876948895e-06, 'epoch': 5.44}\n",
+ "{'loss': 0.9549, 'grad_norm': 7.339512825012207, 'learning_rate': 2.408798006933882e-06, 'epoch': 5.46}\n",
+ "{'loss': 1.1274, 'grad_norm': 6.678201198577881, 'learning_rate': 2.252084538126542e-06, 'epoch': 5.48}\n",
+ "{'loss': 0.9263, 'grad_norm': 6.124770641326904, 'learning_rate': 2.100524384225555e-06, 'epoch': 5.5}\n",
+ "{'loss': 0.943, 'grad_norm': 7.016269207000732, 'learning_rate': 1.9541339027450256e-06, 'epoch': 5.52}\n",
+ "{'loss': 0.9571, 'grad_norm': 5.896731853485107, 'learning_rate': 1.8129288932490274e-06, 'epoch': 5.53}\n",
+ "{'loss': 0.8802, 'grad_norm': 5.532138347625732, 'learning_rate': 1.6769245956464396e-06, 'epoch': 5.55}\n",
+ "{'loss': 1.0613, 'grad_norm': 6.437876224517822, 'learning_rate': 1.5461356885461075e-06, 'epoch': 5.57}\n",
+ "{'loss': 0.957, 'grad_norm': 5.419349670410156, 'learning_rate': 1.4205762876726092e-06, 'epoch': 5.59}\n",
+ "{'loss': 1.0672, 'grad_norm': 6.222854137420654, 'learning_rate': 1.3002599443428243e-06, 'epoch': 5.6}\n",
+ "{'loss': 1.0228, 'grad_norm': 6.305788993835449, 'learning_rate': 1.1851996440033319e-06, 'epoch': 5.62}\n",
+ "{'loss': 0.8494, 'grad_norm': 6.640852928161621, 'learning_rate': 1.0754078048289374e-06, 'epoch': 5.64}\n",
+ "{'loss': 0.9589, 'grad_norm': 5.630051612854004, 'learning_rate': 9.708962763824048e-07, 'epoch': 5.66}\n",
+ "{'loss': 0.9514, 'grad_norm': 5.754588603973389, 'learning_rate': 8.716763383355864e-07, 'epoch': 5.68}\n",
+ "{'loss': 0.9896, 'grad_norm': 6.073591232299805, 'learning_rate': 7.777586992519959e-07, 'epoch': 5.69}\n",
+ "{'loss': 0.8798, 'grad_norm': 6.883085250854492, 'learning_rate': 6.891534954310885e-07, 'epoch': 5.71}\n",
+ "{'loss': 0.9749, 'grad_norm': 5.874994277954102, 'learning_rate': 6.058702898142643e-07, 'epoch': 5.73}\n",
+ "{'loss': 0.862, 'grad_norm': 5.205725193023682, 'learning_rate': 5.279180709527765e-07, 'epoch': 5.75}\n",
+ "{'loss': 1.0397, 'grad_norm': 6.112522602081299, 'learning_rate': 4.553052520375911e-07, 'epoch': 5.77}\n",
+ "{'loss': 0.8691, 'grad_norm': 6.450985431671143, 'learning_rate': 3.8803966999139684e-07, 'epoch': 5.78}\n",
+ "{'loss': 0.884, 'grad_norm': 5.139239311218262, 'learning_rate': 3.261285846227868e-07, 'epoch': 5.8}\n",
+ "{'loss': 0.8508, 'grad_norm': 6.213397979736328, 'learning_rate': 2.6957867784270787e-07, 'epoch': 5.82}\n",
+ "{'loss': 0.8554, 'grad_norm': 27.320371627807617, 'learning_rate': 2.1839605294330933e-07, 'epoch': 5.84}\n",
+ "{'loss': 1.036, 'grad_norm': 7.248013973236084, 'learning_rate': 1.725862339392259e-07, 'epoch': 5.85}\n",
+ "{'loss': 0.8262, 'grad_norm': 7.534704685211182, 'learning_rate': 1.3215416497138754e-07, 'epoch': 5.87}\n",
+ "{'loss': 1.0454, 'grad_norm': 5.765580654144287, 'learning_rate': 9.710420977340762e-08, 'epoch': 5.89}\n",
+ "{'loss': 0.8484, 'grad_norm': 5.267190456390381, 'learning_rate': 6.744015120061509e-08, 'epoch': 5.91}\n",
+ "{'loss': 0.9818, 'grad_norm': 6.66579008102417, 'learning_rate': 4.316519082179227e-08, 'epoch': 5.93}\n",
+ "{'loss': 0.8825, 'grad_norm': 4.743204593658447, 'learning_rate': 2.4281948573617874e-08, 'epoch': 5.94}\n",
+ "{'loss': 0.9975, 'grad_norm': 6.015940189361572, 'learning_rate': 1.0792462477909882e-08, 'epoch': 5.96}\n",
+ "{'loss': 0.9418, 'grad_norm': 5.236660957336426, 'learning_rate': 2.6981884216847884e-09, 'epoch': 5.98}\n",
+ "{'loss': 0.9678, 'grad_norm': 5.222324371337891, 'learning_rate': 0.0, 'epoch': 6.0}\n",
+ "100%|█████████████████████████████████████| 3360/3360 [1:12:30<00:00, 1.25s/it][INFO|trainer.py:3788] 2024-07-04 12:22:39,963 >> \n",
+ "***** Running Evaluation *****\n",
+ "[INFO|trainer.py:3790] 2024-07-04 12:22:39,963 >> Num examples = 46\n",
+ "[INFO|trainer.py:3793] 2024-07-04 12:22:39,964 >> Batch size = 1\n",
+ "\n",
+ " 0%| | 0/46 [00:00, ?it/s]\u001b[A\n",
+ " 9%|███▊ | 4/46 [00:00<00:01, 33.59it/s]\u001b[A\n",
+ " 17%|███████▋ | 8/46 [00:00<00:01, 28.40it/s]\u001b[A\n",
+ " 24%|██████████▎ | 11/46 [00:00<00:01, 27.40it/s]\u001b[A\n",
+ " 30%|█████████████ | 14/46 [00:00<00:01, 27.09it/s]\u001b[A\n",
+ " 37%|███████████████▉ | 17/46 [00:00<00:01, 26.45it/s]\u001b[A\n",
+ " 43%|██████████████████▋ | 20/46 [00:00<00:01, 25.97it/s]\u001b[A\n",
+ " 50%|█████████████████████▌ | 23/46 [00:00<00:00, 24.53it/s]\u001b[A\n",
+ " 57%|████████████████████████▎ | 26/46 [00:00<00:00, 25.01it/s]\u001b[A\n",
+ " 63%|███████████████████████████ | 29/46 [00:01<00:00, 24.12it/s]\u001b[A\n",
+ " 70%|█████████████████████████████▉ | 32/46 [00:01<00:00, 24.19it/s]\u001b[A\n",
+ " 76%|████████████████████████████████▋ | 35/46 [00:01<00:00, 24.74it/s]\u001b[A\n",
+ " 83%|███████████████████████████████████▌ | 38/46 [00:01<00:00, 25.42it/s]\u001b[A\n",
+ " 89%|██████████████████████████████████████▎ | 41/46 [00:01<00:00, 25.84it/s]\u001b[A\n",
+ " \u001b[A\n",
+ "\u001b[A{'eval_loss': 2.547395706176758, 'eval_runtime': 1.8294, 'eval_samples_per_second': 25.145, 'eval_steps_per_second': 25.145, 'epoch': 6.0}\n",
+ "100%|█████████████████████████████████████| 3360/3360 [1:12:32<00:00, 1.25s/it]\n",
+ "100%|███████████████████████████████████████████| 46/46 [00:01<00:00, 26.08it/s]\u001b[A\n",
+ " \u001b[A[INFO|trainer.py:3478] 2024-07-04 12:22:41,795 >> Saving model checkpoint to saves/qwen2-0.5b/lora/sft/checkpoint-3360\n",
+ "[INFO|configuration_utils.py:733] 2024-07-04 12:22:42,459 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/config.json\n",
+ "[INFO|configuration_utils.py:800] 2024-07-04 12:22:42,460 >> Model config Qwen2Config {\n",
+ " \"architectures\": [\n",
+ " \"Qwen2ForCausalLM\"\n",
+ " ],\n",
+ " \"attention_dropout\": 0.0,\n",
+ " \"bos_token_id\": 151643,\n",
+ " \"eos_token_id\": 151645,\n",
+ " \"hidden_act\": \"silu\",\n",
+ " \"hidden_size\": 896,\n",
+ " \"initializer_range\": 0.02,\n",
+ " \"intermediate_size\": 4864,\n",
+ " \"max_position_embeddings\": 32768,\n",
+ " \"max_window_layers\": 24,\n",
+ " \"model_type\": \"qwen2\",\n",
+ " \"num_attention_heads\": 14,\n",
+ " \"num_hidden_layers\": 24,\n",
+ " \"num_key_value_heads\": 2,\n",
+ " \"rms_norm_eps\": 1e-06,\n",
+ " \"rope_theta\": 1000000.0,\n",
+ " \"sliding_window\": 32768,\n",
+ " \"tie_word_embeddings\": true,\n",
+ " \"torch_dtype\": \"bfloat16\",\n",
+ " \"transformers_version\": \"4.42.3\",\n",
+ " \"use_cache\": true,\n",
+ " \"use_sliding_window\": false,\n",
+ " \"vocab_size\": 151936\n",
+ "}\n",
+ "\n",
+ "[INFO|tokenization_utils_base.py:2574] 2024-07-04 12:22:42,487 >> tokenizer config file saved in saves/qwen2-0.5b/lora/sft/checkpoint-3360/tokenizer_config.json\n",
+ "[INFO|tokenization_utils_base.py:2583] 2024-07-04 12:22:42,487 >> Special tokens file saved in saves/qwen2-0.5b/lora/sft/checkpoint-3360/special_tokens_map.json\n",
+ "[INFO|trainer.py:2383] 2024-07-04 12:22:42,628 >> \n",
+ "\n",
+ "Training completed. Do not forget to share your model on huggingface.co/models =)\n",
+ "\n",
+ "\n",
+ "{'train_runtime': 4358.4327, 'train_samples_per_second': 6.17, 'train_steps_per_second': 0.771, 'train_loss': 1.4797242326395852, 'epoch': 6.0}\n",
+ "100%|█████████████████████████████████████| 3360/3360 [1:12:33<00:00, 1.30s/it]\n",
+ "[INFO|trainer.py:3478] 2024-07-04 12:22:42,631 >> Saving model checkpoint to saves/qwen2-0.5b/lora/sft\n",
+ "[INFO|configuration_utils.py:733] 2024-07-04 12:22:43,255 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/config.json\n",
+ "[INFO|configuration_utils.py:800] 2024-07-04 12:22:43,256 >> Model config Qwen2Config {\n",
+ " \"architectures\": [\n",
+ " \"Qwen2ForCausalLM\"\n",
+ " ],\n",
+ " \"attention_dropout\": 0.0,\n",
+ " \"bos_token_id\": 151643,\n",
+ " \"eos_token_id\": 151645,\n",
+ " \"hidden_act\": \"silu\",\n",
+ " \"hidden_size\": 896,\n",
+ " \"initializer_range\": 0.02,\n",
+ " \"intermediate_size\": 4864,\n",
+ " \"max_position_embeddings\": 32768,\n",
+ " \"max_window_layers\": 24,\n",
+ " \"model_type\": \"qwen2\",\n",
+ " \"num_attention_heads\": 14,\n",
+ " \"num_hidden_layers\": 24,\n",
+ " \"num_key_value_heads\": 2,\n",
+ " \"rms_norm_eps\": 1e-06,\n",
+ " \"rope_theta\": 1000000.0,\n",
+ " \"sliding_window\": 32768,\n",
+ " \"tie_word_embeddings\": true,\n",
+ " \"torch_dtype\": \"bfloat16\",\n",
+ " \"transformers_version\": \"4.42.3\",\n",
+ " \"use_cache\": true,\n",
+ " \"use_sliding_window\": false,\n",
+ " \"vocab_size\": 151936\n",
+ "}\n",
+ "\n",
+ "[INFO|tokenization_utils_base.py:2574] 2024-07-04 12:22:43,285 >> tokenizer config file saved in saves/qwen2-0.5b/lora/sft/tokenizer_config.json\n",
+ "[INFO|tokenization_utils_base.py:2583] 2024-07-04 12:22:43,285 >> Special tokens file saved in saves/qwen2-0.5b/lora/sft/special_tokens_map.json\n",
+ "***** train metrics *****\n",
+ " epoch = 5.9973\n",
+ " total_flos = 4594110GF\n",
+ " train_loss = 1.4797\n",
+ " train_runtime = 1:12:38.43\n",
+ " train_samples_per_second = 6.17\n",
+ " train_steps_per_second = 0.771\n",
+ "Figure saved at: saves/qwen2-0.5b/lora/sft/training_loss.png\n",
+ "Figure saved at: saves/qwen2-0.5b/lora/sft/training_eval_loss.png\n",
+ "[INFO|trainer.py:3788] 2024-07-04 12:22:43,568 >> \n",
+ "***** Running Evaluation *****\n",
+ "[INFO|trainer.py:3790] 2024-07-04 12:22:43,568 >> Num examples = 46\n",
+ "[INFO|trainer.py:3793] 2024-07-04 12:22:43,568 >> Batch size = 1\n",
+ "100%|███████████████████████████████████████████| 46/46 [00:01<00:00, 25.60it/s]\n",
+ "***** eval metrics *****\n",
+ " epoch = 5.9973\n",
+ " eval_loss = 2.5474\n",
+ " eval_runtime = 0:00:01.84\n",
+ " eval_samples_per_second = 24.959\n",
+ " eval_steps_per_second = 24.959\n",
+ "[INFO|modelcard.py:449] 2024-07-04 12:22:45,413 >> Dropping the following result as it does not have all the necessary fields:\n",
+ "{'task': {'name': 'Causal Language Modeling', 'type': 'text-generation'}}\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: | 0.085 MB of 0.085 MB uploaded\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: Run history:\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: eval/loss ▂▁▂▄▇██\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: eval/runtime ▇█▃▁▇▄▅\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: eval/samples_per_second ▂▁▆█▂▄▄\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: eval/steps_per_second ▂▁▆█▂▄▄\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: train/epoch ▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇███\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: train/global_step ▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇███\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: train/grad_norm ▄▁▁▃▂▃▃▃▃▄▃▄▂▃▄▆▃▂▄▄▅▆▄▆▅▅▃▄█▅▆█▆▆▅▅▆▇▇▅\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: train/learning_rate ▂▄▅▇██████▇▇▇▇▇▆▆▆▆▅▅▅▄▄▄▃▃▃▃▂▂▂▂▁▁▁▁▁▁▁\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: train/loss █▇▇▇▆▆▆▆▆▆▅▅▆▅▄▄▄▄▄▄▃▃▃▃▃▂▂▂▂▂▂▂▂▂▂▁▁▁▁▂\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: \n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: Run summary:\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: eval/loss 2.5474\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: eval/runtime 1.843\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: eval/samples_per_second 24.959\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: eval/steps_per_second 24.959\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: total_flos 4932888177414144.0\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: train/epoch 5.99732\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: train/global_step 3360\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: train/grad_norm 5.22232\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: train/learning_rate 0.0\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: train/loss 0.9678\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: train_loss 1.47972\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: train_runtime 4358.4327\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: train_samples_per_second 6.17\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: train_steps_per_second 0.771\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: \n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: 🚀 View run \u001b[33mqwen2_0.5b_lora_sft\u001b[0m at: \u001b[34m\u001b[4mhttps://wandb.ai/inflaton-ai/huggingface/runs/u8sqhi0x\u001b[0m\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: ⭐️ View project at: \u001b[34m\u001b[4mhttps://wandb.ai/inflaton-ai/huggingface\u001b[0m\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: Synced 6 W&B file(s), 0 media file(s), 1 artifact file(s) and 0 other file(s)\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: Find logs at: \u001b[35m\u001b[1m./wandb/run-20240704_111005-u8sqhi0x/logs\u001b[0m\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m The new W&B backend becomes opt-out in version 0.18.0; try it out with `wandb.require(\"core\")`! See https://wandb.me/wandb-core for more information.\n",
+ "CPU times: user 59.8 s, sys: 18.1 s, total: 1min 17s\n",
+ "Wall time: 1h 13min 51s\n"
+ ]
+ }
+ ],
+ "source": [
+ "%%time\n",
+ "\n",
+ "!./scripts/tune-lf.sh config/qwen2_0.5b_lora_sft.yaml"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 17,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "loading /home/inflaton/code/projects/courses/llm-finetuning/llm_toolkit/translation_engine.py\n",
+ "Current Directory:\n",
+ "/home/inflaton/code/projects/courses/llm-finetuning/llama-factory\n",
+ "07/04/2024 12:22:59 - INFO - llamafactory.hparams.parser - Process rank: 0, device: cuda:0, n_gpu: 1, distributed training: False, compute dtype: torch.bfloat16\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-07-04 12:23:00,122 >> loading file vocab.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/vocab.json\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-07-04 12:23:00,122 >> loading file merges.txt from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/merges.txt\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-07-04 12:23:00,122 >> loading file tokenizer.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/tokenizer.json\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-07-04 12:23:00,122 >> loading file added_tokens.json from cache at None\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-07-04 12:23:00,122 >> loading file special_tokens_map.json from cache at None\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-07-04 12:23:00,122 >> loading file tokenizer_config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/tokenizer_config.json\n",
+ "[WARNING|logging.py:313] 2024-07-04 12:23:00,234 >> Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n",
+ "07/04/2024 12:23:00 - INFO - llamafactory.data.template - Replace eos token: <|im_end|>\n",
+ "07/04/2024 12:23:00 - INFO - llamafactory.data.template - Add <|im_start|> to stop words.\n",
+ "07/04/2024 12:23:00 - INFO - llamafactory.data.loader - Loading dataset alpaca_mac.json...\n",
+ "Converting format of dataset (num_proc=16): 100%|█| 4528/4528 [00:00<00:00, 1573\n",
+ "Running tokenizer on dataset (num_proc=16): 100%|█| 4528/4528 [00:01<00:00, 3491\n",
+ "input_ids:\n",
+ "[151644, 872, 198, 5501, 14683, 279, 2701, 8453, 1467, 1119, 6364, 323, 3410, 1172, 279, 24531, 2213, 11, 4302, 770, 624, 35987, 102895, 99164, 100324, 100717, 100095, 99509, 1773, 151645, 198, 151644, 77091, 198, 17949, 358, 572, 2617, 553, 264, 38835, 44486, 13, 151645]\n",
+ "inputs:\n",
+ "<|im_start|>user\n",
+ "Please translate the following Chinese text into English and provide only the translated content, nothing else.\n",
+ "全仗着狐仙搭救。<|im_end|>\n",
+ "<|im_start|>assistant\n",
+ "Because I was protected by a fox fairy.<|im_end|>\n",
+ "label_ids:\n",
+ "[-100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, 17949, 358, 572, 2617, 553, 264, 38835, 44486, 13, 151645]\n",
+ "labels:\n",
+ "Because I was protected by a fox fairy.<|im_end|>\n",
+ "[INFO|configuration_utils.py:733] 2024-07-04 12:23:03,981 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/config.json\n",
+ "[INFO|configuration_utils.py:800] 2024-07-04 12:23:03,982 >> Model config Qwen2Config {\n",
+ " \"_name_or_path\": \"Qwen/Qwen2-1.5B-Instruct\",\n",
+ " \"architectures\": [\n",
+ " \"Qwen2ForCausalLM\"\n",
+ " ],\n",
+ " \"attention_dropout\": 0.0,\n",
+ " \"bos_token_id\": 151643,\n",
+ " \"eos_token_id\": 151645,\n",
+ " \"hidden_act\": \"silu\",\n",
+ " \"hidden_size\": 1536,\n",
+ " \"initializer_range\": 0.02,\n",
+ " \"intermediate_size\": 8960,\n",
+ " \"max_position_embeddings\": 32768,\n",
+ " \"max_window_layers\": 28,\n",
+ " \"model_type\": \"qwen2\",\n",
+ " \"num_attention_heads\": 12,\n",
+ " \"num_hidden_layers\": 28,\n",
+ " \"num_key_value_heads\": 2,\n",
+ " \"rms_norm_eps\": 1e-06,\n",
+ " \"rope_theta\": 1000000.0,\n",
+ " \"sliding_window\": 32768,\n",
+ " \"tie_word_embeddings\": true,\n",
+ " \"torch_dtype\": \"bfloat16\",\n",
+ " \"transformers_version\": \"4.42.3\",\n",
+ " \"use_cache\": true,\n",
+ " \"use_sliding_window\": false,\n",
+ " \"vocab_size\": 151936\n",
+ "}\n",
+ "\n",
+ "[INFO|modeling_utils.py:3556] 2024-07-04 12:23:04,016 >> loading weights file model.safetensors from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/model.safetensors\n",
+ "[INFO|modeling_utils.py:1531] 2024-07-04 12:23:06,701 >> Instantiating Qwen2ForCausalLM model under default dtype torch.bfloat16.\n",
+ "[INFO|configuration_utils.py:1000] 2024-07-04 12:23:06,704 >> Generate config GenerationConfig {\n",
+ " \"bos_token_id\": 151643,\n",
+ " \"eos_token_id\": 151645\n",
+ "}\n",
+ "\n",
+ "[INFO|modeling_utils.py:4364] 2024-07-04 12:26:42,040 >> All model checkpoint weights were used when initializing Qwen2ForCausalLM.\n",
+ "\n",
+ "[INFO|modeling_utils.py:4372] 2024-07-04 12:26:42,040 >> All the weights of Qwen2ForCausalLM were initialized from the model checkpoint at Qwen/Qwen2-1.5B-Instruct.\n",
+ "If your task is similar to the task the model of the checkpoint was trained on, you can already use Qwen2ForCausalLM for predictions without further training.\n",
+ "[INFO|configuration_utils.py:955] 2024-07-04 12:26:42,765 >> loading configuration file generation_config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/generation_config.json\n",
+ "[INFO|configuration_utils.py:1000] 2024-07-04 12:26:42,766 >> Generate config GenerationConfig {\n",
+ " \"bos_token_id\": 151643,\n",
+ " \"do_sample\": true,\n",
+ " \"eos_token_id\": [\n",
+ " 151645,\n",
+ " 151643\n",
+ " ],\n",
+ " \"pad_token_id\": 151643,\n",
+ " \"repetition_penalty\": 1.1,\n",
+ " \"temperature\": 0.7,\n",
+ " \"top_k\": 20,\n",
+ " \"top_p\": 0.8\n",
+ "}\n",
+ "\n",
+ "07/04/2024 12:26:43 - INFO - llamafactory.model.model_utils.checkpointing - Gradient checkpointing enabled.\n",
+ "07/04/2024 12:26:43 - INFO - llamafactory.model.model_utils.attention - Using torch SDPA for faster training and inference.\n",
+ "07/04/2024 12:26:43 - INFO - llamafactory.model.adapter - Upcasting trainable params to float32.\n",
+ "07/04/2024 12:26:43 - INFO - llamafactory.model.adapter - Fine-tuning method: LoRA\n",
+ "07/04/2024 12:26:43 - INFO - llamafactory.model.model_utils.misc - Found linear modules: q_proj,gate_proj,down_proj,k_proj,v_proj,up_proj,o_proj\n",
+ "07/04/2024 12:26:43 - INFO - llamafactory.model.loader - trainable params: 9,232,384 || all params: 1,552,946,688 || trainable%: 0.5945\n",
+ "[INFO|trainer.py:642] 2024-07-04 12:26:43,511 >> Using auto half precision backend\n",
+ "[INFO|trainer.py:2128] 2024-07-04 12:26:43,666 >> ***** Running training *****\n",
+ "[INFO|trainer.py:2129] 2024-07-04 12:26:43,666 >> Num examples = 4,482\n",
+ "[INFO|trainer.py:2130] 2024-07-04 12:26:43,666 >> Num Epochs = 6\n",
+ "[INFO|trainer.py:2131] 2024-07-04 12:26:43,666 >> Instantaneous batch size per device = 1\n",
+ "[INFO|trainer.py:2134] 2024-07-04 12:26:43,666 >> Total train batch size (w. parallel, distributed & accumulation) = 8\n",
+ "[INFO|trainer.py:2135] 2024-07-04 12:26:43,666 >> Gradient Accumulation steps = 8\n",
+ "[INFO|trainer.py:2136] 2024-07-04 12:26:43,666 >> Total optimization steps = 3,360\n",
+ "[INFO|trainer.py:2137] 2024-07-04 12:26:43,668 >> Number of trainable parameters = 9,232,384\n",
+ "[INFO|integration_utils.py:750] 2024-07-04 12:26:43,670 >> Automatic Weights & Biases logging enabled, to disable set os.environ[\"WANDB_DISABLED\"] = \"true\"\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33minflaton-sg\u001b[0m (\u001b[33minflaton-ai\u001b[0m). Use \u001b[1m`wandb login --relogin`\u001b[0m to force relogin\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: Tracking run with wandb version 0.17.4\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: Run data is saved locally in \u001b[35m\u001b[1m/home/inflaton/code/projects/courses/llm-finetuning/llama-factory/wandb/run-20240704_122645-mpc5sxtf\u001b[0m\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: Run \u001b[1m`wandb offline`\u001b[0m to turn off syncing.\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: Syncing run \u001b[33mqwen2_1.5b_lora_sft\u001b[0m\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: ⭐️ View project at \u001b[34m\u001b[4mhttps://wandb.ai/inflaton-ai/huggingface\u001b[0m\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: 🚀 View run at \u001b[34m\u001b[4mhttps://wandb.ai/inflaton-ai/huggingface/runs/mpc5sxtf\u001b[0m\n",
+ "{'loss': 2.1612, 'grad_norm': 1.7288845777511597, 'learning_rate': 2.9761904761904763e-06, 'epoch': 0.02}\n",
+ "{'loss': 2.2871, 'grad_norm': 1.9337925910949707, 'learning_rate': 5.9523809523809525e-06, 'epoch': 0.04}\n",
+ "{'loss': 2.1455, 'grad_norm': 1.5129448175430298, 'learning_rate': 8.92857142857143e-06, 'epoch': 0.05}\n",
+ "{'loss': 2.1376, 'grad_norm': 2.9766852855682373, 'learning_rate': 1.1904761904761905e-05, 'epoch': 0.07}\n",
+ "{'loss': 2.2937, 'grad_norm': 1.413576602935791, 'learning_rate': 1.4880952380952381e-05, 'epoch': 0.09}\n",
+ "{'loss': 2.0076, 'grad_norm': 1.7012724876403809, 'learning_rate': 1.785714285714286e-05, 'epoch': 0.11}\n",
+ "{'loss': 2.1399, 'grad_norm': 1.679208517074585, 'learning_rate': 2.0833333333333336e-05, 'epoch': 0.12}\n",
+ "{'loss': 1.9036, 'grad_norm': 1.6296344995498657, 'learning_rate': 2.380952380952381e-05, 'epoch': 0.14}\n",
+ "{'loss': 2.0186, 'grad_norm': 2.1293675899505615, 'learning_rate': 2.6785714285714288e-05, 'epoch': 0.16}\n",
+ "{'loss': 1.9517, 'grad_norm': 1.4419277906417847, 'learning_rate': 2.9761904761904762e-05, 'epoch': 0.18}\n",
+ "{'loss': 1.979, 'grad_norm': 1.8672434091567993, 'learning_rate': 3.273809523809524e-05, 'epoch': 0.2}\n",
+ "{'loss': 1.9362, 'grad_norm': 1.3589439392089844, 'learning_rate': 3.571428571428572e-05, 'epoch': 0.21}\n",
+ "{'loss': 1.9264, 'grad_norm': 1.71873140335083, 'learning_rate': 3.8690476190476195e-05, 'epoch': 0.23}\n",
+ "{'loss': 1.9515, 'grad_norm': 2.2398152351379395, 'learning_rate': 4.166666666666667e-05, 'epoch': 0.25}\n",
+ "{'loss': 1.8163, 'grad_norm': 1.5651923418045044, 'learning_rate': 4.464285714285715e-05, 'epoch': 0.27}\n",
+ "{'loss': 1.792, 'grad_norm': 1.5333657264709473, 'learning_rate': 4.761904761904762e-05, 'epoch': 0.29}\n",
+ "{'loss': 1.9457, 'grad_norm': 1.448676347732544, 'learning_rate': 5.05952380952381e-05, 'epoch': 0.3}\n",
+ "{'loss': 1.7585, 'grad_norm': 1.8496005535125732, 'learning_rate': 5.3571428571428575e-05, 'epoch': 0.32}\n",
+ "{'loss': 1.8682, 'grad_norm': 2.332167387008667, 'learning_rate': 5.6547619047619046e-05, 'epoch': 0.34}\n",
+ "{'loss': 1.9775, 'grad_norm': 1.9075323343276978, 'learning_rate': 5.9523809523809524e-05, 'epoch': 0.36}\n",
+ "{'loss': 1.9233, 'grad_norm': 1.8132203817367554, 'learning_rate': 6.25e-05, 'epoch': 0.37}\n",
+ "{'loss': 1.8469, 'grad_norm': 2.514983892440796, 'learning_rate': 6.547619047619048e-05, 'epoch': 0.39}\n",
+ "{'loss': 1.8242, 'grad_norm': 2.0344440937042236, 'learning_rate': 6.845238095238096e-05, 'epoch': 0.41}\n",
+ "{'loss': 1.9965, 'grad_norm': 2.310185194015503, 'learning_rate': 7.142857142857143e-05, 'epoch': 0.43}\n",
+ "{'loss': 1.9004, 'grad_norm': 2.3513343334198, 'learning_rate': 7.440476190476191e-05, 'epoch': 0.45}\n",
+ "{'loss': 1.8188, 'grad_norm': 2.2934393882751465, 'learning_rate': 7.738095238095239e-05, 'epoch': 0.46}\n",
+ "{'loss': 1.8803, 'grad_norm': 2.8724184036254883, 'learning_rate': 8.035714285714287e-05, 'epoch': 0.48}\n",
+ "{'loss': 1.9181, 'grad_norm': 2.4238462448120117, 'learning_rate': 8.333333333333334e-05, 'epoch': 0.5}\n",
+ "{'loss': 1.6932, 'grad_norm': 1.5286414623260498, 'learning_rate': 8.630952380952382e-05, 'epoch': 0.52}\n",
+ "{'loss': 1.8331, 'grad_norm': 2.563647985458374, 'learning_rate': 8.92857142857143e-05, 'epoch': 0.54}\n",
+ "{'loss': 1.8539, 'grad_norm': 2.127699613571167, 'learning_rate': 9.226190476190478e-05, 'epoch': 0.55}\n",
+ "{'loss': 1.6796, 'grad_norm': 3.4179396629333496, 'learning_rate': 9.523809523809524e-05, 'epoch': 0.57}\n",
+ "{'loss': 1.7209, 'grad_norm': 2.492151975631714, 'learning_rate': 9.821428571428572e-05, 'epoch': 0.59}\n",
+ "{'loss': 1.7723, 'grad_norm': 2.3568859100341797, 'learning_rate': 9.999956828659095e-05, 'epoch': 0.61}\n",
+ "{'loss': 1.7839, 'grad_norm': 3.5560832023620605, 'learning_rate': 9.999471159635539e-05, 'epoch': 0.62}\n",
+ "{'loss': 1.7146, 'grad_norm': 1.712493658065796, 'learning_rate': 9.998445910004082e-05, 'epoch': 0.64}\n",
+ "{'loss': 1.8911, 'grad_norm': 2.824240207672119, 'learning_rate': 9.996881190417393e-05, 'epoch': 0.66}\n",
+ "{'loss': 1.8631, 'grad_norm': 2.2122113704681396, 'learning_rate': 9.994777169751806e-05, 'epoch': 0.68}\n",
+ "{'loss': 1.5738, 'grad_norm': 1.7466025352478027, 'learning_rate': 9.992134075089084e-05, 'epoch': 0.7}\n",
+ "{'loss': 1.7552, 'grad_norm': 2.581709623336792, 'learning_rate': 9.988952191691925e-05, 'epoch': 0.71}\n",
+ "{'loss': 1.808, 'grad_norm': 2.1387972831726074, 'learning_rate': 9.985231862973168e-05, 'epoch': 0.73}\n",
+ "{'loss': 1.8303, 'grad_norm': 1.7675608396530151, 'learning_rate': 9.980973490458728e-05, 'epoch': 0.75}\n",
+ "{'loss': 1.8013, 'grad_norm': 2.706218719482422, 'learning_rate': 9.976177533744261e-05, 'epoch': 0.77}\n",
+ "{'loss': 1.7443, 'grad_norm': 2.9387295246124268, 'learning_rate': 9.97084451044556e-05, 'epoch': 0.79}\n",
+ "{'loss': 1.6509, 'grad_norm': 1.6503076553344727, 'learning_rate': 9.964974996142698e-05, 'epoch': 0.8}\n",
+ "{'loss': 1.722, 'grad_norm': 2.0305140018463135, 'learning_rate': 9.958569624317893e-05, 'epoch': 0.82}\n",
+ "{'loss': 1.7625, 'grad_norm': 2.8122429847717285, 'learning_rate': 9.951629086287151e-05, 'epoch': 0.84}\n",
+ "{'loss': 1.7194, 'grad_norm': 2.0110862255096436, 'learning_rate': 9.944154131125642e-05, 'epoch': 0.86}\n",
+ "{'loss': 1.7894, 'grad_norm': 1.7363322973251343, 'learning_rate': 9.936145565586871e-05, 'epoch': 0.87}\n",
+ "{'loss': 1.9447, 'grad_norm': 1.8065259456634521, 'learning_rate': 9.927604254015585e-05, 'epoch': 0.89}\n",
+ "{'loss': 1.8639, 'grad_norm': 1.8963510990142822, 'learning_rate': 9.918531118254507e-05, 'epoch': 0.91}\n",
+ "{'loss': 1.7336, 'grad_norm': 2.30542254447937, 'learning_rate': 9.90892713754483e-05, 'epoch': 0.93}\n",
+ "{'loss': 1.7705, 'grad_norm': 2.9846692085266113, 'learning_rate': 9.898793348420536e-05, 'epoch': 0.95}\n",
+ "{'loss': 1.7884, 'grad_norm': 2.1550045013427734, 'learning_rate': 9.888130844596524e-05, 'epoch': 0.96}\n",
+ "{'loss': 1.7428, 'grad_norm': 2.1323790550231934, 'learning_rate': 9.876940776850569e-05, 'epoch': 0.98}\n",
+ "{'loss': 1.7183, 'grad_norm': 1.8198726177215576, 'learning_rate': 9.865224352899119e-05, 'epoch': 1.0}\n",
+ " 17%|██████▎ | 560/3360 [15:31<1:20:24, 1.72s/it][INFO|trainer.py:3788] 2024-07-04 12:42:20,584 >> \n",
+ "***** Running Evaluation *****\n",
+ "[INFO|trainer.py:3790] 2024-07-04 12:42:20,584 >> Num examples = 46\n",
+ "[INFO|trainer.py:3793] 2024-07-04 12:42:20,585 >> Batch size = 1\n",
+ "\n",
+ " 0%| | 0/46 [00:00, ?it/s]\u001b[A\n",
+ " 7%|██▊ | 3/46 [00:00<00:01, 27.33it/s]\u001b[A\n",
+ " 13%|█████▋ | 6/46 [00:00<00:01, 20.64it/s]\u001b[A\n",
+ " 20%|████████▌ | 9/46 [00:00<00:01, 20.40it/s]\u001b[A\n",
+ " 26%|███████████▏ | 12/46 [00:00<00:01, 20.19it/s]\u001b[A\n",
+ " 33%|██████████████ | 15/46 [00:00<00:01, 19.61it/s]\u001b[A\n",
+ " 39%|████████████████▊ | 18/46 [00:00<00:01, 19.78it/s]\u001b[A\n",
+ " 43%|██████████████████▋ | 20/46 [00:01<00:01, 18.45it/s]\u001b[A\n",
+ " 50%|█████████████████████▌ | 23/46 [00:01<00:01, 19.00it/s]\u001b[A\n",
+ " 57%|████████████████████████▎ | 26/46 [00:01<00:01, 19.55it/s]\u001b[A\n",
+ " 63%|███████████████████████████ | 29/46 [00:01<00:00, 19.15it/s]\u001b[A\n",
+ " 67%|████████████████████████████▉ | 31/46 [00:01<00:00, 18.70it/s]\u001b[A\n",
+ " 72%|██████████████████████████████▊ | 33/46 [00:01<00:00, 18.19it/s]\u001b[A\n",
+ " 78%|█████████████████████████████████▋ | 36/46 [00:01<00:00, 18.76it/s]\u001b[A\n",
+ " 83%|███████████████████████████████████▌ | 38/46 [00:01<00:00, 18.98it/s]\u001b[A\n",
+ " 87%|█████████████████████████████████████▍ | 40/46 [00:02<00:00, 18.88it/s]\u001b[A\n",
+ " 93%|████████████████████████████████████████▏ | 43/46 [00:02<00:00, 19.56it/s]\u001b[A\n",
+ " \u001b[A\n",
+ "\u001b[A{'eval_loss': 1.7211226224899292, 'eval_runtime': 2.4286, 'eval_samples_per_second': 18.941, 'eval_steps_per_second': 18.941, 'epoch': 1.0}\n",
+ " 17%|██████▎ | 560/3360 [15:34<1:20:24, 1.72s/it]\n",
+ "100%|███████████████████████████████████████████| 46/46 [00:02<00:00, 19.47it/s]\u001b[A\n",
+ " \u001b[A[INFO|trainer.py:3478] 2024-07-04 12:42:23,015 >> Saving model checkpoint to saves/qwen2-1.5b/lora/sft/checkpoint-560\n",
+ "[INFO|configuration_utils.py:733] 2024-07-04 12:42:23,808 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/config.json\n",
+ "[INFO|configuration_utils.py:800] 2024-07-04 12:42:23,809 >> Model config Qwen2Config {\n",
+ " \"architectures\": [\n",
+ " \"Qwen2ForCausalLM\"\n",
+ " ],\n",
+ " \"attention_dropout\": 0.0,\n",
+ " \"bos_token_id\": 151643,\n",
+ " \"eos_token_id\": 151645,\n",
+ " \"hidden_act\": \"silu\",\n",
+ " \"hidden_size\": 1536,\n",
+ " \"initializer_range\": 0.02,\n",
+ " \"intermediate_size\": 8960,\n",
+ " \"max_position_embeddings\": 32768,\n",
+ " \"max_window_layers\": 28,\n",
+ " \"model_type\": \"qwen2\",\n",
+ " \"num_attention_heads\": 12,\n",
+ " \"num_hidden_layers\": 28,\n",
+ " \"num_key_value_heads\": 2,\n",
+ " \"rms_norm_eps\": 1e-06,\n",
+ " \"rope_theta\": 1000000.0,\n",
+ " \"sliding_window\": 32768,\n",
+ " \"tie_word_embeddings\": true,\n",
+ " \"torch_dtype\": \"bfloat16\",\n",
+ " \"transformers_version\": \"4.42.3\",\n",
+ " \"use_cache\": true,\n",
+ " \"use_sliding_window\": false,\n",
+ " \"vocab_size\": 151936\n",
+ "}\n",
+ "\n",
+ "[INFO|tokenization_utils_base.py:2574] 2024-07-04 12:42:23,882 >> tokenizer config file saved in saves/qwen2-1.5b/lora/sft/checkpoint-560/tokenizer_config.json\n",
+ "[INFO|tokenization_utils_base.py:2583] 2024-07-04 12:42:23,883 >> Special tokens file saved in saves/qwen2-1.5b/lora/sft/checkpoint-560/special_tokens_map.json\n",
+ "{'loss': 1.6305, 'grad_norm': 1.8726240396499634, 'learning_rate': 9.852982837266955e-05, 'epoch': 1.02}\n",
+ "{'loss': 1.4509, 'grad_norm': 2.8097503185272217, 'learning_rate': 9.840217551150706e-05, 'epoch': 1.04}\n",
+ "{'loss': 1.6345, 'grad_norm': 2.0100064277648926, 'learning_rate': 9.826929872276255e-05, 'epoch': 1.05}\n",
+ "{'loss': 1.5736, 'grad_norm': 2.456465482711792, 'learning_rate': 9.81312123475006e-05, 'epoch': 1.07}\n",
+ "{'loss': 1.5363, 'grad_norm': 2.7739548683166504, 'learning_rate': 9.798793128904356e-05, 'epoch': 1.09}\n",
+ "{'loss': 1.5754, 'grad_norm': 2.8599655628204346, 'learning_rate': 9.78394710113631e-05, 'epoch': 1.11}\n",
+ "{'loss': 1.5728, 'grad_norm': 2.2901456356048584, 'learning_rate': 9.768584753741134e-05, 'epoch': 1.12}\n",
+ "{'loss': 1.5632, 'grad_norm': 3.6802914142608643, 'learning_rate': 9.752707744739145e-05, 'epoch': 1.14}\n",
+ "{'loss': 1.5927, 'grad_norm': 2.5885791778564453, 'learning_rate': 9.736317787696816e-05, 'epoch': 1.16}\n",
+ "{'loss': 1.4571, 'grad_norm': 2.383814573287964, 'learning_rate': 9.719416651541839e-05, 'epoch': 1.18}\n",
+ "{'loss': 1.424, 'grad_norm': 2.032453775405884, 'learning_rate': 9.702006160372209e-05, 'epoch': 1.2}\n",
+ "{'loss': 1.5577, 'grad_norm': 2.8879408836364746, 'learning_rate': 9.684088193259355e-05, 'epoch': 1.21}\n",
+ "{'loss': 1.5083, 'grad_norm': 2.9004592895507812, 'learning_rate': 9.665664684045333e-05, 'epoch': 1.23}\n",
+ "{'loss': 1.5696, 'grad_norm': 3.4651644229888916, 'learning_rate': 9.646737621134112e-05, 'epoch': 1.25}\n",
+ "{'loss': 1.542, 'grad_norm': 3.6657605171203613, 'learning_rate': 9.627309047276974e-05, 'epoch': 1.27}\n",
+ "{'loss': 1.6975, 'grad_norm': 3.4882619380950928, 'learning_rate': 9.607381059352038e-05, 'epoch': 1.29}\n",
+ "{'loss': 1.6179, 'grad_norm': 2.73240327835083, 'learning_rate': 9.586955808137958e-05, 'epoch': 1.3}\n",
+ "{'loss': 1.6236, 'grad_norm': 2.60489559173584, 'learning_rate': 9.566035498081784e-05, 'epoch': 1.32}\n",
+ "{'loss': 1.5901, 'grad_norm': 3.45670223236084, 'learning_rate': 9.544622387061055e-05, 'epoch': 1.34}\n",
+ "{'loss': 1.3816, 'grad_norm': 3.3906328678131104, 'learning_rate': 9.522718786140097e-05, 'epoch': 1.36}\n",
+ "{'loss': 1.6149, 'grad_norm': 3.6723110675811768, 'learning_rate': 9.500327059320606e-05, 'epoch': 1.37}\n",
+ "{'loss': 1.4588, 'grad_norm': 4.5224103927612305, 'learning_rate': 9.477449623286505e-05, 'epoch': 1.39}\n",
+ "{'loss': 1.3431, 'grad_norm': 2.5576796531677246, 'learning_rate': 9.454088947143116e-05, 'epoch': 1.41}\n",
+ "{'loss': 1.6278, 'grad_norm': 3.344188690185547, 'learning_rate': 9.430247552150673e-05, 'epoch': 1.43}\n",
+ "{'loss': 1.5137, 'grad_norm': 3.4474005699157715, 'learning_rate': 9.405928011452211e-05, 'epoch': 1.45}\n",
+ "{'loss': 1.4911, 'grad_norm': 2.6104114055633545, 'learning_rate': 9.381132949795861e-05, 'epoch': 1.46}\n",
+ "{'loss': 1.6567, 'grad_norm': 3.090139150619507, 'learning_rate': 9.35586504325155e-05, 'epoch': 1.48}\n",
+ "{'loss': 1.5008, 'grad_norm': 3.6463866233825684, 'learning_rate': 9.330127018922194e-05, 'epoch': 1.5}\n",
+ "{'loss': 1.4248, 'grad_norm': 2.3963379859924316, 'learning_rate': 9.303921654649362e-05, 'epoch': 1.52}\n",
+ "{'loss': 1.6043, 'grad_norm': 3.4818763732910156, 'learning_rate': 9.277251778713474e-05, 'epoch': 1.54}\n",
+ "{'loss': 1.5517, 'grad_norm': 3.180640697479248, 'learning_rate': 9.250120269528546e-05, 'epoch': 1.55}\n",
+ "{'loss': 1.4711, 'grad_norm': 2.7267000675201416, 'learning_rate': 9.22253005533154e-05, 'epoch': 1.57}\n",
+ "{'loss': 1.5511, 'grad_norm': 3.386282444000244, 'learning_rate': 9.194484113866313e-05, 'epoch': 1.59}\n",
+ "{'loss': 1.6975, 'grad_norm': 2.707632064819336, 'learning_rate': 9.165985472062246e-05, 'epoch': 1.61}\n",
+ "{'loss': 1.4396, 'grad_norm': 2.970285177230835, 'learning_rate': 9.137037205707552e-05, 'epoch': 1.62}\n",
+ "{'loss': 1.5347, 'grad_norm': 2.7082931995391846, 'learning_rate': 9.107642439117321e-05, 'epoch': 1.64}\n",
+ "{'loss': 1.5446, 'grad_norm': 2.947016716003418, 'learning_rate': 9.077804344796302e-05, 'epoch': 1.66}\n",
+ "{'loss': 1.5401, 'grad_norm': 2.4926042556762695, 'learning_rate': 9.04752614309652e-05, 'epoch': 1.68}\n",
+ "{'loss': 1.479, 'grad_norm': 3.50626802444458, 'learning_rate': 9.01681110186971e-05, 'epoch': 1.7}\n",
+ "{'loss': 1.5107, 'grad_norm': 4.556169509887695, 'learning_rate': 8.985662536114613e-05, 'epoch': 1.71}\n",
+ "{'loss': 1.473, 'grad_norm': 2.4575538635253906, 'learning_rate': 8.954083807619208e-05, 'epoch': 1.73}\n",
+ "{'loss': 1.6125, 'grad_norm': 3.063415765762329, 'learning_rate': 8.922078324597879e-05, 'epoch': 1.75}\n",
+ "{'loss': 1.5893, 'grad_norm': 2.45483660697937, 'learning_rate': 8.889649541323574e-05, 'epoch': 1.77}\n",
+ "{'loss': 1.4993, 'grad_norm': 3.031142473220825, 'learning_rate': 8.856800957755e-05, 'epoch': 1.78}\n",
+ "{'loss': 1.5025, 'grad_norm': 2.9005496501922607, 'learning_rate': 8.823536119158864e-05, 'epoch': 1.8}\n",
+ "{'loss': 1.4725, 'grad_norm': 2.9155054092407227, 'learning_rate': 8.789858615727265e-05, 'epoch': 1.82}\n",
+ "{'loss': 1.4313, 'grad_norm': 2.5998966693878174, 'learning_rate': 8.755772082190194e-05, 'epoch': 1.84}\n",
+ "{'loss': 1.5647, 'grad_norm': 2.5580039024353027, 'learning_rate': 8.721280197423258e-05, 'epoch': 1.86}\n",
+ "{'loss': 1.4349, 'grad_norm': 3.395029067993164, 'learning_rate': 8.68638668405062e-05, 'epoch': 1.87}\n",
+ "{'loss': 1.5214, 'grad_norm': 2.8961341381073, 'learning_rate': 8.651095308043232e-05, 'epoch': 1.89}\n",
+ "{'loss': 1.6206, 'grad_norm': 3.4450645446777344, 'learning_rate': 8.61540987831238e-05, 'epoch': 1.91}\n",
+ "{'loss': 1.6429, 'grad_norm': 3.4198362827301025, 'learning_rate': 8.579334246298593e-05, 'epoch': 1.93}\n",
+ "{'loss': 1.5473, 'grad_norm': 2.9955196380615234, 'learning_rate': 8.542872305555978e-05, 'epoch': 1.95}\n",
+ "{'loss': 1.4405, 'grad_norm': 2.7997260093688965, 'learning_rate': 8.50602799133199e-05, 'epoch': 1.96}\n",
+ "{'loss': 1.5382, 'grad_norm': 2.4689786434173584, 'learning_rate': 8.468805280142709e-05, 'epoch': 1.98}\n",
+ "{'loss': 1.5378, 'grad_norm': 3.09759783744812, 'learning_rate': 8.43120818934367e-05, 'epoch': 2.0}\n",
+ " 33%|████████████▎ | 1120/3360 [32:07<1:05:51, 1.76s/it][INFO|trainer.py:3788] 2024-07-04 12:58:56,606 >> \n",
+ "***** Running Evaluation *****\n",
+ "[INFO|trainer.py:3790] 2024-07-04 12:58:56,606 >> Num examples = 46\n",
+ "[INFO|trainer.py:3793] 2024-07-04 12:58:56,606 >> Batch size = 1\n",
+ "\n",
+ " 0%| | 0/46 [00:00, ?it/s]\u001b[A\n",
+ " 7%|██▊ | 3/46 [00:00<00:01, 25.84it/s]\u001b[A\n",
+ " 13%|█████▋ | 6/46 [00:00<00:01, 21.96it/s]\u001b[A\n",
+ " 20%|████████▌ | 9/46 [00:00<00:01, 21.04it/s]\u001b[A\n",
+ " 26%|███████████▏ | 12/46 [00:00<00:01, 19.14it/s]\u001b[A\n",
+ " 30%|█████████████ | 14/46 [00:00<00:01, 19.13it/s]\u001b[A\n",
+ " 35%|██████████████▉ | 16/46 [00:00<00:01, 18.60it/s]\u001b[A\n",
+ " 39%|████████████████▊ | 18/46 [00:00<00:01, 18.46it/s]\u001b[A\n",
+ " 43%|██████████████████▋ | 20/46 [00:01<00:01, 18.88it/s]\u001b[A\n",
+ " 48%|████████████████████▌ | 22/46 [00:01<00:01, 19.01it/s]\u001b[A\n",
+ " 52%|██████████████████████▍ | 24/46 [00:01<00:01, 17.92it/s]\u001b[A\n",
+ " 59%|█████████████████████████▏ | 27/46 [00:01<00:01, 18.62it/s]\u001b[A\n",
+ " 63%|███████████████████████████ | 29/46 [00:01<00:00, 18.49it/s]\u001b[A\n",
+ " 67%|████████████████████████████▉ | 31/46 [00:01<00:00, 18.80it/s]\u001b[A\n",
+ " 72%|██████████████████████████████▊ | 33/46 [00:01<00:00, 18.48it/s]\u001b[A\n",
+ " 76%|████████████████████████████████▋ | 35/46 [00:01<00:00, 18.48it/s]\u001b[A\n",
+ " 80%|██████████████████████████████████▌ | 37/46 [00:01<00:00, 18.86it/s]\u001b[A\n",
+ " 85%|████████████████████████████████████▍ | 39/46 [00:02<00:00, 18.91it/s]\u001b[A\n",
+ " 89%|██████████████████████████████████████▎ | 41/46 [00:02<00:00, 18.85it/s]\u001b[A\n",
+ " 93%|████████████████████████████████████████▏ | 43/46 [00:02<00:00, 19.05it/s]\u001b[A\n",
+ " \u001b[A\n",
+ "\u001b[A{'eval_loss': 1.6970319747924805, 'eval_runtime': 2.4642, 'eval_samples_per_second': 18.668, 'eval_steps_per_second': 18.668, 'epoch': 2.0}\n",
+ " 33%|████████████▎ | 1120/3360 [32:10<1:05:51, 1.76s/it]\n",
+ "100%|███████████████████████████████████████████| 46/46 [00:02<00:00, 19.46it/s]\u001b[A\n",
+ " \u001b[A[INFO|trainer.py:3478] 2024-07-04 12:58:59,073 >> Saving model checkpoint to saves/qwen2-1.5b/lora/sft/checkpoint-1120\n",
+ "[INFO|configuration_utils.py:733] 2024-07-04 12:58:59,895 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/config.json\n",
+ "[INFO|configuration_utils.py:800] 2024-07-04 12:58:59,896 >> Model config Qwen2Config {\n",
+ " \"architectures\": [\n",
+ " \"Qwen2ForCausalLM\"\n",
+ " ],\n",
+ " \"attention_dropout\": 0.0,\n",
+ " \"bos_token_id\": 151643,\n",
+ " \"eos_token_id\": 151645,\n",
+ " \"hidden_act\": \"silu\",\n",
+ " \"hidden_size\": 1536,\n",
+ " \"initializer_range\": 0.02,\n",
+ " \"intermediate_size\": 8960,\n",
+ " \"max_position_embeddings\": 32768,\n",
+ " \"max_window_layers\": 28,\n",
+ " \"model_type\": \"qwen2\",\n",
+ " \"num_attention_heads\": 12,\n",
+ " \"num_hidden_layers\": 28,\n",
+ " \"num_key_value_heads\": 2,\n",
+ " \"rms_norm_eps\": 1e-06,\n",
+ " \"rope_theta\": 1000000.0,\n",
+ " \"sliding_window\": 32768,\n",
+ " \"tie_word_embeddings\": true,\n",
+ " \"torch_dtype\": \"bfloat16\",\n",
+ " \"transformers_version\": \"4.42.3\",\n",
+ " \"use_cache\": true,\n",
+ " \"use_sliding_window\": false,\n",
+ " \"vocab_size\": 151936\n",
+ "}\n",
+ "\n",
+ "[INFO|tokenization_utils_base.py:2574] 2024-07-04 12:58:59,945 >> tokenizer config file saved in saves/qwen2-1.5b/lora/sft/checkpoint-1120/tokenizer_config.json\n",
+ "[INFO|tokenization_utils_base.py:2583] 2024-07-04 12:58:59,945 >> Special tokens file saved in saves/qwen2-1.5b/lora/sft/checkpoint-1120/special_tokens_map.json\n",
+ "{'loss': 1.2363, 'grad_norm': 3.1393024921417236, 'learning_rate': 8.393240776696274e-05, 'epoch': 2.02}\n",
+ "{'loss': 1.1161, 'grad_norm': 2.708930253982544, 'learning_rate': 8.354907139929851e-05, 'epoch': 2.03}\n",
+ "{'loss': 1.1975, 'grad_norm': 4.3620429039001465, 'learning_rate': 8.316211416299397e-05, 'epoch': 2.05}\n",
+ "{'loss': 1.1225, 'grad_norm': 3.3463101387023926, 'learning_rate': 8.27715778213905e-05, 'epoch': 2.07}\n",
+ "{'loss': 1.0548, 'grad_norm': 2.8970718383789062, 'learning_rate': 8.237750452411353e-05, 'epoch': 2.09}\n",
+ "{'loss': 1.1526, 'grad_norm': 2.99774432182312, 'learning_rate': 8.197993680252334e-05, 'epoch': 2.11}\n",
+ "{'loss': 1.3093, 'grad_norm': 4.249015808105469, 'learning_rate': 8.157891756512488e-05, 'epoch': 2.12}\n",
+ "{'loss': 1.1306, 'grad_norm': 3.889763593673706, 'learning_rate': 8.117449009293668e-05, 'epoch': 2.14}\n",
+ "{'loss': 1.1286, 'grad_norm': 4.101832866668701, 'learning_rate': 8.076669803481965e-05, 'epoch': 2.16}\n",
+ "{'loss': 1.1271, 'grad_norm': 4.3527703285217285, 'learning_rate': 8.035558540276618e-05, 'epoch': 2.18}\n",
+ "{'loss': 1.1593, 'grad_norm': 3.5413858890533447, 'learning_rate': 7.994119656715002e-05, 'epoch': 2.2}\n",
+ "{'loss': 1.1824, 'grad_norm': 5.094357490539551, 'learning_rate': 7.952357625193749e-05, 'epoch': 2.21}\n",
+ "{'loss': 1.2347, 'grad_norm': 5.2239089012146, 'learning_rate': 7.91027695298606e-05, 'epoch': 2.23}\n",
+ "{'loss': 1.2285, 'grad_norm': 5.532718658447266, 'learning_rate': 7.86788218175523e-05, 'epoch': 2.25}\n",
+ "{'loss': 1.1147, 'grad_norm': 3.8143270015716553, 'learning_rate': 7.8251778870645e-05, 'epoch': 2.27}\n",
+ "{'loss': 1.1478, 'grad_norm': 4.406189441680908, 'learning_rate': 7.782168677883206e-05, 'epoch': 2.28}\n",
+ "{'loss': 1.1846, 'grad_norm': 3.269481658935547, 'learning_rate': 7.738859196089358e-05, 'epoch': 2.3}\n",
+ "{'loss': 1.2015, 'grad_norm': 4.366032123565674, 'learning_rate': 7.695254115968648e-05, 'epoch': 2.32}\n",
+ "{'loss': 1.3038, 'grad_norm': 3.7871077060699463, 'learning_rate': 7.651358143709972e-05, 'epoch': 2.34}\n",
+ "{'loss': 1.2532, 'grad_norm': 3.805539846420288, 'learning_rate': 7.60717601689749e-05, 'epoch': 2.36}\n",
+ "{'loss': 1.2044, 'grad_norm': 4.302929401397705, 'learning_rate': 7.562712503999327e-05, 'epoch': 2.37}\n",
+ "{'loss': 1.3852, 'grad_norm': 4.319093227386475, 'learning_rate': 7.517972403852905e-05, 'epoch': 2.39}\n",
+ "{'loss': 1.2647, 'grad_norm': 3.8114326000213623, 'learning_rate': 7.472960545147038e-05, 'epoch': 2.41}\n",
+ "{'loss': 1.1138, 'grad_norm': 4.816274166107178, 'learning_rate': 7.427681785900761e-05, 'epoch': 2.43}\n",
+ "{'loss': 1.1797, 'grad_norm': 3.7659311294555664, 'learning_rate': 7.382141012939034e-05, 'epoch': 2.45}\n",
+ "{'loss': 1.1566, 'grad_norm': 3.777496337890625, 'learning_rate': 7.33634314136531e-05, 'epoch': 2.46}\n",
+ "{'loss': 1.2235, 'grad_norm': 3.779813051223755, 'learning_rate': 7.290293114031061e-05, 'epoch': 2.48}\n",
+ "{'loss': 1.3044, 'grad_norm': 4.243238925933838, 'learning_rate': 7.243995901002312e-05, 'epoch': 2.5}\n",
+ "{'loss': 1.1993, 'grad_norm': 3.7302756309509277, 'learning_rate': 7.197456499023225e-05, 'epoch': 2.52}\n",
+ "{'loss': 1.1955, 'grad_norm': 3.837207555770874, 'learning_rate': 7.150679930976825e-05, 'epoch': 2.53}\n",
+ "{'loss': 1.2282, 'grad_norm': 4.182308673858643, 'learning_rate': 7.103671245342887e-05, 'epoch': 2.55}\n",
+ "{'loss': 1.1068, 'grad_norm': 4.697420120239258, 'learning_rate': 7.056435515653059e-05, 'epoch': 2.57}\n",
+ "{'loss': 1.3001, 'grad_norm': 5.241019248962402, 'learning_rate': 7.008977839943299e-05, 'epoch': 2.59}\n",
+ "{'loss': 1.1734, 'grad_norm': 5.618649959564209, 'learning_rate': 6.961303340203653e-05, 'epoch': 2.61}\n",
+ "{'loss': 1.2205, 'grad_norm': 3.501143455505371, 'learning_rate': 6.91341716182545e-05, 'epoch': 2.62}\n",
+ "{'loss': 1.2196, 'grad_norm': 2.823162317276001, 'learning_rate': 6.86532447304597e-05, 'epoch': 2.64}\n",
+ "{'loss': 1.1884, 'grad_norm': 3.8134286403656006, 'learning_rate': 6.817030464390656e-05, 'epoch': 2.66}\n",
+ "{'loss': 1.296, 'grad_norm': 3.9806973934173584, 'learning_rate': 6.768540348112907e-05, 'epoch': 2.68}\n",
+ "{'loss': 1.0861, 'grad_norm': 5.336892604827881, 'learning_rate': 6.719859357631535e-05, 'epoch': 2.7}\n",
+ "{'loss': 1.1123, 'grad_norm': 5.413362503051758, 'learning_rate': 6.670992746965938e-05, 'epoch': 2.71}\n",
+ "{'loss': 1.3405, 'grad_norm': 3.942927122116089, 'learning_rate': 6.621945790169036e-05, 'epoch': 2.73}\n",
+ "{'loss': 1.2739, 'grad_norm': 3.9731507301330566, 'learning_rate': 6.572723780758069e-05, 'epoch': 2.75}\n",
+ "{'loss': 1.2215, 'grad_norm': 3.9058139324188232, 'learning_rate': 6.523332031143272e-05, 'epoch': 2.77}\n",
+ "{'loss': 1.231, 'grad_norm': 3.7157390117645264, 'learning_rate': 6.473775872054521e-05, 'epoch': 2.78}\n",
+ "{'loss': 1.0667, 'grad_norm': 3.2383055686950684, 'learning_rate': 6.424060651966007e-05, 'epoch': 2.8}\n",
+ "{'loss': 1.1742, 'grad_norm': 3.6972646713256836, 'learning_rate': 6.374191736518974e-05, 'epoch': 2.82}\n",
+ "{'loss': 1.2108, 'grad_norm': 3.783498764038086, 'learning_rate': 6.324174507942637e-05, 'epoch': 2.84}\n",
+ "{'loss': 1.1861, 'grad_norm': 4.8546037673950195, 'learning_rate': 6.274014364473274e-05, 'epoch': 2.86}\n",
+ "{'loss': 1.364, 'grad_norm': 3.1121954917907715, 'learning_rate': 6.22371671977162e-05, 'epoch': 2.87}\n",
+ "{'loss': 1.2202, 'grad_norm': 4.141942024230957, 'learning_rate': 6.173287002338577e-05, 'epoch': 2.89}\n",
+ "{'loss': 1.2125, 'grad_norm': 4.195278167724609, 'learning_rate': 6.122730654929334e-05, 'epoch': 2.91}\n",
+ "{'loss': 1.1392, 'grad_norm': 3.6065282821655273, 'learning_rate': 6.072053133965938e-05, 'epoch': 2.93}\n",
+ "{'loss': 1.3093, 'grad_norm': 3.8997342586517334, 'learning_rate': 6.021259908948402e-05, 'epoch': 2.95}\n",
+ "{'loss': 1.258, 'grad_norm': 4.212363243103027, 'learning_rate': 5.970356461864391e-05, 'epoch': 2.96}\n",
+ "{'loss': 1.1774, 'grad_norm': 4.735218524932861, 'learning_rate': 5.919348286597569e-05, 'epoch': 2.98}\n",
+ "{'loss': 1.2808, 'grad_norm': 3.88008713722229, 'learning_rate': 5.868240888334653e-05, 'epoch': 3.0}\n",
+ " 50%|███████████████████▌ | 1680/3360 [48:42<49:15, 1.76s/it][INFO|trainer.py:3788] 2024-07-04 13:15:31,424 >> \n",
+ "***** Running Evaluation *****\n",
+ "[INFO|trainer.py:3790] 2024-07-04 13:15:31,425 >> Num examples = 46\n",
+ "[INFO|trainer.py:3793] 2024-07-04 13:15:31,425 >> Batch size = 1\n",
+ "\n",
+ " 0%| | 0/46 [00:00, ?it/s]\u001b[A\n",
+ " 7%|██▊ | 3/46 [00:00<00:01, 29.32it/s]\u001b[A\n",
+ " 13%|█████▋ | 6/46 [00:00<00:01, 22.95it/s]\u001b[A\n",
+ " 20%|████████▌ | 9/46 [00:00<00:01, 21.48it/s]\u001b[A\n",
+ " 26%|███████████▏ | 12/46 [00:00<00:01, 20.77it/s]\u001b[A\n",
+ " 33%|██████████████ | 15/46 [00:00<00:01, 20.79it/s]\u001b[A\n",
+ " 39%|████████████████▊ | 18/46 [00:00<00:01, 20.58it/s]\u001b[A\n",
+ " 46%|███████████████████▋ | 21/46 [00:00<00:01, 20.58it/s]\u001b[A\n",
+ " 52%|██████████████████████▍ | 24/46 [00:01<00:01, 20.58it/s]\u001b[A\n",
+ " 59%|█████████████████████████▏ | 27/46 [00:01<00:00, 20.54it/s]\u001b[A\n",
+ " 65%|████████████████████████████ | 30/46 [00:01<00:00, 19.77it/s]\u001b[A\n",
+ " 70%|█████████████████████████████▉ | 32/46 [00:01<00:00, 19.59it/s]\u001b[A\n",
+ " 74%|███████████████████████████████▊ | 34/46 [00:01<00:00, 19.42it/s]\u001b[A\n",
+ " 78%|█████████████████████████████████▋ | 36/46 [00:01<00:00, 18.92it/s]\u001b[A\n",
+ " 83%|███████████████████████████████████▌ | 38/46 [00:01<00:00, 19.10it/s]\u001b[A\n",
+ " 87%|█████████████████████████████████████▍ | 40/46 [00:01<00:00, 18.95it/s]\u001b[A\n",
+ " 91%|███████████████████████████████████████▎ | 42/46 [00:02<00:00, 17.92it/s]\u001b[A\n",
+ " 96%|█████████████████████████████████████████▏ | 44/46 [00:02<00:00, 18.37it/s]\u001b[A\n",
+ " \u001b[A\n",
+ "\u001b[A{'eval_loss': 1.8097562789916992, 'eval_runtime': 2.3741, 'eval_samples_per_second': 19.376, 'eval_steps_per_second': 19.376, 'epoch': 3.0}\n",
+ " 50%|███████████████████▌ | 1680/3360 [48:45<49:15, 1.76s/it]\n",
+ "100%|███████████████████████████████████████████| 46/46 [00:02<00:00, 18.65it/s]\u001b[A\n",
+ " \u001b[A[INFO|trainer.py:3478] 2024-07-04 13:15:33,801 >> Saving model checkpoint to saves/qwen2-1.5b/lora/sft/checkpoint-1680\n",
+ "[INFO|configuration_utils.py:733] 2024-07-04 13:15:34,788 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/config.json\n",
+ "[INFO|configuration_utils.py:800] 2024-07-04 13:15:34,789 >> Model config Qwen2Config {\n",
+ " \"architectures\": [\n",
+ " \"Qwen2ForCausalLM\"\n",
+ " ],\n",
+ " \"attention_dropout\": 0.0,\n",
+ " \"bos_token_id\": 151643,\n",
+ " \"eos_token_id\": 151645,\n",
+ " \"hidden_act\": \"silu\",\n",
+ " \"hidden_size\": 1536,\n",
+ " \"initializer_range\": 0.02,\n",
+ " \"intermediate_size\": 8960,\n",
+ " \"max_position_embeddings\": 32768,\n",
+ " \"max_window_layers\": 28,\n",
+ " \"model_type\": \"qwen2\",\n",
+ " \"num_attention_heads\": 12,\n",
+ " \"num_hidden_layers\": 28,\n",
+ " \"num_key_value_heads\": 2,\n",
+ " \"rms_norm_eps\": 1e-06,\n",
+ " \"rope_theta\": 1000000.0,\n",
+ " \"sliding_window\": 32768,\n",
+ " \"tie_word_embeddings\": true,\n",
+ " \"torch_dtype\": \"bfloat16\",\n",
+ " \"transformers_version\": \"4.42.3\",\n",
+ " \"use_cache\": true,\n",
+ " \"use_sliding_window\": false,\n",
+ " \"vocab_size\": 151936\n",
+ "}\n",
+ "\n",
+ "[INFO|tokenization_utils_base.py:2574] 2024-07-04 13:15:34,839 >> tokenizer config file saved in saves/qwen2-1.5b/lora/sft/checkpoint-1680/tokenizer_config.json\n",
+ "[INFO|tokenization_utils_base.py:2583] 2024-07-04 13:15:34,839 >> Special tokens file saved in saves/qwen2-1.5b/lora/sft/checkpoint-1680/special_tokens_map.json\n",
+ "{'loss': 1.1006, 'grad_norm': 3.581298589706421, 'learning_rate': 5.8170397829712485e-05, 'epoch': 3.02}\n",
+ "{'loss': 0.7853, 'grad_norm': 4.149472713470459, 'learning_rate': 5.765750496516547e-05, 'epoch': 3.03}\n",
+ "{'loss': 0.9606, 'grad_norm': 4.768033027648926, 'learning_rate': 5.714378564496901e-05, 'epoch': 3.05}\n",
+ "{'loss': 0.8799, 'grad_norm': 3.7473530769348145, 'learning_rate': 5.6629295313583974e-05, 'epoch': 3.07}\n",
+ "{'loss': 0.8164, 'grad_norm': 3.66397762298584, 'learning_rate': 5.611408949868457e-05, 'epoch': 3.09}\n",
+ "{'loss': 0.8902, 'grad_norm': 5.061825275421143, 'learning_rate': 5.559822380516539e-05, 'epoch': 3.11}\n",
+ "{'loss': 0.8534, 'grad_norm': 4.06561803817749, 'learning_rate': 5.5081753909140096e-05, 'epoch': 3.12}\n",
+ "{'loss': 0.9668, 'grad_norm': 4.875536918640137, 'learning_rate': 5.456473555193242e-05, 'epoch': 3.14}\n",
+ "{'loss': 0.8607, 'grad_norm': 6.051300048828125, 'learning_rate': 5.404722453406017e-05, 'epoch': 3.16}\n",
+ "{'loss': 0.9096, 'grad_norm': 5.530092716217041, 'learning_rate': 5.3529276709212816e-05, 'epoch': 3.18}\n",
+ "{'loss': 0.9201, 'grad_norm': 6.524964809417725, 'learning_rate': 5.30109479782233e-05, 'epoch': 3.2}\n",
+ "{'loss': 0.856, 'grad_norm': 4.842297554016113, 'learning_rate': 5.249229428303486e-05, 'epoch': 3.21}\n",
+ "{'loss': 1.0534, 'grad_norm': 3.963986396789551, 'learning_rate': 5.197337160066331e-05, 'epoch': 3.23}\n",
+ "{'loss': 0.8642, 'grad_norm': 4.481607437133789, 'learning_rate': 5.145423593715557e-05, 'epoch': 3.25}\n",
+ "{'loss': 0.8856, 'grad_norm': 3.9990179538726807, 'learning_rate': 5.0934943321545115e-05, 'epoch': 3.27}\n",
+ "{'loss': 0.7925, 'grad_norm': 4.209486484527588, 'learning_rate': 5.041554979980486e-05, 'epoch': 3.28}\n",
+ "{'loss': 0.9874, 'grad_norm': 4.624832630157471, 'learning_rate': 4.9896111428798254e-05, 'epoch': 3.3}\n",
+ "{'loss': 0.9581, 'grad_norm': 5.0234785079956055, 'learning_rate': 4.9376684270229254e-05, 'epoch': 3.32}\n",
+ "{'loss': 0.9273, 'grad_norm': 4.156904220581055, 'learning_rate': 4.8857324384591653e-05, 'epoch': 3.34}\n",
+ "{'loss': 0.8929, 'grad_norm': 4.292726516723633, 'learning_rate': 4.8338087825118675e-05, 'epoch': 3.36}\n",
+ "{'loss': 0.9584, 'grad_norm': 5.206954002380371, 'learning_rate': 4.781903063173321e-05, 'epoch': 3.37}\n",
+ "{'loss': 0.8548, 'grad_norm': 4.075423717498779, 'learning_rate': 4.730020882499964e-05, 'epoch': 3.39}\n",
+ "{'loss': 0.9083, 'grad_norm': 5.6302008628845215, 'learning_rate': 4.678167840007767e-05, 'epoch': 3.41}\n",
+ "{'loss': 0.9967, 'grad_norm': 4.765602111816406, 'learning_rate': 4.626349532067879e-05, 'epoch': 3.43}\n",
+ "{'loss': 0.9439, 'grad_norm': 4.012918949127197, 'learning_rate': 4.574571551302647e-05, 'epoch': 3.44}\n",
+ "{'loss': 0.9758, 'grad_norm': 3.995499849319458, 'learning_rate': 4.522839485981994e-05, 'epoch': 3.46}\n",
+ "{'loss': 0.9056, 'grad_norm': 4.168616771697998, 'learning_rate': 4.471158919420312e-05, 'epoch': 3.48}\n",
+ "{'loss': 0.8866, 'grad_norm': 5.359450817108154, 'learning_rate': 4.4195354293738484e-05, 'epoch': 3.5}\n",
+ "{'loss': 0.8474, 'grad_norm': 5.709634304046631, 'learning_rate': 4.367974587438733e-05, 'epoch': 3.52}\n",
+ "{'loss': 0.9325, 'grad_norm': 6.928687572479248, 'learning_rate': 4.316481958449634e-05, 'epoch': 3.53}\n",
+ "{'loss': 0.8947, 'grad_norm': 9.245586395263672, 'learning_rate': 4.2650630998791615e-05, 'epoch': 3.55}\n",
+ "{'loss': 1.0068, 'grad_norm': 7.456272602081299, 'learning_rate': 4.213723561238074e-05, 'epoch': 3.57}\n",
+ "{'loss': 0.8202, 'grad_norm': 3.894721746444702, 'learning_rate': 4.162468883476319e-05, 'epoch': 3.59}\n",
+ "{'loss': 0.8858, 'grad_norm': 4.249356269836426, 'learning_rate': 4.111304598385018e-05, 'epoch': 3.61}\n",
+ "{'loss': 0.9275, 'grad_norm': 6.780489921569824, 'learning_rate': 4.060236227999441e-05, 'epoch': 3.62}\n",
+ "{'loss': 0.8648, 'grad_norm': 5.042501449584961, 'learning_rate': 4.0092692840030134e-05, 'epoch': 3.64}\n",
+ "{'loss': 0.8905, 'grad_norm': 4.697298526763916, 'learning_rate': 3.9584092671324606e-05, 'epoch': 3.66}\n",
+ "{'loss': 0.9487, 'grad_norm': 8.913374900817871, 'learning_rate': 3.907661666584131e-05, 'epoch': 3.68}\n",
+ "{'loss': 0.9711, 'grad_norm': 6.1330885887146, 'learning_rate': 3.857031959421553e-05, 'epoch': 3.69}\n",
+ "{'loss': 0.9569, 'grad_norm': 4.3145599365234375, 'learning_rate': 3.806525609984312e-05, 'epoch': 3.71}\n",
+ "{'loss': 0.8455, 'grad_norm': 5.199124813079834, 'learning_rate': 3.7561480692983006e-05, 'epoch': 3.73}\n",
+ "{'loss': 0.9282, 'grad_norm': 4.841589450836182, 'learning_rate': 3.705904774487396e-05, 'epoch': 3.75}\n",
+ "{'loss': 0.9613, 'grad_norm': 5.256564140319824, 'learning_rate': 3.655801148186655e-05, 'epoch': 3.77}\n",
+ "{'loss': 0.832, 'grad_norm': 4.737905025482178, 'learning_rate': 3.6058425979570485e-05, 'epoch': 3.78}\n",
+ "{'loss': 0.9194, 'grad_norm': 4.6155524253845215, 'learning_rate': 3.556034515701852e-05, 'epoch': 3.8}\n",
+ "{'loss': 0.9149, 'grad_norm': 5.484898090362549, 'learning_rate': 3.506382277084696e-05, 'epoch': 3.82}\n",
+ "{'loss': 0.9029, 'grad_norm': 4.691559314727783, 'learning_rate': 3.4568912409493945e-05, 'epoch': 3.84}\n",
+ "{'loss': 0.7487, 'grad_norm': 4.6990132331848145, 'learning_rate': 3.4075667487415785e-05, 'epoch': 3.86}\n",
+ "{'loss': 0.8389, 'grad_norm': 3.9886608123779297, 'learning_rate': 3.358414123932195e-05, 'epoch': 3.87}\n",
+ "{'loss': 0.9443, 'grad_norm': 3.797034978866577, 'learning_rate': 3.3094386714429724e-05, 'epoch': 3.89}\n",
+ "{'loss': 0.9102, 'grad_norm': 9.836748123168945, 'learning_rate': 3.2606456770738636e-05, 'epoch': 3.91}\n",
+ "{'loss': 0.8031, 'grad_norm': 6.517895221710205, 'learning_rate': 3.212040406932569e-05, 'epoch': 3.93}\n",
+ "{'loss': 0.7276, 'grad_norm': 3.757455825805664, 'learning_rate': 3.163628106866172e-05, 'epoch': 3.94}\n",
+ "{'loss': 1.0437, 'grad_norm': 5.128631591796875, 'learning_rate': 3.115414001894974e-05, 'epoch': 3.96}\n",
+ "{'loss': 0.9261, 'grad_norm': 4.2124457359313965, 'learning_rate': 3.067403295648566e-05, 'epoch': 3.98}\n",
+ "{'loss': 0.7864, 'grad_norm': 3.609720230102539, 'learning_rate': 3.019601169804216e-05, 'epoch': 4.0}\n",
+ " 67%|████████████████████████▋ | 2240/3360 [1:05:16<32:59, 1.77s/it][INFO|trainer.py:3788] 2024-07-04 13:32:05,670 >> \n",
+ "***** Running Evaluation *****\n",
+ "[INFO|trainer.py:3790] 2024-07-04 13:32:05,670 >> Num examples = 46\n",
+ "[INFO|trainer.py:3793] 2024-07-04 13:32:05,670 >> Batch size = 1\n",
+ "\n",
+ " 0%| | 0/46 [00:00, ?it/s]\u001b[A\n",
+ " 7%|██▊ | 3/46 [00:00<00:01, 24.52it/s]\u001b[A\n",
+ " 13%|█████▋ | 6/46 [00:00<00:01, 20.90it/s]\u001b[A\n",
+ " 20%|████████▌ | 9/46 [00:00<00:01, 20.22it/s]\u001b[A\n",
+ " 26%|███████████▏ | 12/46 [00:00<00:01, 19.67it/s]\u001b[A\n",
+ " 30%|█████████████ | 14/46 [00:00<00:01, 19.66it/s]\u001b[A\n",
+ " 35%|██████████████▉ | 16/46 [00:00<00:01, 18.52it/s]\u001b[A\n",
+ " 39%|████████████████▊ | 18/46 [00:00<00:01, 18.59it/s]\u001b[A\n",
+ " 43%|██████████████████▋ | 20/46 [00:01<00:01, 18.81it/s]\u001b[A\n",
+ " 48%|████████████████████▌ | 22/46 [00:01<00:01, 19.01it/s]\u001b[A\n",
+ " 52%|██████████████████████▍ | 24/46 [00:01<00:01, 18.89it/s]\u001b[A\n",
+ " 57%|████████████████████████▎ | 26/46 [00:01<00:01, 19.08it/s]\u001b[A\n",
+ " 63%|███████████████████████████ | 29/46 [00:01<00:00, 19.48it/s]\u001b[A\n",
+ " 67%|████████████████████████████▉ | 31/46 [00:01<00:00, 19.48it/s]\u001b[A\n",
+ " 72%|██████████████████████████████▊ | 33/46 [00:01<00:00, 19.42it/s]\u001b[A\n",
+ " 78%|█████████████████████████████████▋ | 36/46 [00:01<00:00, 19.37it/s]\u001b[A\n",
+ " 83%|███████████████████████████████████▌ | 38/46 [00:01<00:00, 18.44it/s]\u001b[A\n",
+ " 87%|█████████████████████████████████████▍ | 40/46 [00:02<00:00, 17.52it/s]\u001b[A\n",
+ " 91%|███████████████████████████████████████▎ | 42/46 [00:02<00:00, 17.11it/s]\u001b[A\n",
+ " 96%|█████████████████████████████████████████▏ | 44/46 [00:02<00:00, 17.41it/s]\u001b[A\n",
+ " \u001b[A\n",
+ "\u001b[A{'eval_loss': 1.9859257936477661, 'eval_runtime': 2.5092, 'eval_samples_per_second': 18.332, 'eval_steps_per_second': 18.332, 'epoch': 4.0}\n",
+ " 67%|████████████████████████▋ | 2240/3360 [1:05:19<32:59, 1.77s/it]\n",
+ "100%|███████████████████████████████████████████| 46/46 [00:02<00:00, 17.74it/s]\u001b[A\n",
+ " \u001b[A[INFO|trainer.py:3478] 2024-07-04 13:32:08,182 >> Saving model checkpoint to saves/qwen2-1.5b/lora/sft/checkpoint-2240\n",
+ "[INFO|configuration_utils.py:733] 2024-07-04 13:32:08,839 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/config.json\n",
+ "[INFO|configuration_utils.py:800] 2024-07-04 13:32:08,839 >> Model config Qwen2Config {\n",
+ " \"architectures\": [\n",
+ " \"Qwen2ForCausalLM\"\n",
+ " ],\n",
+ " \"attention_dropout\": 0.0,\n",
+ " \"bos_token_id\": 151643,\n",
+ " \"eos_token_id\": 151645,\n",
+ " \"hidden_act\": \"silu\",\n",
+ " \"hidden_size\": 1536,\n",
+ " \"initializer_range\": 0.02,\n",
+ " \"intermediate_size\": 8960,\n",
+ " \"max_position_embeddings\": 32768,\n",
+ " \"max_window_layers\": 28,\n",
+ " \"model_type\": \"qwen2\",\n",
+ " \"num_attention_heads\": 12,\n",
+ " \"num_hidden_layers\": 28,\n",
+ " \"num_key_value_heads\": 2,\n",
+ " \"rms_norm_eps\": 1e-06,\n",
+ " \"rope_theta\": 1000000.0,\n",
+ " \"sliding_window\": 32768,\n",
+ " \"tie_word_embeddings\": true,\n",
+ " \"torch_dtype\": \"bfloat16\",\n",
+ " \"transformers_version\": \"4.42.3\",\n",
+ " \"use_cache\": true,\n",
+ " \"use_sliding_window\": false,\n",
+ " \"vocab_size\": 151936\n",
+ "}\n",
+ "\n",
+ "[INFO|tokenization_utils_base.py:2574] 2024-07-04 13:32:08,917 >> tokenizer config file saved in saves/qwen2-1.5b/lora/sft/checkpoint-2240/tokenizer_config.json\n",
+ "[INFO|tokenization_utils_base.py:2583] 2024-07-04 13:32:08,917 >> Special tokens file saved in saves/qwen2-1.5b/lora/sft/checkpoint-2240/special_tokens_map.json\n",
+ "{'loss': 0.7877, 'grad_norm': 3.5848188400268555, 'learning_rate': 2.9720127835276256e-05, 'epoch': 4.02}\n",
+ "{'loss': 0.6372, 'grad_norm': 4.3321661949157715, 'learning_rate': 2.9246432729161055e-05, 'epoch': 4.03}\n",
+ "{'loss': 0.7208, 'grad_norm': 5.079366207122803, 'learning_rate': 2.8774977504442647e-05, 'epoch': 4.05}\n",
+ "{'loss': 0.6386, 'grad_norm': 4.389534950256348, 'learning_rate': 2.8305813044122097e-05, 'epoch': 4.07}\n",
+ "{'loss': 0.6416, 'grad_norm': 3.866262197494507, 'learning_rate': 2.7838989983964065e-05, 'epoch': 4.09}\n",
+ "{'loss': 0.7771, 'grad_norm': 4.4248528480529785, 'learning_rate': 2.737455870703155e-05, 'epoch': 4.11}\n",
+ "{'loss': 0.5944, 'grad_norm': 5.1537370681762695, 'learning_rate': 2.6912569338248315e-05, 'epoch': 4.12}\n",
+ "{'loss': 0.6846, 'grad_norm': 4.410754203796387, 'learning_rate': 2.645307173898901e-05, 'epoch': 4.14}\n",
+ "{'loss': 0.7499, 'grad_norm': 3.9317386150360107, 'learning_rate': 2.5996115501697694e-05, 'epoch': 4.16}\n",
+ "{'loss': 0.6378, 'grad_norm': 3.794434070587158, 'learning_rate': 2.5541749944535554e-05, 'epoch': 4.18}\n",
+ "{'loss': 0.6153, 'grad_norm': 4.012321472167969, 'learning_rate': 2.5090024106057962e-05, 'epoch': 4.19}\n",
+ "{'loss': 0.6922, 'grad_norm': 4.712143898010254, 'learning_rate': 2.464098673992205e-05, 'epoch': 4.21}\n",
+ "{'loss': 0.6079, 'grad_norm': 5.002867698669434, 'learning_rate': 2.4194686309624663e-05, 'epoch': 4.23}\n",
+ "{'loss': 0.7554, 'grad_norm': 6.034168720245361, 'learning_rate': 2.3751170983272e-05, 'epoch': 4.25}\n",
+ "{'loss': 0.6634, 'grad_norm': 5.4491376876831055, 'learning_rate': 2.3310488628380757e-05, 'epoch': 4.27}\n",
+ "{'loss': 0.6635, 'grad_norm': 6.335705280303955, 'learning_rate': 2.2872686806712035e-05, 'epoch': 4.28}\n",
+ "{'loss': 0.6732, 'grad_norm': 4.363458633422852, 'learning_rate': 2.243781276913811e-05, 'epoch': 4.3}\n",
+ "{'loss': 0.5751, 'grad_norm': 4.058308124542236, 'learning_rate': 2.200591345054267e-05, 'epoch': 4.32}\n",
+ "{'loss': 0.7378, 'grad_norm': 5.493106365203857, 'learning_rate': 2.157703546475539e-05, 'epoch': 4.34}\n",
+ "{'loss': 0.6231, 'grad_norm': 4.587257385253906, 'learning_rate': 2.115122509952085e-05, 'epoch': 4.36}\n",
+ "{'loss': 0.6361, 'grad_norm': 4.070307731628418, 'learning_rate': 2.0728528311502976e-05, 'epoch': 4.37}\n",
+ "{'loss': 0.7245, 'grad_norm': 5.507742404937744, 'learning_rate': 2.0308990721324927e-05, 'epoch': 4.39}\n",
+ "{'loss': 0.6516, 'grad_norm': 4.98870849609375, 'learning_rate': 1.989265760864542e-05, 'epoch': 4.41}\n",
+ "{'loss': 0.7311, 'grad_norm': 4.5378618240356445, 'learning_rate': 1.947957390727185e-05, 'epoch': 4.43}\n",
+ "{'loss': 0.713, 'grad_norm': 6.595687389373779, 'learning_rate': 1.906978420031059e-05, 'epoch': 4.44}\n",
+ "{'loss': 0.5884, 'grad_norm': 3.995753765106201, 'learning_rate': 1.8663332715355396e-05, 'epoch': 4.46}\n",
+ "{'loss': 0.7598, 'grad_norm': 3.745181083679199, 'learning_rate': 1.8260263319713844e-05, 'epoch': 4.48}\n",
+ "{'loss': 0.673, 'grad_norm': 5.82590389251709, 'learning_rate': 1.7860619515673033e-05, 'epoch': 4.5}\n",
+ "{'loss': 0.6552, 'grad_norm': 5.151037216186523, 'learning_rate': 1.746444443580433e-05, 'epoch': 4.52}\n",
+ "{'loss': 0.7091, 'grad_norm': 5.6730499267578125, 'learning_rate': 1.7071780838308288e-05, 'epoch': 4.53}\n",
+ "{'loss': 0.7061, 'grad_norm': 5.2298502922058105, 'learning_rate': 1.6682671102399805e-05, 'epoch': 4.55}\n",
+ "{'loss': 0.654, 'grad_norm': 5.273619651794434, 'learning_rate': 1.629715722373423e-05, 'epoch': 4.57}\n",
+ "{'loss': 0.6822, 'grad_norm': 6.466513633728027, 'learning_rate': 1.5915280809874932e-05, 'epoch': 4.59}\n",
+ "{'loss': 0.5785, 'grad_norm': 6.050833702087402, 'learning_rate': 1.553708307580265e-05, 'epoch': 4.61}\n",
+ "{'loss': 0.6877, 'grad_norm': 6.502690315246582, 'learning_rate': 1.5162604839467265e-05, 'epoch': 4.62}\n",
+ "{'loss': 0.6984, 'grad_norm': 4.606260299682617, 'learning_rate': 1.4791886517382413e-05, 'epoch': 4.64}\n",
+ "{'loss': 0.6909, 'grad_norm': 4.80437707901001, 'learning_rate': 1.4424968120263504e-05, 'epoch': 4.66}\n",
+ "{'loss': 0.6827, 'grad_norm': 5.20365047454834, 'learning_rate': 1.4061889248709343e-05, 'epoch': 4.68}\n",
+ "{'loss': 0.6361, 'grad_norm': 6.29000997543335, 'learning_rate': 1.370268908892825e-05, 'epoch': 4.69}\n",
+ "{'loss': 0.6747, 'grad_norm': 4.9368438720703125, 'learning_rate': 1.3347406408508695e-05, 'epoch': 4.71}\n",
+ "{'loss': 0.6435, 'grad_norm': 5.528055667877197, 'learning_rate': 1.2996079552235263e-05, 'epoch': 4.73}\n",
+ "{'loss': 0.6501, 'grad_norm': 4.367548942565918, 'learning_rate': 1.264874643795021e-05, 'epoch': 4.75}\n",
+ "{'loss': 0.6376, 'grad_norm': 4.568158149719238, 'learning_rate': 1.230544455246101e-05, 'epoch': 4.77}\n",
+ "{'loss': 0.7034, 'grad_norm': 5.3214287757873535, 'learning_rate': 1.1966210947494583e-05, 'epoch': 4.78}\n",
+ "{'loss': 0.7303, 'grad_norm': 3.9356067180633545, 'learning_rate': 1.1631082235698316e-05, 'epoch': 4.8}\n",
+ "{'loss': 0.6436, 'grad_norm': 5.198613166809082, 'learning_rate': 1.130009458668863e-05, 'epoch': 4.82}\n",
+ "{'loss': 0.595, 'grad_norm': 4.129484176635742, 'learning_rate': 1.097328372314721e-05, 'epoch': 4.84}\n",
+ "{'loss': 0.6302, 'grad_norm': 8.324830055236816, 'learning_rate': 1.0650684916965559e-05, 'epoch': 4.85}\n",
+ "{'loss': 0.7296, 'grad_norm': 5.789163589477539, 'learning_rate': 1.0332332985438248e-05, 'epoch': 4.87}\n",
+ "{'loss': 0.7077, 'grad_norm': 5.632966995239258, 'learning_rate': 1.0018262287505086e-05, 'epoch': 4.89}\n",
+ "{'loss': 0.7339, 'grad_norm': 4.699968338012695, 'learning_rate': 9.708506720042932e-06, 'epoch': 4.91}\n",
+ "{'loss': 0.6334, 'grad_norm': 3.969327926635742, 'learning_rate': 9.403099714207175e-06, 'epoch': 4.93}\n",
+ "{'loss': 0.7298, 'grad_norm': 4.980201244354248, 'learning_rate': 9.102074231823727e-06, 'epoch': 4.94}\n",
+ "{'loss': 0.7236, 'grad_norm': 6.4100565910339355, 'learning_rate': 8.805462761831418e-06, 'epoch': 4.96}\n",
+ "{'loss': 0.7751, 'grad_norm': 5.446720600128174, 'learning_rate': 8.513297316775625e-06, 'epoch': 4.98}\n",
+ "{'loss': 0.7407, 'grad_norm': 4.180345058441162, 'learning_rate': 8.225609429353187e-06, 'epoch': 5.0}\n",
+ " 83%|██████████████████████████████▊ | 2800/3360 [1:21:52<16:57, 1.82s/it][INFO|trainer.py:3788] 2024-07-04 13:48:40,919 >> \n",
+ "***** Running Evaluation *****\n",
+ "[INFO|trainer.py:3790] 2024-07-04 13:48:40,919 >> Num examples = 46\n",
+ "[INFO|trainer.py:3793] 2024-07-04 13:48:40,919 >> Batch size = 1\n",
+ "\n",
+ " 0%| | 0/46 [00:00, ?it/s]\u001b[A\n",
+ " 7%|██▊ | 3/46 [00:00<00:01, 28.49it/s]\u001b[A\n",
+ " 13%|█████▋ | 6/46 [00:00<00:01, 21.83it/s]\u001b[A\n",
+ " 20%|████████▌ | 9/46 [00:00<00:01, 20.40it/s]\u001b[A\n",
+ " 26%|███████████▏ | 12/46 [00:00<00:01, 19.64it/s]\u001b[A\n",
+ " 33%|██████████████ | 15/46 [00:00<00:01, 19.60it/s]\u001b[A\n",
+ " 37%|███████████████▉ | 17/46 [00:00<00:01, 19.23it/s]\u001b[A\n",
+ " 41%|█████████████████▊ | 19/46 [00:00<00:01, 18.37it/s]\u001b[A\n",
+ " 46%|███████████████████▋ | 21/46 [00:01<00:01, 18.58it/s]\u001b[A\n",
+ " 50%|█████████████████████▌ | 23/46 [00:01<00:01, 18.77it/s]\u001b[A\n",
+ " 54%|███████████████████████▎ | 25/46 [00:01<00:01, 18.26it/s]\u001b[A\n",
+ " 59%|█████████████████████████▏ | 27/46 [00:01<00:01, 18.05it/s]\u001b[A\n",
+ " 63%|███████████████████████████ | 29/46 [00:01<00:00, 17.15it/s]\u001b[A\n",
+ " 67%|████████████████████████████▉ | 31/46 [00:01<00:00, 15.93it/s]\u001b[A\n",
+ " 72%|██████████████████████████████▊ | 33/46 [00:01<00:00, 16.53it/s]\u001b[A\n",
+ " 76%|████████████████████████████████▋ | 35/46 [00:01<00:00, 15.83it/s]\u001b[A\n",
+ " 80%|██████████████████████████████████▌ | 37/46 [00:02<00:00, 16.58it/s]\u001b[A\n",
+ " 85%|████████████████████████████████████▍ | 39/46 [00:02<00:00, 17.08it/s]\u001b[A\n",
+ " 89%|██████████████████████████████████████▎ | 41/46 [00:02<00:00, 17.52it/s]\u001b[A\n",
+ " 93%|████████████████████████████████████████▏ | 43/46 [00:02<00:00, 17.67it/s]\u001b[A\n",
+ " \u001b[A\n",
+ "\u001b[A{'eval_loss': 2.25398850440979, 'eval_runtime': 2.5926, 'eval_samples_per_second': 17.743, 'eval_steps_per_second': 17.743, 'epoch': 5.0}\n",
+ " 83%|██████████████████████████████▊ | 2800/3360 [1:21:54<16:57, 1.82s/it]\n",
+ "100%|███████████████████████████████████████████| 46/46 [00:02<00:00, 17.88it/s]\u001b[A\n",
+ " \u001b[A[INFO|trainer.py:3478] 2024-07-04 13:48:43,514 >> Saving model checkpoint to saves/qwen2-1.5b/lora/sft/checkpoint-2800\n",
+ "[INFO|configuration_utils.py:733] 2024-07-04 13:48:44,254 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/config.json\n",
+ "[INFO|configuration_utils.py:800] 2024-07-04 13:48:44,254 >> Model config Qwen2Config {\n",
+ " \"architectures\": [\n",
+ " \"Qwen2ForCausalLM\"\n",
+ " ],\n",
+ " \"attention_dropout\": 0.0,\n",
+ " \"bos_token_id\": 151643,\n",
+ " \"eos_token_id\": 151645,\n",
+ " \"hidden_act\": \"silu\",\n",
+ " \"hidden_size\": 1536,\n",
+ " \"initializer_range\": 0.02,\n",
+ " \"intermediate_size\": 8960,\n",
+ " \"max_position_embeddings\": 32768,\n",
+ " \"max_window_layers\": 28,\n",
+ " \"model_type\": \"qwen2\",\n",
+ " \"num_attention_heads\": 12,\n",
+ " \"num_hidden_layers\": 28,\n",
+ " \"num_key_value_heads\": 2,\n",
+ " \"rms_norm_eps\": 1e-06,\n",
+ " \"rope_theta\": 1000000.0,\n",
+ " \"sliding_window\": 32768,\n",
+ " \"tie_word_embeddings\": true,\n",
+ " \"torch_dtype\": \"bfloat16\",\n",
+ " \"transformers_version\": \"4.42.3\",\n",
+ " \"use_cache\": true,\n",
+ " \"use_sliding_window\": false,\n",
+ " \"vocab_size\": 151936\n",
+ "}\n",
+ "\n",
+ "[INFO|tokenization_utils_base.py:2574] 2024-07-04 13:48:44,307 >> tokenizer config file saved in saves/qwen2-1.5b/lora/sft/checkpoint-2800/tokenizer_config.json\n",
+ "[INFO|tokenization_utils_base.py:2583] 2024-07-04 13:48:44,307 >> Special tokens file saved in saves/qwen2-1.5b/lora/sft/checkpoint-2800/special_tokens_map.json\n",
+ "{'loss': 0.5816, 'grad_norm': 3.6714768409729004, 'learning_rate': 7.942430149009161e-06, 'epoch': 5.02}\n",
+ "{'loss': 0.516, 'grad_norm': 3.9655864238739014, 'learning_rate': 7.663790038585793e-06, 'epoch': 5.03}\n",
+ "{'loss': 0.5876, 'grad_norm': 4.2771453857421875, 'learning_rate': 7.389719171023857e-06, 'epoch': 5.05}\n",
+ "{'loss': 0.5746, 'grad_norm': 5.545507431030273, 'learning_rate': 7.1202471261170245e-06, 'epoch': 5.07}\n",
+ "{'loss': 0.5789, 'grad_norm': 4.685436248779297, 'learning_rate': 6.855402987319348e-06, 'epoch': 5.09}\n",
+ "{'loss': 0.6558, 'grad_norm': 5.384147644042969, 'learning_rate': 6.595215338606397e-06, 'epoch': 5.1}\n",
+ "{'loss': 0.4955, 'grad_norm': 5.300227642059326, 'learning_rate': 6.339712261390213e-06, 'epoch': 5.12}\n",
+ "{'loss': 0.6284, 'grad_norm': 5.341045379638672, 'learning_rate': 6.088921331488568e-06, 'epoch': 5.14}\n",
+ "{'loss': 0.5285, 'grad_norm': 4.509070873260498, 'learning_rate': 5.8428696161488215e-06, 'epoch': 5.16}\n",
+ "{'loss': 0.5073, 'grad_norm': 4.6753339767456055, 'learning_rate': 5.601583671126531e-06, 'epoch': 5.18}\n",
+ "{'loss': 0.5849, 'grad_norm': 3.836711883544922, 'learning_rate': 5.365089537819434e-06, 'epoch': 5.19}\n",
+ "{'loss': 0.5714, 'grad_norm': 4.124776840209961, 'learning_rate': 5.133412740456806e-06, 'epoch': 5.21}\n",
+ "{'loss': 0.5811, 'grad_norm': 4.734057903289795, 'learning_rate': 4.906578283344759e-06, 'epoch': 5.23}\n",
+ "{'loss': 0.6351, 'grad_norm': 5.501781463623047, 'learning_rate': 4.684610648167503e-06, 'epoch': 5.25}\n",
+ "{'loss': 0.5622, 'grad_norm': 4.912986755371094, 'learning_rate': 4.467533791345191e-06, 'epoch': 5.27}\n",
+ "{'loss': 0.5305, 'grad_norm': 6.3503899574279785, 'learning_rate': 4.255371141448272e-06, 'epoch': 5.28}\n",
+ "{'loss': 0.5406, 'grad_norm': 4.923576354980469, 'learning_rate': 4.048145596668967e-06, 'epoch': 5.3}\n",
+ "{'loss': 0.5534, 'grad_norm': 4.20800256729126, 'learning_rate': 3.84587952234991e-06, 'epoch': 5.32}\n",
+ "{'loss': 0.4831, 'grad_norm': 4.633558750152588, 'learning_rate': 3.6485947485702832e-06, 'epoch': 5.34}\n",
+ "{'loss': 0.5587, 'grad_norm': 5.120583534240723, 'learning_rate': 3.4563125677897932e-06, 'epoch': 5.35}\n",
+ "{'loss': 0.5696, 'grad_norm': 5.966647148132324, 'learning_rate': 3.269053732550581e-06, 'epoch': 5.37}\n",
+ "{'loss': 0.5767, 'grad_norm': 5.047117233276367, 'learning_rate': 3.086838453237506e-06, 'epoch': 5.39}\n",
+ "{'loss': 0.4262, 'grad_norm': 4.478403091430664, 'learning_rate': 2.9096863958968268e-06, 'epoch': 5.41}\n",
+ "{'loss': 0.4798, 'grad_norm': 4.455025672912598, 'learning_rate': 2.737616680113758e-06, 'epoch': 5.43}\n",
+ "{'loss': 0.4574, 'grad_norm': 3.7917206287384033, 'learning_rate': 2.570647876948895e-06, 'epoch': 5.44}\n",
+ "{'loss': 0.5635, 'grad_norm': 7.098059177398682, 'learning_rate': 2.408798006933882e-06, 'epoch': 5.46}\n",
+ "{'loss': 0.7231, 'grad_norm': 4.642895698547363, 'learning_rate': 2.252084538126542e-06, 'epoch': 5.48}\n",
+ "{'loss': 0.5122, 'grad_norm': 5.233055591583252, 'learning_rate': 2.100524384225555e-06, 'epoch': 5.5}\n",
+ "{'loss': 0.524, 'grad_norm': 4.6845173835754395, 'learning_rate': 1.9541339027450256e-06, 'epoch': 5.52}\n",
+ "{'loss': 0.5816, 'grad_norm': 5.447011470794678, 'learning_rate': 1.8129288932490274e-06, 'epoch': 5.53}\n",
+ "{'loss': 0.5329, 'grad_norm': 3.755023717880249, 'learning_rate': 1.6769245956464396e-06, 'epoch': 5.55}\n",
+ "{'loss': 0.6767, 'grad_norm': 5.255481719970703, 'learning_rate': 1.5461356885461075e-06, 'epoch': 5.57}\n",
+ "{'loss': 0.5529, 'grad_norm': 4.8336567878723145, 'learning_rate': 1.4205762876726092e-06, 'epoch': 5.59}\n",
+ "{'loss': 0.6372, 'grad_norm': 5.332770824432373, 'learning_rate': 1.3002599443428243e-06, 'epoch': 5.6}\n",
+ "{'loss': 0.634, 'grad_norm': 5.157808780670166, 'learning_rate': 1.1851996440033319e-06, 'epoch': 5.62}\n",
+ "{'loss': 0.5033, 'grad_norm': 4.826900005340576, 'learning_rate': 1.0754078048289374e-06, 'epoch': 5.64}\n",
+ "{'loss': 0.5681, 'grad_norm': 3.9047048091888428, 'learning_rate': 9.708962763824048e-07, 'epoch': 5.66}\n",
+ "{'loss': 0.5432, 'grad_norm': 6.038053512573242, 'learning_rate': 8.716763383355864e-07, 'epoch': 5.68}\n",
+ "{'loss': 0.6018, 'grad_norm': 5.233924388885498, 'learning_rate': 7.777586992519959e-07, 'epoch': 5.69}\n",
+ "{'loss': 0.5367, 'grad_norm': 6.929383277893066, 'learning_rate': 6.891534954310885e-07, 'epoch': 5.71}\n",
+ "{'loss': 0.6039, 'grad_norm': 4.509579181671143, 'learning_rate': 6.058702898142643e-07, 'epoch': 5.73}\n",
+ "{'loss': 0.5292, 'grad_norm': 4.131773948669434, 'learning_rate': 5.279180709527765e-07, 'epoch': 5.75}\n",
+ "{'loss': 0.6327, 'grad_norm': 4.368628025054932, 'learning_rate': 4.553052520375911e-07, 'epoch': 5.77}\n",
+ "{'loss': 0.4859, 'grad_norm': 4.966446399688721, 'learning_rate': 3.8803966999139684e-07, 'epoch': 5.78}\n",
+ "{'loss': 0.5397, 'grad_norm': 5.083605766296387, 'learning_rate': 3.261285846227868e-07, 'epoch': 5.8}\n",
+ "{'loss': 0.4758, 'grad_norm': 4.257706165313721, 'learning_rate': 2.6957867784270787e-07, 'epoch': 5.82}\n",
+ "{'loss': 0.492, 'grad_norm': 5.183888912200928, 'learning_rate': 2.1839605294330933e-07, 'epoch': 5.84}\n",
+ "{'loss': 0.6466, 'grad_norm': 7.4429707527160645, 'learning_rate': 1.725862339392259e-07, 'epoch': 5.85}\n",
+ "{'loss': 0.4461, 'grad_norm': 6.51588249206543, 'learning_rate': 1.3215416497138754e-07, 'epoch': 5.87}\n",
+ "{'loss': 0.6614, 'grad_norm': 4.2303786277771, 'learning_rate': 9.710420977340762e-08, 'epoch': 5.89}\n",
+ "{'loss': 0.4817, 'grad_norm': 6.3713908195495605, 'learning_rate': 6.744015120061509e-08, 'epoch': 5.91}\n",
+ "{'loss': 0.6231, 'grad_norm': 10.188394546508789, 'learning_rate': 4.316519082179227e-08, 'epoch': 5.93}\n",
+ "{'loss': 0.5204, 'grad_norm': 4.387541770935059, 'learning_rate': 2.4281948573617874e-08, 'epoch': 5.94}\n",
+ "{'loss': 0.5938, 'grad_norm': 5.084804534912109, 'learning_rate': 1.0792462477909882e-08, 'epoch': 5.96}\n",
+ "{'loss': 0.576, 'grad_norm': 4.955512523651123, 'learning_rate': 2.6981884216847884e-09, 'epoch': 5.98}\n",
+ "{'loss': 0.5752, 'grad_norm': 4.1065449714660645, 'learning_rate': 0.0, 'epoch': 6.0}\n",
+ "100%|█████████████████████████████████████| 3360/3360 [1:38:23<00:00, 1.77s/it][INFO|trainer.py:3788] 2024-07-04 14:05:12,056 >> \n",
+ "***** Running Evaluation *****\n",
+ "[INFO|trainer.py:3790] 2024-07-04 14:05:12,056 >> Num examples = 46\n",
+ "[INFO|trainer.py:3793] 2024-07-04 14:05:12,056 >> Batch size = 1\n",
+ "\n",
+ " 0%| | 0/46 [00:00, ?it/s]\u001b[A\n",
+ " 7%|██▊ | 3/46 [00:00<00:01, 29.94it/s]\u001b[A\n",
+ " 13%|█████▋ | 6/46 [00:00<00:01, 23.42it/s]\u001b[A\n",
+ " 20%|████████▌ | 9/46 [00:00<00:01, 21.97it/s]\u001b[A\n",
+ " 26%|███████████▏ | 12/46 [00:00<00:01, 21.18it/s]\u001b[A\n",
+ " 33%|██████████████ | 15/46 [00:00<00:01, 20.59it/s]\u001b[A\n",
+ " 39%|████████████████▊ | 18/46 [00:00<00:01, 20.24it/s]\u001b[A\n",
+ " 46%|███████████████████▋ | 21/46 [00:01<00:01, 19.57it/s]\u001b[A\n",
+ " 50%|█████████████████████▌ | 23/46 [00:01<00:01, 19.09it/s]\u001b[A\n",
+ " 54%|███████████████████████▎ | 25/46 [00:01<00:01, 19.28it/s]\u001b[A\n",
+ " 61%|██████████████████████████▏ | 28/46 [00:01<00:00, 19.49it/s]\u001b[A\n",
+ " 65%|████████████████████████████ | 30/46 [00:01<00:00, 19.59it/s]\u001b[A\n",
+ " 72%|██████████████████████████████▊ | 33/46 [00:01<00:00, 19.65it/s]\u001b[A\n",
+ " 76%|████████████████████████████████▋ | 35/46 [00:01<00:00, 19.62it/s]\u001b[A\n",
+ " 83%|███████████████████████████████████▌ | 38/46 [00:01<00:00, 18.99it/s]\u001b[A\n",
+ " 87%|█████████████████████████████████████▍ | 40/46 [00:02<00:00, 18.90it/s]\u001b[A\n",
+ " 91%|███████████████████████████████████████▎ | 42/46 [00:02<00:00, 18.97it/s]\u001b[A\n",
+ " 96%|█████████████████████████████████████████▏ | 44/46 [00:02<00:00, 19.10it/s]\u001b[A\n",
+ " \u001b[A\n",
+ "\u001b[A{'eval_loss': 2.437338352203369, 'eval_runtime': 2.3741, 'eval_samples_per_second': 19.376, 'eval_steps_per_second': 19.376, 'epoch': 6.0}\n",
+ "100%|█████████████████████████████████████| 3360/3360 [1:38:25<00:00, 1.77s/it]\n",
+ "100%|███████████████████████████████████████████| 46/46 [00:02<00:00, 19.10it/s]\u001b[A\n",
+ " \u001b[A[INFO|trainer.py:3478] 2024-07-04 14:05:14,432 >> Saving model checkpoint to saves/qwen2-1.5b/lora/sft/checkpoint-3360\n",
+ "[INFO|configuration_utils.py:733] 2024-07-04 14:05:15,110 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/config.json\n",
+ "[INFO|configuration_utils.py:800] 2024-07-04 14:05:15,111 >> Model config Qwen2Config {\n",
+ " \"architectures\": [\n",
+ " \"Qwen2ForCausalLM\"\n",
+ " ],\n",
+ " \"attention_dropout\": 0.0,\n",
+ " \"bos_token_id\": 151643,\n",
+ " \"eos_token_id\": 151645,\n",
+ " \"hidden_act\": \"silu\",\n",
+ " \"hidden_size\": 1536,\n",
+ " \"initializer_range\": 0.02,\n",
+ " \"intermediate_size\": 8960,\n",
+ " \"max_position_embeddings\": 32768,\n",
+ " \"max_window_layers\": 28,\n",
+ " \"model_type\": \"qwen2\",\n",
+ " \"num_attention_heads\": 12,\n",
+ " \"num_hidden_layers\": 28,\n",
+ " \"num_key_value_heads\": 2,\n",
+ " \"rms_norm_eps\": 1e-06,\n",
+ " \"rope_theta\": 1000000.0,\n",
+ " \"sliding_window\": 32768,\n",
+ " \"tie_word_embeddings\": true,\n",
+ " \"torch_dtype\": \"bfloat16\",\n",
+ " \"transformers_version\": \"4.42.3\",\n",
+ " \"use_cache\": true,\n",
+ " \"use_sliding_window\": false,\n",
+ " \"vocab_size\": 151936\n",
+ "}\n",
+ "\n",
+ "[INFO|tokenization_utils_base.py:2574] 2024-07-04 14:05:15,155 >> tokenizer config file saved in saves/qwen2-1.5b/lora/sft/checkpoint-3360/tokenizer_config.json\n",
+ "[INFO|tokenization_utils_base.py:2583] 2024-07-04 14:05:15,155 >> Special tokens file saved in saves/qwen2-1.5b/lora/sft/checkpoint-3360/special_tokens_map.json\n",
+ "[INFO|trainer.py:2383] 2024-07-04 14:05:15,382 >> \n",
+ "\n",
+ "Training completed. Do not forget to share your model on huggingface.co/models =)\n",
+ "\n",
+ "\n",
+ "{'train_runtime': 5911.7152, 'train_samples_per_second': 4.549, 'train_steps_per_second': 0.568, 'train_loss': 1.1251599807114827, 'epoch': 6.0}\n",
+ "100%|█████████████████████████████████████| 3360/3360 [1:38:26<00:00, 1.76s/it]\n",
+ "[INFO|trainer.py:3478] 2024-07-04 14:05:15,386 >> Saving model checkpoint to saves/qwen2-1.5b/lora/sft\n",
+ "[INFO|configuration_utils.py:733] 2024-07-04 14:05:16,251 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/config.json\n",
+ "[INFO|configuration_utils.py:800] 2024-07-04 14:05:16,251 >> Model config Qwen2Config {\n",
+ " \"architectures\": [\n",
+ " \"Qwen2ForCausalLM\"\n",
+ " ],\n",
+ " \"attention_dropout\": 0.0,\n",
+ " \"bos_token_id\": 151643,\n",
+ " \"eos_token_id\": 151645,\n",
+ " \"hidden_act\": \"silu\",\n",
+ " \"hidden_size\": 1536,\n",
+ " \"initializer_range\": 0.02,\n",
+ " \"intermediate_size\": 8960,\n",
+ " \"max_position_embeddings\": 32768,\n",
+ " \"max_window_layers\": 28,\n",
+ " \"model_type\": \"qwen2\",\n",
+ " \"num_attention_heads\": 12,\n",
+ " \"num_hidden_layers\": 28,\n",
+ " \"num_key_value_heads\": 2,\n",
+ " \"rms_norm_eps\": 1e-06,\n",
+ " \"rope_theta\": 1000000.0,\n",
+ " \"sliding_window\": 32768,\n",
+ " \"tie_word_embeddings\": true,\n",
+ " \"torch_dtype\": \"bfloat16\",\n",
+ " \"transformers_version\": \"4.42.3\",\n",
+ " \"use_cache\": true,\n",
+ " \"use_sliding_window\": false,\n",
+ " \"vocab_size\": 151936\n",
+ "}\n",
+ "\n",
+ "[INFO|tokenization_utils_base.py:2574] 2024-07-04 14:05:16,306 >> tokenizer config file saved in saves/qwen2-1.5b/lora/sft/tokenizer_config.json\n",
+ "[INFO|tokenization_utils_base.py:2583] 2024-07-04 14:05:16,306 >> Special tokens file saved in saves/qwen2-1.5b/lora/sft/special_tokens_map.json\n",
+ "***** train metrics *****\n",
+ " epoch = 5.9973\n",
+ " total_flos = 16732846GF\n",
+ " train_loss = 1.1252\n",
+ " train_runtime = 1:38:31.71\n",
+ " train_samples_per_second = 4.549\n",
+ " train_steps_per_second = 0.568\n",
+ "Figure saved at: saves/qwen2-1.5b/lora/sft/training_loss.png\n",
+ "Figure saved at: saves/qwen2-1.5b/lora/sft/training_eval_loss.png\n",
+ "[INFO|trainer.py:3788] 2024-07-04 14:05:16,625 >> \n",
+ "***** Running Evaluation *****\n",
+ "[INFO|trainer.py:3790] 2024-07-04 14:05:16,625 >> Num examples = 46\n",
+ "[INFO|trainer.py:3793] 2024-07-04 14:05:16,625 >> Batch size = 1\n",
+ "100%|███████████████████████████████████████████| 46/46 [00:02<00:00, 18.96it/s]\n",
+ "***** eval metrics *****\n",
+ " epoch = 5.9973\n",
+ " eval_loss = 2.4373\n",
+ " eval_runtime = 0:00:02.50\n",
+ " eval_samples_per_second = 18.363\n",
+ " eval_steps_per_second = 18.363\n",
+ "[INFO|modelcard.py:449] 2024-07-04 14:05:19,133 >> Dropping the following result as it does not have all the necessary fields:\n",
+ "{'task': {'name': 'Causal Language Modeling', 'type': 'text-generation'}}\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: \\ 0.086 MB of 0.086 MB uploaded\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: Run history:\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: eval/loss ▁▁▂▄▆██\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: eval/runtime ▃▄▁▅█▁▅\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: eval/samples_per_second ▆▅█▄▁█▄\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: eval/steps_per_second ▆▅█▄▁█▄\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: train/epoch ▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇███\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: train/global_step ▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇███\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: train/grad_norm ▃▁▂▂▁▃▂▃▄▄▃▅▃▃▅▅▅▃▄▄▄▆▅▇▅▆▄▅█▅▇▆▅▇▅▆▆▆▆▅\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: train/learning_rate ▂▄▅▇██████▇▇▇▇▇▆▆▆▆▅▅▅▄▄▄▃▃▃▃▂▂▂▂▁▁▁▁▁▁▁\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: train/loss █▇▇▇▆▆▆▆▆▆▅▅▆▅▄▄▄▄▄▄▃▃▃▃▃▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: \n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: Run summary:\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: eval/loss 2.43734\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: eval/runtime 2.5051\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: eval/samples_per_second 18.363\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: eval/steps_per_second 18.363\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: total_flos 1.7966756916707328e+16\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: train/epoch 5.99732\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: train/global_step 3360\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: train/grad_norm 4.10654\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: train/learning_rate 0.0\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: train/loss 0.5752\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: train_loss 1.12516\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: train_runtime 5911.7152\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: train_samples_per_second 4.549\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: train_steps_per_second 0.568\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: \n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: 🚀 View run \u001b[33mqwen2_1.5b_lora_sft\u001b[0m at: \u001b[34m\u001b[4mhttps://wandb.ai/inflaton-ai/huggingface/runs/mpc5sxtf\u001b[0m\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: ⭐️ View project at: \u001b[34m\u001b[4mhttps://wandb.ai/inflaton-ai/huggingface\u001b[0m\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: Synced 6 W&B file(s), 0 media file(s), 1 artifact file(s) and 0 other file(s)\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: Find logs at: \u001b[35m\u001b[1m./wandb/run-20240704_122645-mpc5sxtf/logs\u001b[0m\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m The new W&B backend becomes opt-out in version 0.18.0; try it out with `wandb.require(\"core\")`! See https://wandb.me/wandb-core for more information.\n",
+ "CPU times: user 1min 28s, sys: 26.5 s, total: 1min 54s\n",
+ "Wall time: 1h 42min 32s\n"
+ ]
+ }
+ ],
+ "source": [
+ "%%time\n",
+ "\n",
+ "!./scripts/tune-lf.sh config/qwen2_1.5b_lora_sft.yaml"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 19,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Current Directory:\n",
+ "/home/inflaton/code/projects/courses/llm-finetuning/llama-factory\n",
+ "07/04/2024 14:50:13 - WARNING - llamafactory.hparams.parser - We recommend enable `upcast_layernorm` in quantized training.\n",
+ "07/04/2024 14:50:13 - INFO - llamafactory.hparams.parser - Process rank: 0, device: cuda:0, n_gpu: 1, distributed training: False, compute dtype: torch.bfloat16\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-07-04 14:50:14,466 >> loading file vocab.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-7B-Instruct/snapshots/41c66b0be1c3081f13defc6bdf946c2ef240d6a6/vocab.json\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-07-04 14:50:14,466 >> loading file merges.txt from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-7B-Instruct/snapshots/41c66b0be1c3081f13defc6bdf946c2ef240d6a6/merges.txt\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-07-04 14:50:14,466 >> loading file tokenizer.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-7B-Instruct/snapshots/41c66b0be1c3081f13defc6bdf946c2ef240d6a6/tokenizer.json\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-07-04 14:50:14,466 >> loading file added_tokens.json from cache at None\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-07-04 14:50:14,466 >> loading file special_tokens_map.json from cache at None\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-07-04 14:50:14,467 >> loading file tokenizer_config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-7B-Instruct/snapshots/41c66b0be1c3081f13defc6bdf946c2ef240d6a6/tokenizer_config.json\n",
+ "[WARNING|logging.py:313] 2024-07-04 14:50:14,635 >> Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n",
+ "07/04/2024 14:50:14 - INFO - llamafactory.data.template - Replace eos token: <|im_end|>\n",
+ "07/04/2024 14:50:14 - INFO - llamafactory.data.template - Add <|im_start|> to stop words.\n",
+ "07/04/2024 14:50:14 - INFO - llamafactory.data.loader - Loading dataset alpaca_mac.json...\n",
+ "Converting format of dataset (num_proc=16): 100%|█| 4528/4528 [00:00<00:00, 1650\n",
+ "Running tokenizer on dataset (num_proc=16): 100%|█| 4528/4528 [00:01<00:00, 3163\n",
+ "input_ids:\n",
+ "[151644, 872, 198, 5501, 14683, 279, 2701, 8453, 1467, 1119, 6364, 323, 3410, 1172, 279, 24531, 2213, 11, 4302, 770, 624, 35987, 102895, 99164, 100324, 100717, 100095, 99509, 1773, 151645, 198, 151644, 77091, 198, 17949, 358, 572, 2617, 553, 264, 38835, 44486, 13, 151645]\n",
+ "inputs:\n",
+ "<|im_start|>user\n",
+ "Please translate the following Chinese text into English and provide only the translated content, nothing else.\n",
+ "全仗着狐仙搭救。<|im_end|>\n",
+ "<|im_start|>assistant\n",
+ "Because I was protected by a fox fairy.<|im_end|>\n",
+ "label_ids:\n",
+ "[-100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, 17949, 358, 572, 2617, 553, 264, 38835, 44486, 13, 151645]\n",
+ "labels:\n",
+ "Because I was protected by a fox fairy.<|im_end|>\n",
+ "[INFO|configuration_utils.py:733] 2024-07-04 14:50:17,794 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-7B-Instruct/snapshots/41c66b0be1c3081f13defc6bdf946c2ef240d6a6/config.json\n",
+ "[INFO|configuration_utils.py:800] 2024-07-04 14:50:17,795 >> Model config Qwen2Config {\n",
+ " \"_name_or_path\": \"Qwen/Qwen2-7B-Instruct\",\n",
+ " \"architectures\": [\n",
+ " \"Qwen2ForCausalLM\"\n",
+ " ],\n",
+ " \"attention_dropout\": 0.0,\n",
+ " \"bos_token_id\": 151643,\n",
+ " \"eos_token_id\": 151645,\n",
+ " \"hidden_act\": \"silu\",\n",
+ " \"hidden_size\": 3584,\n",
+ " \"initializer_range\": 0.02,\n",
+ " \"intermediate_size\": 18944,\n",
+ " \"max_position_embeddings\": 32768,\n",
+ " \"max_window_layers\": 28,\n",
+ " \"model_type\": \"qwen2\",\n",
+ " \"num_attention_heads\": 28,\n",
+ " \"num_hidden_layers\": 28,\n",
+ " \"num_key_value_heads\": 4,\n",
+ " \"rms_norm_eps\": 1e-06,\n",
+ " \"rope_theta\": 1000000.0,\n",
+ " \"sliding_window\": 131072,\n",
+ " \"tie_word_embeddings\": false,\n",
+ " \"torch_dtype\": \"bfloat16\",\n",
+ " \"transformers_version\": \"4.42.3\",\n",
+ " \"use_cache\": true,\n",
+ " \"use_sliding_window\": false,\n",
+ " \"vocab_size\": 152064\n",
+ "}\n",
+ "\n",
+ "07/04/2024 14:50:17 - INFO - llamafactory.model.model_utils.quantization - Quantizing model to 4 bit with bitsandbytes.\n",
+ "🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.\n",
+ "config.json: 100%|█████████████████████████| 1.19k/1.19k [00:00<00:00, 12.3MB/s]\n",
+ "[INFO|configuration_utils.py:733] 2024-07-04 14:50:19,202 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--unsloth--qwen2-7b-instruct-bnb-4bit/snapshots/8d8ce83e5c9fc23482eeae78027d1fc87bc2edad/config.json\n",
+ "[INFO|configuration_utils.py:800] 2024-07-04 14:50:19,203 >> Model config Qwen2Config {\n",
+ " \"_name_or_path\": \"unsloth/qwen2-7b-instruct-bnb-4bit\",\n",
+ " \"architectures\": [\n",
+ " \"Qwen2ForCausalLM\"\n",
+ " ],\n",
+ " \"attention_dropout\": 0.0,\n",
+ " \"bos_token_id\": 151643,\n",
+ " \"eos_token_id\": 151645,\n",
+ " \"hidden_act\": \"silu\",\n",
+ " \"hidden_size\": 3584,\n",
+ " \"initializer_range\": 0.02,\n",
+ " \"intermediate_size\": 18944,\n",
+ " \"max_position_embeddings\": 32768,\n",
+ " \"max_window_layers\": 28,\n",
+ " \"model_type\": \"qwen2\",\n",
+ " \"num_attention_heads\": 28,\n",
+ " \"num_hidden_layers\": 28,\n",
+ " \"num_key_value_heads\": 4,\n",
+ " \"quantization_config\": {\n",
+ " \"_load_in_4bit\": true,\n",
+ " \"_load_in_8bit\": false,\n",
+ " \"bnb_4bit_compute_dtype\": \"bfloat16\",\n",
+ " \"bnb_4bit_quant_storage\": \"uint8\",\n",
+ " \"bnb_4bit_quant_type\": \"nf4\",\n",
+ " \"bnb_4bit_use_double_quant\": true,\n",
+ " \"llm_int8_enable_fp32_cpu_offload\": false,\n",
+ " \"llm_int8_has_fp16_weight\": false,\n",
+ " \"llm_int8_skip_modules\": null,\n",
+ " \"llm_int8_threshold\": 6.0,\n",
+ " \"load_in_4bit\": true,\n",
+ " \"load_in_8bit\": false,\n",
+ " \"quant_method\": \"bitsandbytes\"\n",
+ " },\n",
+ " \"rms_norm_eps\": 1e-06,\n",
+ " \"rope_theta\": 1000000.0,\n",
+ " \"sliding_window\": 131072,\n",
+ " \"tie_word_embeddings\": false,\n",
+ " \"torch_dtype\": \"bfloat16\",\n",
+ " \"transformers_version\": \"4.42.3\",\n",
+ " \"use_cache\": true,\n",
+ " \"use_sliding_window\": false,\n",
+ " \"vocab_size\": 152064\n",
+ "}\n",
+ "\n",
+ "==((====))== Unsloth: Fast Qwen2 patching release 2024.6\n",
+ " \\\\ /| GPU: NVIDIA GeForce RTX 4080 Laptop GPU. Max memory: 11.994 GB. Platform = Linux.\n",
+ "O^O/ \\_/ \\ Pytorch: 2.3.0+cu121. CUDA = 8.9. CUDA Toolkit = 12.1.\n",
+ "\\ / Bfloat16 = TRUE. Xformers = 0.0.26.post1. FA = False.\n",
+ " \"-____-\" Free Apache license: http://github.com/unslothai/unsloth\n",
+ "[INFO|configuration_utils.py:733] 2024-07-04 14:50:20,339 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--unsloth--qwen2-7b-instruct-bnb-4bit/snapshots/8d8ce83e5c9fc23482eeae78027d1fc87bc2edad/config.json\n",
+ "[INFO|configuration_utils.py:800] 2024-07-04 14:50:20,340 >> Model config Qwen2Config {\n",
+ " \"_name_or_path\": \"unsloth/qwen2-7b-instruct-bnb-4bit\",\n",
+ " \"architectures\": [\n",
+ " \"Qwen2ForCausalLM\"\n",
+ " ],\n",
+ " \"attention_dropout\": 0.0,\n",
+ " \"bos_token_id\": 151643,\n",
+ " \"eos_token_id\": 151645,\n",
+ " \"hidden_act\": \"silu\",\n",
+ " \"hidden_size\": 3584,\n",
+ " \"initializer_range\": 0.02,\n",
+ " \"intermediate_size\": 18944,\n",
+ " \"max_position_embeddings\": 32768,\n",
+ " \"max_window_layers\": 28,\n",
+ " \"model_type\": \"qwen2\",\n",
+ " \"num_attention_heads\": 28,\n",
+ " \"num_hidden_layers\": 28,\n",
+ " \"num_key_value_heads\": 4,\n",
+ " \"quantization_config\": {\n",
+ " \"_load_in_4bit\": true,\n",
+ " \"_load_in_8bit\": false,\n",
+ " \"bnb_4bit_compute_dtype\": \"bfloat16\",\n",
+ " \"bnb_4bit_quant_storage\": \"uint8\",\n",
+ " \"bnb_4bit_quant_type\": \"nf4\",\n",
+ " \"bnb_4bit_use_double_quant\": true,\n",
+ " \"llm_int8_enable_fp32_cpu_offload\": false,\n",
+ " \"llm_int8_has_fp16_weight\": false,\n",
+ " \"llm_int8_skip_modules\": null,\n",
+ " \"llm_int8_threshold\": 6.0,\n",
+ " \"load_in_4bit\": true,\n",
+ " \"load_in_8bit\": false,\n",
+ " \"quant_method\": \"bitsandbytes\"\n",
+ " },\n",
+ " \"rms_norm_eps\": 1e-06,\n",
+ " \"rope_theta\": 1000000.0,\n",
+ " \"sliding_window\": 131072,\n",
+ " \"tie_word_embeddings\": false,\n",
+ " \"torch_dtype\": \"bfloat16\",\n",
+ " \"transformers_version\": \"4.42.3\",\n",
+ " \"use_cache\": true,\n",
+ " \"use_sliding_window\": false,\n",
+ " \"vocab_size\": 152064\n",
+ "}\n",
+ "\n",
+ "[INFO|configuration_utils.py:733] 2024-07-04 14:50:20,992 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--unsloth--qwen2-7b-instruct-bnb-4bit/snapshots/8d8ce83e5c9fc23482eeae78027d1fc87bc2edad/config.json\n",
+ "[INFO|configuration_utils.py:800] 2024-07-04 14:50:20,993 >> Model config Qwen2Config {\n",
+ " \"_name_or_path\": \"unsloth/qwen2-7b-instruct-bnb-4bit\",\n",
+ " \"architectures\": [\n",
+ " \"Qwen2ForCausalLM\"\n",
+ " ],\n",
+ " \"attention_dropout\": 0.0,\n",
+ " \"bos_token_id\": 151643,\n",
+ " \"eos_token_id\": 151645,\n",
+ " \"hidden_act\": \"silu\",\n",
+ " \"hidden_size\": 3584,\n",
+ " \"initializer_range\": 0.02,\n",
+ " \"intermediate_size\": 18944,\n",
+ " \"max_position_embeddings\": 32768,\n",
+ " \"max_window_layers\": 28,\n",
+ " \"model_type\": \"qwen2\",\n",
+ " \"num_attention_heads\": 28,\n",
+ " \"num_hidden_layers\": 28,\n",
+ " \"num_key_value_heads\": 4,\n",
+ " \"quantization_config\": {\n",
+ " \"_load_in_4bit\": true,\n",
+ " \"_load_in_8bit\": false,\n",
+ " \"bnb_4bit_compute_dtype\": \"bfloat16\",\n",
+ " \"bnb_4bit_quant_storage\": \"uint8\",\n",
+ " \"bnb_4bit_quant_type\": \"nf4\",\n",
+ " \"bnb_4bit_use_double_quant\": true,\n",
+ " \"llm_int8_enable_fp32_cpu_offload\": false,\n",
+ " \"llm_int8_has_fp16_weight\": false,\n",
+ " \"llm_int8_skip_modules\": null,\n",
+ " \"llm_int8_threshold\": 6.0,\n",
+ " \"load_in_4bit\": true,\n",
+ " \"load_in_8bit\": false,\n",
+ " \"quant_method\": \"bitsandbytes\"\n",
+ " },\n",
+ " \"rms_norm_eps\": 1e-06,\n",
+ " \"rope_theta\": 1000000.0,\n",
+ " \"sliding_window\": 131072,\n",
+ " \"tie_word_embeddings\": false,\n",
+ " \"torch_dtype\": \"bfloat16\",\n",
+ " \"transformers_version\": \"4.42.3\",\n",
+ " \"use_cache\": true,\n",
+ " \"use_sliding_window\": false,\n",
+ " \"vocab_size\": 152064\n",
+ "}\n",
+ "\n",
+ "model.safetensors: 100%|███████████████████| 5.55G/5.55G [31:00<00:00, 2.98MB/s]\n",
+ "[INFO|modeling_utils.py:3556] 2024-07-04 15:21:22,487 >> loading weights file model.safetensors from cache at /home/inflaton/.cache/huggingface/hub/models--unsloth--qwen2-7b-instruct-bnb-4bit/snapshots/8d8ce83e5c9fc23482eeae78027d1fc87bc2edad/model.safetensors\n",
+ "[INFO|modeling_utils.py:1531] 2024-07-04 15:21:26,212 >> Instantiating Qwen2ForCausalLM model under default dtype torch.bfloat16.\n",
+ "[INFO|configuration_utils.py:1000] 2024-07-04 15:21:26,219 >> Generate config GenerationConfig {\n",
+ " \"bos_token_id\": 151643,\n",
+ " \"eos_token_id\": 151645\n",
+ "}\n",
+ "\n",
+ "[INFO|modeling_utils.py:4364] 2024-07-04 15:26:00,017 >> All model checkpoint weights were used when initializing Qwen2ForCausalLM.\n",
+ "\n",
+ "[INFO|modeling_utils.py:4372] 2024-07-04 15:26:00,018 >> All the weights of Qwen2ForCausalLM were initialized from the model checkpoint at unsloth/qwen2-7b-instruct-bnb-4bit.\n",
+ "If your task is similar to the task the model of the checkpoint was trained on, you can already use Qwen2ForCausalLM for predictions without further training.\n",
+ "generation_config.json: 100%|██████████████████| 243/243 [00:00<00:00, 3.75MB/s]\n",
+ "[INFO|configuration_utils.py:955] 2024-07-04 15:26:01,541 >> loading configuration file generation_config.json from cache at /home/inflaton/.cache/huggingface/hub/models--unsloth--qwen2-7b-instruct-bnb-4bit/snapshots/8d8ce83e5c9fc23482eeae78027d1fc87bc2edad/generation_config.json\n",
+ "[INFO|configuration_utils.py:1000] 2024-07-04 15:26:01,542 >> Generate config GenerationConfig {\n",
+ " \"bos_token_id\": 151643,\n",
+ " \"do_sample\": true,\n",
+ " \"eos_token_id\": [\n",
+ " 151645,\n",
+ " 151643\n",
+ " ],\n",
+ " \"pad_token_id\": 151643,\n",
+ " \"repetition_penalty\": 1.05,\n",
+ " \"temperature\": 0.7,\n",
+ " \"top_k\": 20,\n",
+ " \"top_p\": 0.8\n",
+ "}\n",
+ "\n",
+ "tokenizer_config.json: 100%|███████████████| 1.33k/1.33k [00:00<00:00, 19.0MB/s]\n",
+ "vocab.json: 100%|██████████████████████████| 2.78M/2.78M [00:01<00:00, 1.75MB/s]\n",
+ "merges.txt: 100%|██████████████████████████| 1.67M/1.67M [00:00<00:00, 1.89MB/s]\n",
+ "added_tokens.json: 100%|█████████████████████| 80.0/80.0 [00:00<00:00, 1.29MB/s]\n",
+ "special_tokens_map.json: 100%|█████████████████| 367/367 [00:00<00:00, 6.11MB/s]\n",
+ "tokenizer.json: 100%|██████████████████████| 7.03M/7.03M [00:02<00:00, 3.09MB/s]\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-07-04 15:26:12,737 >> loading file vocab.json from cache at /home/inflaton/.cache/huggingface/hub/models--unsloth--qwen2-7b-instruct-bnb-4bit/snapshots/8d8ce83e5c9fc23482eeae78027d1fc87bc2edad/vocab.json\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-07-04 15:26:12,737 >> loading file merges.txt from cache at /home/inflaton/.cache/huggingface/hub/models--unsloth--qwen2-7b-instruct-bnb-4bit/snapshots/8d8ce83e5c9fc23482eeae78027d1fc87bc2edad/merges.txt\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-07-04 15:26:12,737 >> loading file added_tokens.json from cache at /home/inflaton/.cache/huggingface/hub/models--unsloth--qwen2-7b-instruct-bnb-4bit/snapshots/8d8ce83e5c9fc23482eeae78027d1fc87bc2edad/added_tokens.json\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-07-04 15:26:12,737 >> loading file special_tokens_map.json from cache at /home/inflaton/.cache/huggingface/hub/models--unsloth--qwen2-7b-instruct-bnb-4bit/snapshots/8d8ce83e5c9fc23482eeae78027d1fc87bc2edad/special_tokens_map.json\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-07-04 15:26:12,737 >> loading file tokenizer_config.json from cache at /home/inflaton/.cache/huggingface/hub/models--unsloth--qwen2-7b-instruct-bnb-4bit/snapshots/8d8ce83e5c9fc23482eeae78027d1fc87bc2edad/tokenizer_config.json\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-07-04 15:26:12,737 >> loading file tokenizer.json from cache at /home/inflaton/.cache/huggingface/hub/models--unsloth--qwen2-7b-instruct-bnb-4bit/snapshots/8d8ce83e5c9fc23482eeae78027d1fc87bc2edad/tokenizer.json\n",
+ "[WARNING|logging.py:313] 2024-07-04 15:26:12,946 >> Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-07-04 15:26:13,696 >> loading file vocab.json from cache at /home/inflaton/.cache/huggingface/hub/models--unsloth--qwen2-7b-instruct-bnb-4bit/snapshots/8d8ce83e5c9fc23482eeae78027d1fc87bc2edad/vocab.json\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-07-04 15:26:13,696 >> loading file merges.txt from cache at /home/inflaton/.cache/huggingface/hub/models--unsloth--qwen2-7b-instruct-bnb-4bit/snapshots/8d8ce83e5c9fc23482eeae78027d1fc87bc2edad/merges.txt\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-07-04 15:26:13,696 >> loading file tokenizer.json from cache at /home/inflaton/.cache/huggingface/hub/models--unsloth--qwen2-7b-instruct-bnb-4bit/snapshots/8d8ce83e5c9fc23482eeae78027d1fc87bc2edad/tokenizer.json\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-07-04 15:26:13,696 >> loading file added_tokens.json from cache at /home/inflaton/.cache/huggingface/hub/models--unsloth--qwen2-7b-instruct-bnb-4bit/snapshots/8d8ce83e5c9fc23482eeae78027d1fc87bc2edad/added_tokens.json\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-07-04 15:26:13,696 >> loading file special_tokens_map.json from cache at /home/inflaton/.cache/huggingface/hub/models--unsloth--qwen2-7b-instruct-bnb-4bit/snapshots/8d8ce83e5c9fc23482eeae78027d1fc87bc2edad/special_tokens_map.json\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-07-04 15:26:13,696 >> loading file tokenizer_config.json from cache at /home/inflaton/.cache/huggingface/hub/models--unsloth--qwen2-7b-instruct-bnb-4bit/snapshots/8d8ce83e5c9fc23482eeae78027d1fc87bc2edad/tokenizer_config.json\n",
+ "[WARNING|logging.py:313] 2024-07-04 15:26:13,877 >> Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n",
+ "07/04/2024 15:26:14 - INFO - llamafactory.model.model_utils.checkpointing - Gradient checkpointing enabled.\n",
+ "07/04/2024 15:26:14 - INFO - llamafactory.model.adapter - Upcasting trainable params to float32.\n",
+ "07/04/2024 15:26:14 - INFO - llamafactory.model.adapter - Fine-tuning method: LoRA\n",
+ "07/04/2024 15:26:14 - INFO - llamafactory.model.model_utils.misc - Found linear modules: gate_proj,o_proj,v_proj,k_proj,up_proj,q_proj,down_proj\n",
+ "[WARNING|logging.py:328] 2024-07-04 15:26:15,372 >> Unsloth 2024.6 patched 28 layers with 0 QKV layers, 28 O layers and 28 MLP layers.\n",
+ "07/04/2024 15:26:16 - INFO - llamafactory.model.loader - trainable params: 20,185,088 || all params: 7,635,801,600 || trainable%: 0.2643\n",
+ "[INFO|trainer.py:642] 2024-07-04 15:26:16,270 >> Using auto half precision backend\n",
+ "07/04/2024 15:26:16 - INFO - llamafactory.train.trainer_utils - Using LoRA+ optimizer with loraplus lr ratio 16.00.\n",
+ "[WARNING|:223] 2024-07-04 15:26:16,423 >> ==((====))== Unsloth - 2x faster free finetuning | Num GPUs = 1\n",
+ " \\\\ /| Num examples = 4,482 | Num Epochs = 6\n",
+ "O^O/ \\_/ \\ Batch size per device = 1 | Gradient Accumulation steps = 8\n",
+ "\\ / Total batch size = 8 | Total steps = 3,360\n",
+ " \"-____-\" Number of trainable parameters = 20,185,088\n",
+ "[INFO|integration_utils.py:750] 2024-07-04 15:26:16,929 >> Automatic Weights & Biases logging enabled, to disable set os.environ[\"WANDB_DISABLED\"] = \"true\"\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33minflaton-sg\u001b[0m (\u001b[33minflaton-ai\u001b[0m). Use \u001b[1m`wandb login --relogin`\u001b[0m to force relogin\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: Tracking run with wandb version 0.17.4\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: Run data is saved locally in \u001b[35m\u001b[1m/home/inflaton/code/projects/courses/llm-finetuning/llama-factory/wandb/run-20240704_152618-o710838e\u001b[0m\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: Run \u001b[1m`wandb offline`\u001b[0m to turn off syncing.\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: Syncing run \u001b[33mqwen2_7b_lora_sft\u001b[0m\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: ⭐️ View project at \u001b[34m\u001b[4mhttps://wandb.ai/inflaton-ai/huggingface\u001b[0m\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: 🚀 View run at \u001b[34m\u001b[4mhttps://wandb.ai/inflaton-ai/huggingface/runs/o710838e\u001b[0m\n",
+ "{'loss': 2.1957, 'grad_norm': 2.977725028991699, 'learning_rate': 2.9761904761904763e-06, 'epoch': 0.02}\n",
+ "{'loss': 1.9984, 'grad_norm': 1.17664635181427, 'learning_rate': 5.9523809523809525e-06, 'epoch': 0.04}\n",
+ "{'loss': 1.7375, 'grad_norm': 0.7683635354042053, 'learning_rate': 8.92857142857143e-06, 'epoch': 0.05}\n",
+ "{'loss': 1.7268, 'grad_norm': 1.5277972221374512, 'learning_rate': 1.1904761904761905e-05, 'epoch': 0.07}\n",
+ "{'loss': 1.7873, 'grad_norm': 0.7151318788528442, 'learning_rate': 1.4880952380952381e-05, 'epoch': 0.09}\n",
+ "{'loss': 1.6224, 'grad_norm': 0.7458081841468811, 'learning_rate': 1.785714285714286e-05, 'epoch': 0.11}\n",
+ "{'loss': 1.7345, 'grad_norm': 0.7242929339408875, 'learning_rate': 2.0833333333333336e-05, 'epoch': 0.12}\n",
+ "{'loss': 1.57, 'grad_norm': 0.8281179666519165, 'learning_rate': 2.380952380952381e-05, 'epoch': 0.14}\n",
+ "{'loss': 1.6718, 'grad_norm': 1.0110186338424683, 'learning_rate': 2.6785714285714288e-05, 'epoch': 0.16}\n",
+ "{'loss': 1.6219, 'grad_norm': 0.8258731961250305, 'learning_rate': 2.9761904761904762e-05, 'epoch': 0.18}\n",
+ "{'loss': 1.6115, 'grad_norm': 0.9346244931221008, 'learning_rate': 3.273809523809524e-05, 'epoch': 0.2}\n",
+ "{'loss': 1.6081, 'grad_norm': 1.0503712892532349, 'learning_rate': 3.571428571428572e-05, 'epoch': 0.21}\n",
+ "{'loss': 1.5874, 'grad_norm': 1.1157383918762207, 'learning_rate': 3.8690476190476195e-05, 'epoch': 0.23}\n",
+ "{'loss': 1.5825, 'grad_norm': 1.212875485420227, 'learning_rate': 4.166666666666667e-05, 'epoch': 0.25}\n",
+ "{'loss': 1.512, 'grad_norm': 1.073009967803955, 'learning_rate': 4.464285714285715e-05, 'epoch': 0.27}\n",
+ "{'loss': 1.5074, 'grad_norm': 0.8304378390312195, 'learning_rate': 4.761904761904762e-05, 'epoch': 0.29}\n",
+ "{'loss': 1.6019, 'grad_norm': 0.8581129908561707, 'learning_rate': 5.05952380952381e-05, 'epoch': 0.3}\n",
+ "{'loss': 1.4319, 'grad_norm': 1.027076244354248, 'learning_rate': 5.3571428571428575e-05, 'epoch': 0.32}\n",
+ "{'loss': 1.555, 'grad_norm': 1.3464545011520386, 'learning_rate': 5.6547619047619046e-05, 'epoch': 0.34}\n",
+ "{'loss': 1.6637, 'grad_norm': 1.2714892625808716, 'learning_rate': 5.9523809523809524e-05, 'epoch': 0.36}\n",
+ "{'loss': 1.6159, 'grad_norm': 1.0014649629592896, 'learning_rate': 6.25e-05, 'epoch': 0.37}\n",
+ "{'loss': 1.5019, 'grad_norm': 1.4355653524398804, 'learning_rate': 6.547619047619048e-05, 'epoch': 0.39}\n",
+ "{'loss': 1.5167, 'grad_norm': 1.2876572608947754, 'learning_rate': 6.845238095238096e-05, 'epoch': 0.41}\n",
+ "{'loss': 1.6807, 'grad_norm': 1.4459688663482666, 'learning_rate': 7.142857142857143e-05, 'epoch': 0.43}\n",
+ "{'loss': 1.6053, 'grad_norm': 1.7381216287612915, 'learning_rate': 7.440476190476191e-05, 'epoch': 0.45}\n",
+ "{'loss': 1.4993, 'grad_norm': 1.516874074935913, 'learning_rate': 7.738095238095239e-05, 'epoch': 0.46}\n",
+ "{'loss': 1.58, 'grad_norm': 1.7755393981933594, 'learning_rate': 8.035714285714287e-05, 'epoch': 0.48}\n",
+ "{'loss': 1.5699, 'grad_norm': 1.7302135229110718, 'learning_rate': 8.333333333333334e-05, 'epoch': 0.5}\n",
+ "{'loss': 1.419, 'grad_norm': 1.172330617904663, 'learning_rate': 8.630952380952382e-05, 'epoch': 0.52}\n",
+ "{'loss': 1.5505, 'grad_norm': 1.676744818687439, 'learning_rate': 8.92857142857143e-05, 'epoch': 0.54}\n",
+ "{'loss': 1.5749, 'grad_norm': 1.8019312620162964, 'learning_rate': 9.226190476190478e-05, 'epoch': 0.55}\n",
+ "{'loss': 1.4185, 'grad_norm': 2.2339751720428467, 'learning_rate': 9.523809523809524e-05, 'epoch': 0.57}\n",
+ "{'loss': 1.4871, 'grad_norm': 1.8845446109771729, 'learning_rate': 9.821428571428572e-05, 'epoch': 0.59}\n",
+ "{'loss': 1.4547, 'grad_norm': 1.5382771492004395, 'learning_rate': 9.999956828659095e-05, 'epoch': 0.61}\n",
+ "{'loss': 1.5409, 'grad_norm': 2.5924744606018066, 'learning_rate': 9.999471159635539e-05, 'epoch': 0.62}\n",
+ "{'loss': 1.4544, 'grad_norm': 1.6850535869598389, 'learning_rate': 9.998445910004082e-05, 'epoch': 0.64}\n",
+ "{'loss': 1.6716, 'grad_norm': 2.093435287475586, 'learning_rate': 9.996881190417393e-05, 'epoch': 0.66}\n",
+ "{'loss': 1.5389, 'grad_norm': 1.9192240238189697, 'learning_rate': 9.994777169751806e-05, 'epoch': 0.68}\n",
+ "{'loss': 1.3255, 'grad_norm': 1.820000410079956, 'learning_rate': 9.992134075089084e-05, 'epoch': 0.7}\n",
+ "{'loss': 1.4784, 'grad_norm': 1.8777908086776733, 'learning_rate': 9.988952191691925e-05, 'epoch': 0.71}\n",
+ "{'loss': 1.5354, 'grad_norm': 1.7081478834152222, 'learning_rate': 9.985231862973168e-05, 'epoch': 0.73}\n",
+ "{'loss': 1.5822, 'grad_norm': 1.6461598873138428, 'learning_rate': 9.980973490458728e-05, 'epoch': 0.75}\n",
+ "{'loss': 1.5233, 'grad_norm': 2.1327311992645264, 'learning_rate': 9.976177533744261e-05, 'epoch': 0.77}\n",
+ "{'loss': 1.4739, 'grad_norm': 2.4746365547180176, 'learning_rate': 9.97084451044556e-05, 'epoch': 0.79}\n",
+ "{'loss': 1.4276, 'grad_norm': 1.7821303606033325, 'learning_rate': 9.964974996142698e-05, 'epoch': 0.8}\n",
+ "{'loss': 1.4803, 'grad_norm': 1.551522970199585, 'learning_rate': 9.958569624317893e-05, 'epoch': 0.82}\n",
+ "{'loss': 1.5314, 'grad_norm': 2.6767489910125732, 'learning_rate': 9.951629086287151e-05, 'epoch': 0.84}\n",
+ "{'loss': 1.4844, 'grad_norm': 1.7266111373901367, 'learning_rate': 9.944154131125642e-05, 'epoch': 0.86}\n",
+ "{'loss': 1.5248, 'grad_norm': 1.7948070764541626, 'learning_rate': 9.936145565586871e-05, 'epoch': 0.87}\n",
+ "{'loss': 1.6563, 'grad_norm': 1.6244261264801025, 'learning_rate': 9.927604254015585e-05, 'epoch': 0.89}\n",
+ "{'loss': 1.5928, 'grad_norm': 1.7924832105636597, 'learning_rate': 9.918531118254507e-05, 'epoch': 0.91}\n",
+ "{'loss': 1.4955, 'grad_norm': 2.337216377258301, 'learning_rate': 9.90892713754483e-05, 'epoch': 0.93}\n",
+ "{'loss': 1.558, 'grad_norm': 2.165968179702759, 'learning_rate': 9.898793348420536e-05, 'epoch': 0.95}\n",
+ "{'loss': 1.5148, 'grad_norm': 1.7740817070007324, 'learning_rate': 9.888130844596524e-05, 'epoch': 0.96}\n",
+ "{'loss': 1.5339, 'grad_norm': 2.276500940322876, 'learning_rate': 9.876940776850569e-05, 'epoch': 0.98}\n",
+ "{'loss': 1.4748, 'grad_norm': 1.852982521057129, 'learning_rate': 9.865224352899119e-05, 'epoch': 1.0}\n",
+ " 17%|██████ | 560/3360 [1:04:27<5:48:51, 7.48s/it][INFO|trainer.py:3788] 2024-07-04 16:30:50,001 >> \n",
+ "***** Running Evaluation *****\n",
+ "[INFO|trainer.py:3790] 2024-07-04 16:30:50,003 >> Num examples = 46\n",
+ "[INFO|trainer.py:3793] 2024-07-04 16:30:50,003 >> Batch size = 1\n",
+ "\n",
+ " 0%| | 0/46 [00:00, ?it/s]\u001b[A\n",
+ " 4%|█▉ | 2/46 [00:00<00:04, 9.82it/s]\u001b[A\n",
+ " 7%|██▊ | 3/46 [00:00<00:06, 6.46it/s]\u001b[A\n",
+ " 9%|███▊ | 4/46 [00:00<00:08, 5.15it/s]\u001b[A\n",
+ " 11%|████▊ | 5/46 [00:00<00:08, 5.06it/s]\u001b[A\n",
+ " 13%|█████▋ | 6/46 [00:01<00:07, 5.10it/s]\u001b[A\n",
+ " 15%|██████▋ | 7/46 [00:01<00:08, 4.43it/s]\u001b[A\n",
+ " 17%|███████▋ | 8/46 [00:01<00:09, 4.07it/s]\u001b[A\n",
+ " 20%|████████▌ | 9/46 [00:01<00:09, 3.75it/s]\u001b[A\n",
+ " 22%|█████████▎ | 10/46 [00:02<00:10, 3.36it/s]\u001b[A\n",
+ " 24%|██████████▎ | 11/46 [00:02<00:10, 3.20it/s]\u001b[A\n",
+ " 26%|███████████▏ | 12/46 [00:03<00:10, 3.20it/s]\u001b[A\n",
+ " 28%|████████████▏ | 13/46 [00:03<00:09, 3.38it/s]\u001b[A\n",
+ " 30%|█████████████ | 14/46 [00:03<00:09, 3.53it/s]\u001b[A\n",
+ " 33%|██████████████ | 15/46 [00:03<00:08, 3.54it/s]\u001b[A\n",
+ " 35%|██████████████▉ | 16/46 [00:04<00:09, 3.30it/s]\u001b[A\n",
+ " 37%|███████████████▉ | 17/46 [00:04<00:09, 3.03it/s]\u001b[A\n",
+ " 39%|████████████████▊ | 18/46 [00:05<00:10, 2.72it/s]\u001b[A\n",
+ " 41%|█████████████████▊ | 19/46 [00:05<00:10, 2.64it/s]\u001b[A\n",
+ " 43%|██████████████████▋ | 20/46 [00:05<00:10, 2.46it/s]\u001b[A\n",
+ " 46%|███████████████████▋ | 21/46 [00:06<00:10, 2.36it/s]\u001b[A\n",
+ " 48%|████████████████████▌ | 22/46 [00:06<00:09, 2.46it/s]\u001b[A\n",
+ " 50%|█████████████████████▌ | 23/46 [00:06<00:08, 2.76it/s]\u001b[A\n",
+ " 52%|██████████████████████▍ | 24/46 [00:07<00:06, 3.19it/s]\u001b[A\n",
+ " 54%|███████████████████████▎ | 25/46 [00:07<00:05, 3.75it/s]\u001b[A\n",
+ " 57%|████████████████████████▎ | 26/46 [00:07<00:04, 4.31it/s]\u001b[A\n",
+ " 59%|█████████████████████████▏ | 27/46 [00:07<00:03, 4.93it/s]\u001b[A\n",
+ " 61%|██████████████████████████▏ | 28/46 [00:07<00:03, 5.49it/s]\u001b[A\n",
+ " 63%|███████████████████████████ | 29/46 [00:07<00:02, 5.92it/s]\u001b[A\n",
+ " 65%|████████████████████████████ | 30/46 [00:08<00:02, 6.33it/s]\u001b[A\n",
+ " 67%|████████████████████████████▉ | 31/46 [00:08<00:02, 6.71it/s]\u001b[A\n",
+ " 70%|█████████████████████████████▉ | 32/46 [00:08<00:02, 6.80it/s]\u001b[A\n",
+ " 72%|██████████████████████████████▊ | 33/46 [00:08<00:01, 6.67it/s]\u001b[A\n",
+ " 74%|███████████████████████████████▊ | 34/46 [00:08<00:01, 6.77it/s]\u001b[A\n",
+ " 76%|████████████████████████████████▋ | 35/46 [00:08<00:01, 6.46it/s]\u001b[A\n",
+ " 78%|█████████████████████████████████▋ | 36/46 [00:08<00:01, 5.78it/s]\u001b[A\n",
+ " 80%|██████████████████████████████████▌ | 37/46 [00:10<00:03, 2.28it/s]\u001b[A\n",
+ " 83%|███████████████████████████████████▌ | 38/46 [00:11<00:05, 1.57it/s]\u001b[A\n",
+ " 85%|████████████████████████████████████▍ | 39/46 [00:11<00:03, 1.87it/s]\u001b[A\n",
+ " 87%|█████████████████████████████████████▍ | 40/46 [00:11<00:02, 2.15it/s]\u001b[A\n",
+ " 89%|██████████████████████████████████████▎ | 41/46 [00:13<00:04, 1.13it/s]\u001b[A\n",
+ " 91%|███████████████████████████████████████▎ | 42/46 [00:13<00:02, 1.39it/s]\u001b[A\n",
+ " 93%|████████████████████████████████████████▏ | 43/46 [00:14<00:01, 1.80it/s]\u001b[A\n",
+ " 96%|█████████████████████████████████████████▏ | 44/46 [00:14<00:00, 2.28it/s]\u001b[A\n",
+ " 98%|██████████████████████████████████████████ | 45/46 [00:14<00:00, 2.82it/s]\u001b[A\n",
+ " \u001b[A\n",
+ "\u001b[A{'eval_loss': 1.5079401731491089, 'eval_runtime': 14.9104, 'eval_samples_per_second': 3.085, 'eval_steps_per_second': 3.085, 'epoch': 1.0}\n",
+ " 17%|██████ | 560/3360 [1:04:42<5:48:51, 7.48s/it]\n",
+ "100%|███████████████████████████████████████████| 46/46 [00:14<00:00, 3.42it/s]\u001b[A\n",
+ " \u001b[A[INFO|trainer.py:3478] 2024-07-04 16:31:04,915 >> Saving model checkpoint to saves/qwen2-7b/lora/sft/checkpoint-560\n",
+ "[INFO|configuration_utils.py:733] 2024-07-04 16:31:06,164 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--unsloth--qwen2-7b-instruct-bnb-4bit/snapshots/8d8ce83e5c9fc23482eeae78027d1fc87bc2edad/config.json\n",
+ "[INFO|configuration_utils.py:800] 2024-07-04 16:31:06,165 >> Model config Qwen2Config {\n",
+ " \"_name_or_path\": \"Qwen/Qwen2-7B-Instruct\",\n",
+ " \"architectures\": [\n",
+ " \"Qwen2ForCausalLM\"\n",
+ " ],\n",
+ " \"attention_dropout\": 0.0,\n",
+ " \"bos_token_id\": 151643,\n",
+ " \"eos_token_id\": 151645,\n",
+ " \"hidden_act\": \"silu\",\n",
+ " \"hidden_size\": 3584,\n",
+ " \"initializer_range\": 0.02,\n",
+ " \"intermediate_size\": 18944,\n",
+ " \"max_position_embeddings\": 32768,\n",
+ " \"max_window_layers\": 28,\n",
+ " \"model_type\": \"qwen2\",\n",
+ " \"num_attention_heads\": 28,\n",
+ " \"num_hidden_layers\": 28,\n",
+ " \"num_key_value_heads\": 4,\n",
+ " \"quantization_config\": {\n",
+ " \"_load_in_4bit\": true,\n",
+ " \"_load_in_8bit\": false,\n",
+ " \"bnb_4bit_compute_dtype\": \"bfloat16\",\n",
+ " \"bnb_4bit_quant_storage\": \"uint8\",\n",
+ " \"bnb_4bit_quant_type\": \"nf4\",\n",
+ " \"bnb_4bit_use_double_quant\": true,\n",
+ " \"llm_int8_enable_fp32_cpu_offload\": false,\n",
+ " \"llm_int8_has_fp16_weight\": false,\n",
+ " \"llm_int8_skip_modules\": null,\n",
+ " \"llm_int8_threshold\": 6.0,\n",
+ " \"load_in_4bit\": true,\n",
+ " \"load_in_8bit\": false,\n",
+ " \"quant_method\": \"bitsandbytes\"\n",
+ " },\n",
+ " \"rms_norm_eps\": 1e-06,\n",
+ " \"rope_theta\": 1000000.0,\n",
+ " \"sliding_window\": 131072,\n",
+ " \"tie_word_embeddings\": false,\n",
+ " \"torch_dtype\": \"bfloat16\",\n",
+ " \"transformers_version\": \"4.42.3\",\n",
+ " \"use_cache\": true,\n",
+ " \"use_sliding_window\": false,\n",
+ " \"vocab_size\": 152064\n",
+ "}\n",
+ "\n",
+ "[INFO|tokenization_utils_base.py:2574] 2024-07-04 16:31:06,584 >> tokenizer config file saved in saves/qwen2-7b/lora/sft/checkpoint-560/tokenizer_config.json\n",
+ "[INFO|tokenization_utils_base.py:2583] 2024-07-04 16:31:06,585 >> Special tokens file saved in saves/qwen2-7b/lora/sft/checkpoint-560/special_tokens_map.json\n",
+ "{'loss': 1.0693, 'grad_norm': 2.4884133338928223, 'learning_rate': 9.852982837266955e-05, 'epoch': 1.02}\n",
+ "{'loss': 0.9156, 'grad_norm': 2.0713613033294678, 'learning_rate': 9.840217551150706e-05, 'epoch': 1.04}\n",
+ "{'loss': 1.0533, 'grad_norm': 2.160870313644409, 'learning_rate': 9.826929872276255e-05, 'epoch': 1.05}\n",
+ "{'loss': 0.9734, 'grad_norm': 2.251491069793701, 'learning_rate': 9.81312123475006e-05, 'epoch': 1.07}\n",
+ "{'loss': 0.925, 'grad_norm': 2.160745859146118, 'learning_rate': 9.798793128904356e-05, 'epoch': 1.09}\n",
+ "{'loss': 1.0312, 'grad_norm': 2.9455161094665527, 'learning_rate': 9.78394710113631e-05, 'epoch': 1.11}\n",
+ "{'loss': 0.9867, 'grad_norm': 1.834627628326416, 'learning_rate': 9.768584753741134e-05, 'epoch': 1.12}\n",
+ "{'loss': 0.9962, 'grad_norm': 2.891728401184082, 'learning_rate': 9.752707744739145e-05, 'epoch': 1.14}\n",
+ "{'loss': 1.0046, 'grad_norm': 2.459664821624756, 'learning_rate': 9.736317787696816e-05, 'epoch': 1.16}\n",
+ "{'loss': 0.872, 'grad_norm': 2.503146171569824, 'learning_rate': 9.719416651541839e-05, 'epoch': 1.18}\n",
+ "{'loss': 0.9536, 'grad_norm': 1.9054204225540161, 'learning_rate': 9.702006160372209e-05, 'epoch': 1.2}\n",
+ "{'loss': 0.9768, 'grad_norm': 2.08803129196167, 'learning_rate': 9.684088193259355e-05, 'epoch': 1.21}\n",
+ "{'loss': 0.9448, 'grad_norm': 2.4227285385131836, 'learning_rate': 9.665664684045333e-05, 'epoch': 1.23}\n",
+ "{'loss': 1.0078, 'grad_norm': 2.396881103515625, 'learning_rate': 9.646737621134112e-05, 'epoch': 1.25}\n",
+ "{'loss': 0.9285, 'grad_norm': 4.0550384521484375, 'learning_rate': 9.627309047276974e-05, 'epoch': 1.27}\n",
+ "{'loss': 1.0518, 'grad_norm': 3.4381208419799805, 'learning_rate': 9.607381059352038e-05, 'epoch': 1.29}\n",
+ "{'loss': 1.0221, 'grad_norm': 2.341543674468994, 'learning_rate': 9.586955808137958e-05, 'epoch': 1.3}\n",
+ "{'loss': 1.0084, 'grad_norm': 2.660717725753784, 'learning_rate': 9.566035498081784e-05, 'epoch': 1.32}\n",
+ "{'loss': 1.0374, 'grad_norm': 2.4253923892974854, 'learning_rate': 9.544622387061055e-05, 'epoch': 1.34}\n",
+ "{'loss': 0.8872, 'grad_norm': 3.2932205200195312, 'learning_rate': 9.522718786140097e-05, 'epoch': 1.36}\n",
+ "{'loss': 1.0013, 'grad_norm': 3.3068909645080566, 'learning_rate': 9.500327059320606e-05, 'epoch': 1.37}\n",
+ "{'loss': 0.9135, 'grad_norm': 3.9048690795898438, 'learning_rate': 9.477449623286505e-05, 'epoch': 1.39}\n",
+ "{'loss': 0.8808, 'grad_norm': 2.9740893840789795, 'learning_rate': 9.454088947143116e-05, 'epoch': 1.41}\n",
+ "{'loss': 1.0511, 'grad_norm': 3.2612483501434326, 'learning_rate': 9.430247552150673e-05, 'epoch': 1.43}\n",
+ "{'loss': 0.9457, 'grad_norm': 2.8854198455810547, 'learning_rate': 9.405928011452211e-05, 'epoch': 1.45}\n",
+ "{'loss': 0.9401, 'grad_norm': 2.1029069423675537, 'learning_rate': 9.381132949795861e-05, 'epoch': 1.46}\n",
+ "{'loss': 1.0803, 'grad_norm': 3.1445486545562744, 'learning_rate': 9.35586504325155e-05, 'epoch': 1.48}\n",
+ "{'loss': 0.9944, 'grad_norm': 3.0867714881896973, 'learning_rate': 9.330127018922194e-05, 'epoch': 1.5}\n",
+ "{'loss': 0.8916, 'grad_norm': 2.589761257171631, 'learning_rate': 9.303921654649362e-05, 'epoch': 1.52}\n",
+ "{'loss': 0.988, 'grad_norm': 2.9633171558380127, 'learning_rate': 9.277251778713474e-05, 'epoch': 1.54}\n",
+ "{'loss': 0.9376, 'grad_norm': 3.082129716873169, 'learning_rate': 9.250120269528546e-05, 'epoch': 1.55}\n",
+ "{'loss': 0.9333, 'grad_norm': 2.1602373123168945, 'learning_rate': 9.22253005533154e-05, 'epoch': 1.57}\n",
+ "{'loss': 1.0027, 'grad_norm': 2.900174617767334, 'learning_rate': 9.194484113866313e-05, 'epoch': 1.59}\n",
+ "{'loss': 1.1305, 'grad_norm': 3.4030845165252686, 'learning_rate': 9.165985472062246e-05, 'epoch': 1.61}\n",
+ "{'loss': 0.8973, 'grad_norm': 2.5629944801330566, 'learning_rate': 9.137037205707552e-05, 'epoch': 1.62}\n",
+ "{'loss': 0.9483, 'grad_norm': 3.2390940189361572, 'learning_rate': 9.107642439117321e-05, 'epoch': 1.64}\n",
+ "{'loss': 0.9879, 'grad_norm': 2.5794193744659424, 'learning_rate': 9.077804344796302e-05, 'epoch': 1.66}\n",
+ "{'loss': 0.9668, 'grad_norm': 2.389864444732666, 'learning_rate': 9.04752614309652e-05, 'epoch': 1.68}\n",
+ "{'loss': 0.8852, 'grad_norm': 3.5650432109832764, 'learning_rate': 9.01681110186971e-05, 'epoch': 1.7}\n",
+ "{'loss': 0.9984, 'grad_norm': 3.166510581970215, 'learning_rate': 8.985662536114613e-05, 'epoch': 1.71}\n",
+ "{'loss': 0.9526, 'grad_norm': 2.176490306854248, 'learning_rate': 8.954083807619208e-05, 'epoch': 1.73}\n",
+ "{'loss': 1.0377, 'grad_norm': 3.0157470703125, 'learning_rate': 8.922078324597879e-05, 'epoch': 1.75}\n",
+ "{'loss': 1.1106, 'grad_norm': 2.781142234802246, 'learning_rate': 8.889649541323574e-05, 'epoch': 1.77}\n",
+ "{'loss': 1.0373, 'grad_norm': 3.456441879272461, 'learning_rate': 8.856800957755e-05, 'epoch': 1.78}\n",
+ "{'loss': 1.0307, 'grad_norm': 3.646578311920166, 'learning_rate': 8.823536119158864e-05, 'epoch': 1.8}\n",
+ "{'loss': 0.9769, 'grad_norm': 3.4664463996887207, 'learning_rate': 8.789858615727265e-05, 'epoch': 1.82}\n",
+ "{'loss': 0.9524, 'grad_norm': 2.52860951423645, 'learning_rate': 8.755772082190194e-05, 'epoch': 1.84}\n",
+ "{'loss': 1.0686, 'grad_norm': 3.0946435928344727, 'learning_rate': 8.721280197423258e-05, 'epoch': 1.86}\n",
+ "{'loss': 0.9359, 'grad_norm': 3.146989583969116, 'learning_rate': 8.68638668405062e-05, 'epoch': 1.87}\n",
+ "{'loss': 1.0035, 'grad_norm': 3.2309892177581787, 'learning_rate': 8.651095308043232e-05, 'epoch': 1.89}\n",
+ "{'loss': 1.0669, 'grad_norm': 3.8748905658721924, 'learning_rate': 8.61540987831238e-05, 'epoch': 1.91}\n",
+ "{'loss': 1.0676, 'grad_norm': 3.329939603805542, 'learning_rate': 8.579334246298593e-05, 'epoch': 1.93}\n",
+ "{'loss': 0.9976, 'grad_norm': 3.7491514682769775, 'learning_rate': 8.542872305555978e-05, 'epoch': 1.95}\n",
+ "{'loss': 0.9471, 'grad_norm': 3.245119571685791, 'learning_rate': 8.50602799133199e-05, 'epoch': 1.96}\n",
+ "{'loss': 0.9998, 'grad_norm': 2.7840590476989746, 'learning_rate': 8.468805280142709e-05, 'epoch': 1.98}\n",
+ "{'loss': 1.0361, 'grad_norm': 3.2855234146118164, 'learning_rate': 8.43120818934367e-05, 'epoch': 2.0}\n",
+ " 33%|███████████▋ | 1120/3360 [2:09:53<4:22:41, 7.04s/it][INFO|trainer.py:3788] 2024-07-04 17:36:15,576 >> \n",
+ "***** Running Evaluation *****\n",
+ "[INFO|trainer.py:3790] 2024-07-04 17:36:15,578 >> Num examples = 46\n",
+ "[INFO|trainer.py:3793] 2024-07-04 17:36:15,580 >> Batch size = 1\n",
+ "\n",
+ " 0%| | 0/46 [00:00, ?it/s]\u001b[A\n",
+ " 4%|█▉ | 2/46 [00:00<00:13, 3.20it/s]\u001b[A\n",
+ " 7%|██▊ | 3/46 [00:00<00:14, 3.05it/s]\u001b[A\n",
+ " 9%|███▊ | 4/46 [00:01<00:10, 3.85it/s]\u001b[A\n",
+ " 11%|████▊ | 5/46 [00:01<00:08, 4.72it/s]\u001b[A\n",
+ " 13%|█████▋ | 6/46 [00:01<00:07, 5.37it/s]\u001b[A\n",
+ " 15%|██████▋ | 7/46 [00:01<00:06, 5.88it/s]\u001b[A\n",
+ " 17%|███████▋ | 8/46 [00:01<00:05, 6.41it/s]\u001b[A\n",
+ " 20%|████████▌ | 9/46 [00:01<00:05, 6.69it/s]\u001b[A\n",
+ " 22%|█████████▎ | 10/46 [00:01<00:05, 7.00it/s]\u001b[A\n",
+ " 24%|██████████▎ | 11/46 [00:02<00:05, 6.87it/s]\u001b[A\n",
+ " 26%|███████████▏ | 12/46 [00:02<00:05, 6.23it/s]\u001b[A\n",
+ " 28%|████████████▏ | 13/46 [00:02<00:05, 5.53it/s]\u001b[A\n",
+ " 30%|█████████████ | 14/46 [00:02<00:07, 4.35it/s]\u001b[A\n",
+ " 33%|██████████████ | 15/46 [00:03<00:14, 2.10it/s]\u001b[A\n",
+ " 35%|██████████████▉ | 16/46 [00:04<00:19, 1.57it/s]\u001b[A\n",
+ " 37%|███████████████▉ | 17/46 [00:05<00:15, 1.83it/s]\u001b[A\n",
+ " 39%|████████████████▊ | 18/46 [00:05<00:13, 2.15it/s]\u001b[A\n",
+ " 41%|█████████████████▊ | 19/46 [00:05<00:10, 2.51it/s]\u001b[A\n",
+ " 43%|██████████████████▋ | 20/46 [00:05<00:08, 2.91it/s]\u001b[A\n",
+ " 46%|███████████████████▋ | 21/46 [00:06<00:07, 3.49it/s]\u001b[A\n",
+ " 48%|████████████████████▌ | 22/46 [00:06<00:05, 4.10it/s]\u001b[A\n",
+ " 50%|█████████████████████▌ | 23/46 [00:06<00:04, 4.73it/s]\u001b[A\n",
+ " 52%|██████████████████████▍ | 24/46 [00:06<00:04, 5.36it/s]\u001b[A\n",
+ " 54%|███████████████████████▎ | 25/46 [00:06<00:03, 6.02it/s]\u001b[A\n",
+ " 57%|████████████████████████▎ | 26/46 [00:06<00:03, 6.66it/s]\u001b[A\n",
+ " 59%|█████████████████████████▏ | 27/46 [00:06<00:02, 7.06it/s]\u001b[A\n",
+ " 63%|███████████████████████████ | 29/46 [00:07<00:02, 8.06it/s]\u001b[A\n",
+ " 65%|████████████████████████████ | 30/46 [00:07<00:01, 8.26it/s]\u001b[A\n",
+ " 67%|████████████████████████████▉ | 31/46 [00:07<00:01, 8.38it/s]\u001b[A\n",
+ " 70%|█████████████████████████████▉ | 32/46 [00:07<00:01, 8.30it/s]\u001b[A\n",
+ " 72%|██████████████████████████████▊ | 33/46 [00:07<00:01, 7.96it/s]\u001b[A\n",
+ " 74%|███████████████████████████████▊ | 34/46 [00:07<00:01, 7.71it/s]\u001b[A\n",
+ " 76%|████████████████████████████████▋ | 35/46 [00:07<00:01, 7.32it/s]\u001b[A\n",
+ " 78%|█████████████████████████████████▋ | 36/46 [00:08<00:01, 5.57it/s]\u001b[A\n",
+ " 80%|██████████████████████████████████▌ | 37/46 [00:10<00:06, 1.36it/s]\u001b[A\n",
+ " 83%|███████████████████████████████████▌ | 38/46 [00:10<00:05, 1.55it/s]\u001b[A\n",
+ " 85%|████████████████████████████████████▍ | 39/46 [00:10<00:03, 1.97it/s]\u001b[A\n",
+ " 87%|█████████████████████████████████████▍ | 40/46 [00:10<00:02, 2.51it/s]\u001b[A\n",
+ " 89%|██████████████████████████████████████▎ | 41/46 [00:11<00:01, 3.11it/s]\u001b[A\n",
+ " 91%|███████████████████████████████████████▎ | 42/46 [00:11<00:01, 3.75it/s]\u001b[A\n",
+ " 93%|████████████████████████████████████████▏ | 43/46 [00:11<00:00, 4.32it/s]\u001b[A\n",
+ " 96%|█████████████████████████████████████████▏ | 44/46 [00:11<00:00, 4.38it/s]\u001b[A\n",
+ " 98%|██████████████████████████████████████████ | 45/46 [00:11<00:00, 3.93it/s]\u001b[A\n",
+ " \u001b[A\n",
+ "\u001b[A{'eval_loss': 1.6183497905731201, 'eval_runtime': 14.0479, 'eval_samples_per_second': 3.275, 'eval_steps_per_second': 3.275, 'epoch': 2.0}\n",
+ " 33%|███████████▋ | 1120/3360 [2:10:07<4:22:41, 7.04s/it]\n",
+ "100%|███████████████████████████████████████████| 46/46 [00:13<00:00, 1.67it/s]\u001b[A\n",
+ " \u001b[A[INFO|trainer.py:3478] 2024-07-04 17:36:29,696 >> Saving model checkpoint to saves/qwen2-7b/lora/sft/checkpoint-1120\n",
+ "[INFO|configuration_utils.py:733] 2024-07-04 17:36:31,166 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--unsloth--qwen2-7b-instruct-bnb-4bit/snapshots/8d8ce83e5c9fc23482eeae78027d1fc87bc2edad/config.json\n",
+ "[INFO|configuration_utils.py:800] 2024-07-04 17:36:31,166 >> Model config Qwen2Config {\n",
+ " \"_name_or_path\": \"Qwen/Qwen2-7B-Instruct\",\n",
+ " \"architectures\": [\n",
+ " \"Qwen2ForCausalLM\"\n",
+ " ],\n",
+ " \"attention_dropout\": 0.0,\n",
+ " \"bos_token_id\": 151643,\n",
+ " \"eos_token_id\": 151645,\n",
+ " \"hidden_act\": \"silu\",\n",
+ " \"hidden_size\": 3584,\n",
+ " \"initializer_range\": 0.02,\n",
+ " \"intermediate_size\": 18944,\n",
+ " \"max_position_embeddings\": 32768,\n",
+ " \"max_window_layers\": 28,\n",
+ " \"model_type\": \"qwen2\",\n",
+ " \"num_attention_heads\": 28,\n",
+ " \"num_hidden_layers\": 28,\n",
+ " \"num_key_value_heads\": 4,\n",
+ " \"quantization_config\": {\n",
+ " \"_load_in_4bit\": true,\n",
+ " \"_load_in_8bit\": false,\n",
+ " \"bnb_4bit_compute_dtype\": \"bfloat16\",\n",
+ " \"bnb_4bit_quant_storage\": \"uint8\",\n",
+ " \"bnb_4bit_quant_type\": \"nf4\",\n",
+ " \"bnb_4bit_use_double_quant\": true,\n",
+ " \"llm_int8_enable_fp32_cpu_offload\": false,\n",
+ " \"llm_int8_has_fp16_weight\": false,\n",
+ " \"llm_int8_skip_modules\": null,\n",
+ " \"llm_int8_threshold\": 6.0,\n",
+ " \"load_in_4bit\": true,\n",
+ " \"load_in_8bit\": false,\n",
+ " \"quant_method\": \"bitsandbytes\"\n",
+ " },\n",
+ " \"rms_norm_eps\": 1e-06,\n",
+ " \"rope_theta\": 1000000.0,\n",
+ " \"sliding_window\": 131072,\n",
+ " \"tie_word_embeddings\": false,\n",
+ " \"torch_dtype\": \"bfloat16\",\n",
+ " \"transformers_version\": \"4.42.3\",\n",
+ " \"use_cache\": true,\n",
+ " \"use_sliding_window\": false,\n",
+ " \"vocab_size\": 152064\n",
+ "}\n",
+ "\n",
+ "[INFO|tokenization_utils_base.py:2574] 2024-07-04 17:36:31,345 >> tokenizer config file saved in saves/qwen2-7b/lora/sft/checkpoint-1120/tokenizer_config.json\n",
+ "[INFO|tokenization_utils_base.py:2583] 2024-07-04 17:36:31,345 >> Special tokens file saved in saves/qwen2-7b/lora/sft/checkpoint-1120/special_tokens_map.json\n",
+ "{'loss': 0.4204, 'grad_norm': 3.7729196548461914, 'learning_rate': 8.393240776696274e-05, 'epoch': 2.02}\n",
+ "{'loss': 0.3656, 'grad_norm': 2.241997718811035, 'learning_rate': 8.354907139929851e-05, 'epoch': 2.03}\n",
+ "{'loss': 0.3554, 'grad_norm': 3.0927772521972656, 'learning_rate': 8.316211416299397e-05, 'epoch': 2.05}\n",
+ "{'loss': 0.3576, 'grad_norm': 4.350724697113037, 'learning_rate': 8.27715778213905e-05, 'epoch': 2.07}\n",
+ "{'loss': 0.2409, 'grad_norm': 2.0694334506988525, 'learning_rate': 8.237750452411353e-05, 'epoch': 2.09}\n",
+ "{'loss': 0.3769, 'grad_norm': 2.3954668045043945, 'learning_rate': 8.197993680252334e-05, 'epoch': 2.11}\n",
+ "{'loss': 0.4252, 'grad_norm': 2.997573137283325, 'learning_rate': 8.157891756512488e-05, 'epoch': 2.12}\n",
+ "{'loss': 0.3827, 'grad_norm': 3.1807985305786133, 'learning_rate': 8.117449009293668e-05, 'epoch': 2.14}\n",
+ "{'loss': 0.3291, 'grad_norm': 4.341946125030518, 'learning_rate': 8.076669803481965e-05, 'epoch': 2.16}\n",
+ "{'loss': 0.3049, 'grad_norm': 4.446887493133545, 'learning_rate': 8.035558540276618e-05, 'epoch': 2.18}\n",
+ "{'loss': 0.3638, 'grad_norm': 2.7504091262817383, 'learning_rate': 7.994119656715002e-05, 'epoch': 2.2}\n",
+ "{'loss': 0.4044, 'grad_norm': 2.769212484359741, 'learning_rate': 7.952357625193749e-05, 'epoch': 2.21}\n",
+ "{'loss': 0.3703, 'grad_norm': 5.1489362716674805, 'learning_rate': 7.91027695298606e-05, 'epoch': 2.23}\n",
+ "{'loss': 0.3866, 'grad_norm': 4.870989799499512, 'learning_rate': 7.86788218175523e-05, 'epoch': 2.25}\n",
+ "{'loss': 0.383, 'grad_norm': 2.3800389766693115, 'learning_rate': 7.8251778870645e-05, 'epoch': 2.27}\n",
+ "{'loss': 0.3855, 'grad_norm': 3.800349473953247, 'learning_rate': 7.782168677883206e-05, 'epoch': 2.28}\n",
+ "{'loss': 0.4051, 'grad_norm': 2.723214864730835, 'learning_rate': 7.738859196089358e-05, 'epoch': 2.3}\n",
+ "{'loss': 0.4282, 'grad_norm': 3.5306265354156494, 'learning_rate': 7.695254115968648e-05, 'epoch': 2.32}\n",
+ "{'loss': 0.4128, 'grad_norm': 2.6264665126800537, 'learning_rate': 7.651358143709972e-05, 'epoch': 2.34}\n",
+ "{'loss': 0.4174, 'grad_norm': 3.427201747894287, 'learning_rate': 7.60717601689749e-05, 'epoch': 2.36}\n",
+ "{'loss': 0.3553, 'grad_norm': 3.8674330711364746, 'learning_rate': 7.562712503999327e-05, 'epoch': 2.37}\n",
+ "{'loss': 0.4509, 'grad_norm': 3.253030776977539, 'learning_rate': 7.517972403852905e-05, 'epoch': 2.39}\n",
+ "{'loss': 0.3599, 'grad_norm': 3.4824795722961426, 'learning_rate': 7.472960545147038e-05, 'epoch': 2.41}\n",
+ "{'loss': 0.3248, 'grad_norm': 4.311473369598389, 'learning_rate': 7.427681785900761e-05, 'epoch': 2.43}\n",
+ "{'loss': 0.3835, 'grad_norm': 3.2026665210723877, 'learning_rate': 7.382141012939034e-05, 'epoch': 2.45}\n",
+ "{'loss': 0.3631, 'grad_norm': 3.4886059761047363, 'learning_rate': 7.33634314136531e-05, 'epoch': 2.46}\n",
+ "{'loss': 0.4001, 'grad_norm': 2.1931118965148926, 'learning_rate': 7.290293114031061e-05, 'epoch': 2.48}\n",
+ "{'loss': 0.4094, 'grad_norm': 3.082930564880371, 'learning_rate': 7.243995901002312e-05, 'epoch': 2.5}\n",
+ "{'loss': 0.3916, 'grad_norm': 2.8144562244415283, 'learning_rate': 7.197456499023225e-05, 'epoch': 2.52}\n",
+ "{'loss': 0.4212, 'grad_norm': 3.546799898147583, 'learning_rate': 7.150679930976825e-05, 'epoch': 2.53}\n",
+ "{'loss': 0.3852, 'grad_norm': 3.623589038848877, 'learning_rate': 7.103671245342887e-05, 'epoch': 2.55}\n",
+ "{'loss': 0.3294, 'grad_norm': 3.896050214767456, 'learning_rate': 7.056435515653059e-05, 'epoch': 2.57}\n",
+ "{'loss': 0.4378, 'grad_norm': 2.8549437522888184, 'learning_rate': 7.008977839943299e-05, 'epoch': 2.59}\n",
+ "{'loss': 0.3744, 'grad_norm': 2.963679313659668, 'learning_rate': 6.961303340203653e-05, 'epoch': 2.61}\n",
+ "{'loss': 0.4083, 'grad_norm': 3.584379196166992, 'learning_rate': 6.91341716182545e-05, 'epoch': 2.62}\n",
+ "{'loss': 0.3875, 'grad_norm': 3.231067180633545, 'learning_rate': 6.86532447304597e-05, 'epoch': 2.64}\n",
+ "{'loss': 0.3555, 'grad_norm': 3.2355687618255615, 'learning_rate': 6.817030464390656e-05, 'epoch': 2.66}\n",
+ "{'loss': 0.3962, 'grad_norm': 4.36820125579834, 'learning_rate': 6.768540348112907e-05, 'epoch': 2.68}\n",
+ "{'loss': 0.3224, 'grad_norm': 2.6882545948028564, 'learning_rate': 6.719859357631535e-05, 'epoch': 2.7}\n",
+ "{'loss': 0.3478, 'grad_norm': 3.5584182739257812, 'learning_rate': 6.670992746965938e-05, 'epoch': 2.71}\n",
+ "{'loss': 0.4298, 'grad_norm': 4.19834041595459, 'learning_rate': 6.621945790169036e-05, 'epoch': 2.73}\n",
+ "{'loss': 0.4304, 'grad_norm': 4.770883083343506, 'learning_rate': 6.572723780758069e-05, 'epoch': 2.75}\n",
+ "{'loss': 0.3657, 'grad_norm': 4.010149955749512, 'learning_rate': 6.523332031143272e-05, 'epoch': 2.77}\n",
+ "{'loss': 0.3699, 'grad_norm': 3.2105469703674316, 'learning_rate': 6.473775872054521e-05, 'epoch': 2.78}\n",
+ "{'loss': 0.3342, 'grad_norm': 3.494490146636963, 'learning_rate': 6.424060651966007e-05, 'epoch': 2.8}\n",
+ "{'loss': 0.327, 'grad_norm': 3.291541814804077, 'learning_rate': 6.374191736518974e-05, 'epoch': 2.82}\n",
+ "{'loss': 0.3928, 'grad_norm': 3.125520706176758, 'learning_rate': 6.324174507942637e-05, 'epoch': 2.84}\n",
+ "{'loss': 0.3776, 'grad_norm': 4.660810470581055, 'learning_rate': 6.274014364473274e-05, 'epoch': 2.86}\n",
+ "{'loss': 0.4623, 'grad_norm': 2.8751118183135986, 'learning_rate': 6.22371671977162e-05, 'epoch': 2.87}\n",
+ "{'loss': 0.4122, 'grad_norm': 4.0637078285217285, 'learning_rate': 6.173287002338577e-05, 'epoch': 2.89}\n",
+ "{'loss': 0.4056, 'grad_norm': 3.7399301528930664, 'learning_rate': 6.122730654929334e-05, 'epoch': 2.91}\n",
+ "{'loss': 0.3351, 'grad_norm': 4.581759452819824, 'learning_rate': 6.072053133965938e-05, 'epoch': 2.93}\n",
+ "{'loss': 0.3849, 'grad_norm': 3.381431818008423, 'learning_rate': 6.021259908948402e-05, 'epoch': 2.95}\n",
+ "{'loss': 0.3947, 'grad_norm': 4.740965366363525, 'learning_rate': 5.970356461864391e-05, 'epoch': 2.96}\n",
+ "{'loss': 0.3945, 'grad_norm': 5.124401569366455, 'learning_rate': 5.919348286597569e-05, 'epoch': 2.98}\n",
+ "{'loss': 0.4098, 'grad_norm': 3.3869075775146484, 'learning_rate': 5.868240888334653e-05, 'epoch': 3.0}\n",
+ " 50%|█████████████████▌ | 1680/3360 [3:13:07<3:03:37, 6.56s/it][INFO|trainer.py:3788] 2024-07-04 18:39:30,098 >> \n",
+ "***** Running Evaluation *****\n",
+ "[INFO|trainer.py:3790] 2024-07-04 18:39:30,098 >> Num examples = 46\n",
+ "[INFO|trainer.py:3793] 2024-07-04 18:39:30,098 >> Batch size = 1\n",
+ "\n",
+ " 0%| | 0/46 [00:00, ?it/s]\u001b[A\n",
+ " 4%|█▉ | 2/46 [00:00<00:03, 13.92it/s]\u001b[A\n",
+ " 9%|███▊ | 4/46 [00:00<00:05, 7.78it/s]\u001b[A\n",
+ " 13%|█████▋ | 6/46 [00:00<00:06, 6.54it/s]\u001b[A\n",
+ " 15%|██████▋ | 7/46 [00:01<00:06, 5.99it/s]\u001b[A\n",
+ " 17%|███████▋ | 8/46 [00:01<00:07, 4.94it/s]\u001b[A\n",
+ " 20%|████████▌ | 9/46 [00:01<00:10, 3.63it/s]\u001b[A\n",
+ " 22%|█████████▎ | 10/46 [00:03<00:26, 1.36it/s]\u001b[A\n",
+ " 24%|██████████▎ | 11/46 [00:04<00:26, 1.34it/s]\u001b[A\n",
+ " 26%|███████████▏ | 12/46 [00:04<00:20, 1.62it/s]\u001b[A\n",
+ " 28%|████████████▏ | 13/46 [00:04<00:16, 2.03it/s]\u001b[A\n",
+ " 30%|█████████████ | 14/46 [00:05<00:13, 2.46it/s]\u001b[A\n",
+ " 33%|██████████████ | 15/46 [00:05<00:10, 2.89it/s]\u001b[A\n",
+ " 35%|██████████████▉ | 16/46 [00:05<00:09, 3.32it/s]\u001b[A\n",
+ " 37%|███████████████▉ | 17/46 [00:05<00:07, 3.74it/s]\u001b[A\n",
+ " 39%|████████████████▊ | 18/46 [00:05<00:06, 4.13it/s]\u001b[A\n",
+ " 41%|█████████████████▊ | 19/46 [00:06<00:06, 4.19it/s]\u001b[A\n",
+ " 43%|██████████████████▋ | 20/46 [00:06<00:06, 4.31it/s]\u001b[A\n",
+ " 46%|███████████████████▋ | 21/46 [00:06<00:05, 4.72it/s]\u001b[A\n",
+ " 48%|████████████████████▌ | 22/46 [00:06<00:04, 5.04it/s]\u001b[A\n",
+ " 50%|█████████████████████▌ | 23/46 [00:06<00:04, 5.33it/s]\u001b[A\n",
+ " 52%|██████████████████████▍ | 24/46 [00:06<00:04, 5.49it/s]\u001b[A\n",
+ " 54%|███████████████████████▎ | 25/46 [00:07<00:03, 5.80it/s]\u001b[A\n",
+ " 57%|████████████████████████▎ | 26/46 [00:07<00:03, 5.90it/s]\u001b[A\n",
+ " 59%|█████████████████████████▏ | 27/46 [00:07<00:03, 5.85it/s]\u001b[A\n",
+ " 61%|██████████████████████████▏ | 28/46 [00:07<00:03, 5.65it/s]\u001b[A\n",
+ " 63%|███████████████████████████ | 29/46 [00:07<00:03, 5.47it/s]\u001b[A\n",
+ " 65%|████████████████████████████ | 30/46 [00:08<00:03, 4.67it/s]\u001b[A\n",
+ " 67%|████████████████████████████▉ | 31/46 [00:09<00:09, 1.53it/s]\u001b[A\n",
+ " 70%|█████████████████████████████▉ | 32/46 [00:10<00:07, 1.81it/s]\u001b[A\n",
+ " 72%|██████████████████████████████▊ | 33/46 [00:10<00:06, 2.16it/s]\u001b[A\n",
+ " 74%|███████████████████████████████▊ | 34/46 [00:10<00:04, 2.48it/s]\u001b[A\n",
+ " 76%|████████████████████████████████▋ | 35/46 [00:10<00:04, 2.72it/s]\u001b[A\n",
+ " 78%|█████████████████████████████████▋ | 36/46 [00:11<00:03, 2.82it/s]\u001b[A\n",
+ " 80%|██████████████████████████████████▌ | 37/46 [00:12<00:06, 1.41it/s]\u001b[A\n",
+ " 83%|███████████████████████████████████▌ | 38/46 [00:13<00:05, 1.49it/s]\u001b[A\n",
+ " 85%|████████████████████████████████████▍ | 39/46 [00:13<00:03, 1.92it/s]\u001b[A\n",
+ " 87%|█████████████████████████████████████▍ | 40/46 [00:13<00:02, 2.46it/s]\u001b[A\n",
+ " 89%|██████████████████████████████████████▎ | 41/46 [00:13<00:01, 3.12it/s]\u001b[A\n",
+ " 91%|███████████████████████████████████████▎ | 42/46 [00:13<00:01, 3.85it/s]\u001b[A\n",
+ " 93%|████████████████████████████████████████▏ | 43/46 [00:14<00:00, 4.55it/s]\u001b[A\n",
+ " 96%|█████████████████████████████████████████▏ | 44/46 [00:14<00:00, 5.25it/s]\u001b[A\n",
+ " 98%|██████████████████████████████████████████ | 45/46 [00:14<00:00, 5.92it/s]\u001b[A\n",
+ " \u001b[A\n",
+ "\u001b[A{'eval_loss': 2.002082109451294, 'eval_runtime': 14.5597, 'eval_samples_per_second': 3.159, 'eval_steps_per_second': 3.159, 'epoch': 3.0}\n",
+ " 50%|█████████████████▌ | 1680/3360 [3:13:22<3:03:37, 6.56s/it]\n",
+ "100%|███████████████████████████████████████████| 46/46 [00:14<00:00, 6.39it/s]\u001b[A\n",
+ " \u001b[A[INFO|trainer.py:3478] 2024-07-04 18:39:44,661 >> Saving model checkpoint to saves/qwen2-7b/lora/sft/checkpoint-1680\n",
+ "[INFO|configuration_utils.py:733] 2024-07-04 18:39:46,491 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--unsloth--qwen2-7b-instruct-bnb-4bit/snapshots/8d8ce83e5c9fc23482eeae78027d1fc87bc2edad/config.json\n",
+ "[INFO|configuration_utils.py:800] 2024-07-04 18:39:46,492 >> Model config Qwen2Config {\n",
+ " \"_name_or_path\": \"Qwen/Qwen2-7B-Instruct\",\n",
+ " \"architectures\": [\n",
+ " \"Qwen2ForCausalLM\"\n",
+ " ],\n",
+ " \"attention_dropout\": 0.0,\n",
+ " \"bos_token_id\": 151643,\n",
+ " \"eos_token_id\": 151645,\n",
+ " \"hidden_act\": \"silu\",\n",
+ " \"hidden_size\": 3584,\n",
+ " \"initializer_range\": 0.02,\n",
+ " \"intermediate_size\": 18944,\n",
+ " \"max_position_embeddings\": 32768,\n",
+ " \"max_window_layers\": 28,\n",
+ " \"model_type\": \"qwen2\",\n",
+ " \"num_attention_heads\": 28,\n",
+ " \"num_hidden_layers\": 28,\n",
+ " \"num_key_value_heads\": 4,\n",
+ " \"quantization_config\": {\n",
+ " \"_load_in_4bit\": true,\n",
+ " \"_load_in_8bit\": false,\n",
+ " \"bnb_4bit_compute_dtype\": \"bfloat16\",\n",
+ " \"bnb_4bit_quant_storage\": \"uint8\",\n",
+ " \"bnb_4bit_quant_type\": \"nf4\",\n",
+ " \"bnb_4bit_use_double_quant\": true,\n",
+ " \"llm_int8_enable_fp32_cpu_offload\": false,\n",
+ " \"llm_int8_has_fp16_weight\": false,\n",
+ " \"llm_int8_skip_modules\": null,\n",
+ " \"llm_int8_threshold\": 6.0,\n",
+ " \"load_in_4bit\": true,\n",
+ " \"load_in_8bit\": false,\n",
+ " \"quant_method\": \"bitsandbytes\"\n",
+ " },\n",
+ " \"rms_norm_eps\": 1e-06,\n",
+ " \"rope_theta\": 1000000.0,\n",
+ " \"sliding_window\": 131072,\n",
+ " \"tie_word_embeddings\": false,\n",
+ " \"torch_dtype\": \"bfloat16\",\n",
+ " \"transformers_version\": \"4.42.3\",\n",
+ " \"use_cache\": true,\n",
+ " \"use_sliding_window\": false,\n",
+ " \"vocab_size\": 152064\n",
+ "}\n",
+ "\n",
+ "[INFO|tokenization_utils_base.py:2574] 2024-07-04 18:39:46,762 >> tokenizer config file saved in saves/qwen2-7b/lora/sft/checkpoint-1680/tokenizer_config.json\n",
+ "[INFO|tokenization_utils_base.py:2583] 2024-07-04 18:39:46,762 >> Special tokens file saved in saves/qwen2-7b/lora/sft/checkpoint-1680/special_tokens_map.json\n",
+ "{'loss': 0.1954, 'grad_norm': 2.6332297325134277, 'learning_rate': 5.8170397829712485e-05, 'epoch': 3.02}\n",
+ "{'loss': 0.0883, 'grad_norm': 3.9817214012145996, 'learning_rate': 5.765750496516547e-05, 'epoch': 3.03}\n",
+ "{'loss': 0.1392, 'grad_norm': 1.9517065286636353, 'learning_rate': 5.714378564496901e-05, 'epoch': 3.05}\n",
+ "{'loss': 0.1231, 'grad_norm': 1.8976528644561768, 'learning_rate': 5.6629295313583974e-05, 'epoch': 3.07}\n",
+ "{'loss': 0.1299, 'grad_norm': 1.4403581619262695, 'learning_rate': 5.611408949868457e-05, 'epoch': 3.09}\n",
+ "{'loss': 0.1068, 'grad_norm': 3.757260322570801, 'learning_rate': 5.559822380516539e-05, 'epoch': 3.11}\n",
+ "{'loss': 0.0975, 'grad_norm': 1.9946837425231934, 'learning_rate': 5.5081753909140096e-05, 'epoch': 3.12}\n",
+ "{'loss': 0.1205, 'grad_norm': 3.075326442718506, 'learning_rate': 5.456473555193242e-05, 'epoch': 3.14}\n",
+ "{'loss': 0.1226, 'grad_norm': 2.4876396656036377, 'learning_rate': 5.404722453406017e-05, 'epoch': 3.16}\n",
+ "{'loss': 0.1144, 'grad_norm': 2.430744171142578, 'learning_rate': 5.3529276709212816e-05, 'epoch': 3.18}\n",
+ "{'loss': 0.1399, 'grad_norm': 3.6195318698883057, 'learning_rate': 5.30109479782233e-05, 'epoch': 3.2}\n",
+ "{'loss': 0.1156, 'grad_norm': 3.914135217666626, 'learning_rate': 5.249229428303486e-05, 'epoch': 3.21}\n",
+ "{'loss': 0.1372, 'grad_norm': 1.994607925415039, 'learning_rate': 5.197337160066331e-05, 'epoch': 3.23}\n",
+ "{'loss': 0.1138, 'grad_norm': 1.6210600137710571, 'learning_rate': 5.145423593715557e-05, 'epoch': 3.25}\n",
+ "{'loss': 0.14, 'grad_norm': 2.50508713722229, 'learning_rate': 5.0934943321545115e-05, 'epoch': 3.27}\n",
+ "{'loss': 0.1152, 'grad_norm': 4.362739562988281, 'learning_rate': 5.041554979980486e-05, 'epoch': 3.28}\n",
+ "{'loss': 0.1549, 'grad_norm': 3.601013422012329, 'learning_rate': 4.9896111428798254e-05, 'epoch': 3.3}\n",
+ "{'loss': 0.1429, 'grad_norm': 2.076098680496216, 'learning_rate': 4.9376684270229254e-05, 'epoch': 3.32}\n",
+ "{'loss': 0.1353, 'grad_norm': 1.633200764656067, 'learning_rate': 4.8857324384591653e-05, 'epoch': 3.34}\n",
+ "{'loss': 0.1284, 'grad_norm': 4.053235054016113, 'learning_rate': 4.8338087825118675e-05, 'epoch': 3.36}\n",
+ "{'loss': 0.1526, 'grad_norm': 2.4892356395721436, 'learning_rate': 4.781903063173321e-05, 'epoch': 3.37}\n",
+ "{'loss': 0.1042, 'grad_norm': 1.8938469886779785, 'learning_rate': 4.730020882499964e-05, 'epoch': 3.39}\n",
+ "{'loss': 0.1569, 'grad_norm': 1.758270502090454, 'learning_rate': 4.678167840007767e-05, 'epoch': 3.41}\n",
+ "{'loss': 0.117, 'grad_norm': 1.9446786642074585, 'learning_rate': 4.626349532067879e-05, 'epoch': 3.43}\n",
+ "{'loss': 0.1603, 'grad_norm': 2.5028741359710693, 'learning_rate': 4.574571551302647e-05, 'epoch': 3.44}\n",
+ "{'loss': 0.1528, 'grad_norm': 3.524077892303467, 'learning_rate': 4.522839485981994e-05, 'epoch': 3.46}\n",
+ "{'loss': 0.1366, 'grad_norm': 2.425860643386841, 'learning_rate': 4.471158919420312e-05, 'epoch': 3.48}\n",
+ "{'loss': 0.1231, 'grad_norm': 2.6059088706970215, 'learning_rate': 4.4195354293738484e-05, 'epoch': 3.5}\n",
+ "{'loss': 0.1479, 'grad_norm': 3.934004783630371, 'learning_rate': 4.367974587438733e-05, 'epoch': 3.52}\n",
+ "{'loss': 0.1466, 'grad_norm': 2.3225414752960205, 'learning_rate': 4.316481958449634e-05, 'epoch': 3.53}\n",
+ "{'loss': 0.1161, 'grad_norm': 3.3421878814697266, 'learning_rate': 4.2650630998791615e-05, 'epoch': 3.55}\n",
+ "{'loss': 0.1312, 'grad_norm': 2.411162853240967, 'learning_rate': 4.213723561238074e-05, 'epoch': 3.57}\n",
+ "{'loss': 0.1144, 'grad_norm': 2.74504017829895, 'learning_rate': 4.162468883476319e-05, 'epoch': 3.59}\n",
+ "{'loss': 0.1303, 'grad_norm': 3.3871073722839355, 'learning_rate': 4.111304598385018e-05, 'epoch': 3.61}\n",
+ "{'loss': 0.1272, 'grad_norm': 2.4120686054229736, 'learning_rate': 4.060236227999441e-05, 'epoch': 3.62}\n",
+ "{'loss': 0.1127, 'grad_norm': 2.2959489822387695, 'learning_rate': 4.0092692840030134e-05, 'epoch': 3.64}\n",
+ "{'loss': 0.131, 'grad_norm': 2.5716683864593506, 'learning_rate': 3.9584092671324606e-05, 'epoch': 3.66}\n",
+ "{'loss': 0.1512, 'grad_norm': 3.035562753677368, 'learning_rate': 3.907661666584131e-05, 'epoch': 3.68}\n",
+ "{'loss': 0.1253, 'grad_norm': 2.897613048553467, 'learning_rate': 3.857031959421553e-05, 'epoch': 3.69}\n",
+ "{'loss': 0.1084, 'grad_norm': 2.2627975940704346, 'learning_rate': 3.806525609984312e-05, 'epoch': 3.71}\n",
+ "{'loss': 0.105, 'grad_norm': 2.2742927074432373, 'learning_rate': 3.7561480692983006e-05, 'epoch': 3.73}\n",
+ "{'loss': 0.1489, 'grad_norm': 1.9651683568954468, 'learning_rate': 3.705904774487396e-05, 'epoch': 3.75}\n",
+ "{'loss': 0.1448, 'grad_norm': 4.107623100280762, 'learning_rate': 3.655801148186655e-05, 'epoch': 3.77}\n",
+ "{'loss': 0.0998, 'grad_norm': 2.270852565765381, 'learning_rate': 3.6058425979570485e-05, 'epoch': 3.78}\n",
+ "{'loss': 0.1176, 'grad_norm': 3.770810842514038, 'learning_rate': 3.556034515701852e-05, 'epoch': 3.8}\n",
+ "{'loss': 0.1175, 'grad_norm': 4.139482498168945, 'learning_rate': 3.506382277084696e-05, 'epoch': 3.82}\n",
+ "{'loss': 0.152, 'grad_norm': 2.7534141540527344, 'learning_rate': 3.4568912409493945e-05, 'epoch': 3.84}\n",
+ "{'loss': 0.0974, 'grad_norm': 2.224083423614502, 'learning_rate': 3.4075667487415785e-05, 'epoch': 3.86}\n",
+ "{'loss': 0.1133, 'grad_norm': 1.7634135484695435, 'learning_rate': 3.358414123932195e-05, 'epoch': 3.87}\n",
+ "{'loss': 0.1311, 'grad_norm': 2.7758963108062744, 'learning_rate': 3.3094386714429724e-05, 'epoch': 3.89}\n",
+ "{'loss': 0.1341, 'grad_norm': 2.842358350753784, 'learning_rate': 3.2606456770738636e-05, 'epoch': 3.91}\n",
+ "{'loss': 0.0884, 'grad_norm': 1.71796452999115, 'learning_rate': 3.212040406932569e-05, 'epoch': 3.93}\n",
+ "{'loss': 0.0956, 'grad_norm': 2.689420461654663, 'learning_rate': 3.163628106866172e-05, 'epoch': 3.94}\n",
+ "{'loss': 0.1731, 'grad_norm': 2.630415439605713, 'learning_rate': 3.115414001894974e-05, 'epoch': 3.96}\n",
+ "{'loss': 0.1458, 'grad_norm': 2.928737163543701, 'learning_rate': 3.067403295648566e-05, 'epoch': 3.98}\n",
+ "{'loss': 0.1278, 'grad_norm': 2.467090129852295, 'learning_rate': 3.019601169804216e-05, 'epoch': 4.0}\n",
+ " 67%|███████████████████████▎ | 2240/3360 [4:14:45<2:03:53, 6.64s/it][INFO|trainer.py:3788] 2024-07-04 19:41:08,043 >> \n",
+ "***** Running Evaluation *****\n",
+ "[INFO|trainer.py:3790] 2024-07-04 19:41:08,044 >> Num examples = 46\n",
+ "[INFO|trainer.py:3793] 2024-07-04 19:41:08,044 >> Batch size = 1\n",
+ "\n",
+ " 0%| | 0/46 [00:00, ?it/s]\u001b[A\n",
+ " 4%|█▉ | 2/46 [00:00<00:03, 13.68it/s]\u001b[A\n",
+ " 9%|███▊ | 4/46 [00:00<00:04, 8.54it/s]\u001b[A\n",
+ " 11%|████▊ | 5/46 [00:00<00:05, 8.15it/s]\u001b[A\n",
+ " 13%|█████▋ | 6/46 [00:00<00:05, 7.67it/s]\u001b[A\n",
+ " 15%|██████▋ | 7/46 [00:00<00:05, 7.17it/s]\u001b[A\n",
+ " 17%|███████▋ | 8/46 [00:01<00:05, 6.86it/s]\u001b[A\n",
+ " 20%|████████▌ | 9/46 [00:01<00:05, 6.53it/s]\u001b[A\n",
+ " 22%|█████████▎ | 10/46 [00:01<00:06, 5.50it/s]\u001b[A\n",
+ " 24%|██████████▎ | 11/46 [00:01<00:07, 4.52it/s]\u001b[A\n",
+ " 26%|███████████▏ | 12/46 [00:03<00:18, 1.87it/s]\u001b[A\n",
+ " 28%|████████████▏ | 13/46 [00:04<00:22, 1.44it/s]\u001b[A\n",
+ " 30%|█████████████ | 14/46 [00:04<00:18, 1.78it/s]\u001b[A\n",
+ " 33%|██████████████ | 15/46 [00:04<00:13, 2.22it/s]\u001b[A\n",
+ " 35%|██████████████▉ | 16/46 [00:04<00:11, 2.72it/s]\u001b[A\n",
+ " 37%|███████████████▉ | 17/46 [00:04<00:09, 3.13it/s]\u001b[A\n",
+ " 39%|████████████████▊ | 18/46 [00:05<00:07, 3.65it/s]\u001b[A\n",
+ " 41%|█████████████████▊ | 19/46 [00:05<00:06, 4.08it/s]\u001b[A\n",
+ " 43%|██████████████████▋ | 20/46 [00:05<00:05, 4.42it/s]\u001b[A\n",
+ " 46%|███████████████████▋ | 21/46 [00:05<00:05, 4.48it/s]\u001b[A\n",
+ " 48%|████████████████████▌ | 22/46 [00:05<00:05, 4.72it/s]\u001b[A\n",
+ " 50%|█████████████████████▌ | 23/46 [00:06<00:04, 4.77it/s]\u001b[A\n",
+ " 52%|██████████████████████▍ | 24/46 [00:06<00:05, 4.21it/s]\u001b[A\n",
+ " 54%|███████████████████████▎ | 25/46 [00:06<00:05, 4.13it/s]\u001b[A\n",
+ " 57%|████████████████████████▎ | 26/46 [00:06<00:05, 3.63it/s]\u001b[A\n",
+ " 59%|█████████████████████████▏ | 27/46 [00:07<00:06, 3.16it/s]\u001b[A\n",
+ " 61%|██████████████████████████▏ | 28/46 [00:09<00:13, 1.34it/s]\u001b[A\n",
+ " 63%|███████████████████████████ | 29/46 [00:09<00:11, 1.43it/s]\u001b[A\n",
+ " 65%|████████████████████████████ | 30/46 [00:09<00:08, 1.84it/s]\u001b[A\n",
+ " 67%|████████████████████████████▉ | 31/46 [00:10<00:06, 2.28it/s]\u001b[A\n",
+ " 70%|█████████████████████████████▉ | 32/46 [00:10<00:05, 2.71it/s]\u001b[A\n",
+ " 72%|██████████████████████████████▊ | 33/46 [00:10<00:04, 3.14it/s]\u001b[A\n",
+ " 74%|███████████████████████████████▊ | 34/46 [00:10<00:03, 3.74it/s]\u001b[A\n",
+ " 76%|████████████████████████████████▋ | 35/46 [00:10<00:02, 4.33it/s]\u001b[A\n",
+ " 78%|█████████████████████████████████▋ | 36/46 [00:10<00:02, 4.70it/s]\u001b[A\n",
+ " 80%|██████████████████████████████████▌ | 37/46 [00:11<00:01, 5.04it/s]\u001b[A\n",
+ " 83%|███████████████████████████████████▌ | 38/46 [00:11<00:01, 5.26it/s]\u001b[A\n",
+ " 85%|████████████████████████████████████▍ | 39/46 [00:11<00:01, 5.47it/s]\u001b[A\n",
+ " 87%|█████████████████████████████████████▍ | 40/46 [00:11<00:01, 5.30it/s]\u001b[A\n",
+ " 89%|██████████████████████████████████████▎ | 41/46 [00:11<00:00, 5.17it/s]\u001b[A\n",
+ " 91%|███████████████████████████████████████▎ | 42/46 [00:12<00:00, 5.07it/s]\u001b[A\n",
+ " 93%|████████████████████████████████████████▏ | 43/46 [00:12<00:00, 5.08it/s]\u001b[A\n",
+ " 96%|█████████████████████████████████████████▏ | 44/46 [00:12<00:00, 4.41it/s]\u001b[A\n",
+ " 98%|██████████████████████████████████████████ | 45/46 [00:12<00:00, 4.08it/s]\u001b[A\n",
+ " \u001b[A\n",
+ "\u001b[A{'eval_loss': 2.360382080078125, 'eval_runtime': 13.265, 'eval_samples_per_second': 3.468, 'eval_steps_per_second': 3.468, 'epoch': 4.0}\n",
+ " 67%|███████████████████████▎ | 2240/3360 [4:14:58<2:03:53, 6.64s/it]\n",
+ "100%|███████████████████████████████████████████| 46/46 [00:13<00:00, 4.15it/s]\u001b[A\n",
+ " \u001b[A[INFO|trainer.py:3478] 2024-07-04 19:41:21,314 >> Saving model checkpoint to saves/qwen2-7b/lora/sft/checkpoint-2240\n",
+ "[INFO|configuration_utils.py:733] 2024-07-04 19:41:22,728 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--unsloth--qwen2-7b-instruct-bnb-4bit/snapshots/8d8ce83e5c9fc23482eeae78027d1fc87bc2edad/config.json\n",
+ "[INFO|configuration_utils.py:800] 2024-07-04 19:41:22,729 >> Model config Qwen2Config {\n",
+ " \"_name_or_path\": \"Qwen/Qwen2-7B-Instruct\",\n",
+ " \"architectures\": [\n",
+ " \"Qwen2ForCausalLM\"\n",
+ " ],\n",
+ " \"attention_dropout\": 0.0,\n",
+ " \"bos_token_id\": 151643,\n",
+ " \"eos_token_id\": 151645,\n",
+ " \"hidden_act\": \"silu\",\n",
+ " \"hidden_size\": 3584,\n",
+ " \"initializer_range\": 0.02,\n",
+ " \"intermediate_size\": 18944,\n",
+ " \"max_position_embeddings\": 32768,\n",
+ " \"max_window_layers\": 28,\n",
+ " \"model_type\": \"qwen2\",\n",
+ " \"num_attention_heads\": 28,\n",
+ " \"num_hidden_layers\": 28,\n",
+ " \"num_key_value_heads\": 4,\n",
+ " \"quantization_config\": {\n",
+ " \"_load_in_4bit\": true,\n",
+ " \"_load_in_8bit\": false,\n",
+ " \"bnb_4bit_compute_dtype\": \"bfloat16\",\n",
+ " \"bnb_4bit_quant_storage\": \"uint8\",\n",
+ " \"bnb_4bit_quant_type\": \"nf4\",\n",
+ " \"bnb_4bit_use_double_quant\": true,\n",
+ " \"llm_int8_enable_fp32_cpu_offload\": false,\n",
+ " \"llm_int8_has_fp16_weight\": false,\n",
+ " \"llm_int8_skip_modules\": null,\n",
+ " \"llm_int8_threshold\": 6.0,\n",
+ " \"load_in_4bit\": true,\n",
+ " \"load_in_8bit\": false,\n",
+ " \"quant_method\": \"bitsandbytes\"\n",
+ " },\n",
+ " \"rms_norm_eps\": 1e-06,\n",
+ " \"rope_theta\": 1000000.0,\n",
+ " \"sliding_window\": 131072,\n",
+ " \"tie_word_embeddings\": false,\n",
+ " \"torch_dtype\": \"bfloat16\",\n",
+ " \"transformers_version\": \"4.42.3\",\n",
+ " \"use_cache\": true,\n",
+ " \"use_sliding_window\": false,\n",
+ " \"vocab_size\": 152064\n",
+ "}\n",
+ "\n",
+ "[INFO|tokenization_utils_base.py:2574] 2024-07-04 19:41:22,849 >> tokenizer config file saved in saves/qwen2-7b/lora/sft/checkpoint-2240/tokenizer_config.json\n",
+ "[INFO|tokenization_utils_base.py:2583] 2024-07-04 19:41:22,850 >> Special tokens file saved in saves/qwen2-7b/lora/sft/checkpoint-2240/special_tokens_map.json\n",
+ "{'loss': 0.0535, 'grad_norm': 0.6558727025985718, 'learning_rate': 2.9720127835276256e-05, 'epoch': 4.02}\n",
+ "{'loss': 0.0402, 'grad_norm': 2.1889960765838623, 'learning_rate': 2.9246432729161055e-05, 'epoch': 4.03}\n",
+ "{'loss': 0.0377, 'grad_norm': 0.8100994229316711, 'learning_rate': 2.8774977504442647e-05, 'epoch': 4.05}\n",
+ "{'loss': 0.0398, 'grad_norm': 2.8209896087646484, 'learning_rate': 2.8305813044122097e-05, 'epoch': 4.07}\n",
+ "{'loss': 0.0308, 'grad_norm': 1.5516138076782227, 'learning_rate': 2.7838989983964065e-05, 'epoch': 4.09}\n",
+ "{'loss': 0.0597, 'grad_norm': 4.609562873840332, 'learning_rate': 2.737455870703155e-05, 'epoch': 4.11}\n",
+ "{'loss': 0.0231, 'grad_norm': 2.7549400329589844, 'learning_rate': 2.6912569338248315e-05, 'epoch': 4.12}\n",
+ "{'loss': 0.0448, 'grad_norm': 5.040008068084717, 'learning_rate': 2.645307173898901e-05, 'epoch': 4.14}\n",
+ "{'loss': 0.0253, 'grad_norm': 1.6336179971694946, 'learning_rate': 2.5996115501697694e-05, 'epoch': 4.16}\n",
+ "{'loss': 0.0289, 'grad_norm': 0.8074469566345215, 'learning_rate': 2.5541749944535554e-05, 'epoch': 4.18}\n",
+ "{'loss': 0.0338, 'grad_norm': 1.710808277130127, 'learning_rate': 2.5090024106057962e-05, 'epoch': 4.19}\n",
+ "{'loss': 0.0379, 'grad_norm': 2.1768016815185547, 'learning_rate': 2.464098673992205e-05, 'epoch': 4.21}\n",
+ "{'loss': 0.0682, 'grad_norm': 3.6282131671905518, 'learning_rate': 2.4194686309624663e-05, 'epoch': 4.23}\n",
+ "{'loss': 0.0515, 'grad_norm': 1.100537896156311, 'learning_rate': 2.3751170983272e-05, 'epoch': 4.25}\n",
+ "{'loss': 0.0354, 'grad_norm': 0.6081830859184265, 'learning_rate': 2.3310488628380757e-05, 'epoch': 4.27}\n",
+ "{'loss': 0.0334, 'grad_norm': 1.5605361461639404, 'learning_rate': 2.2872686806712035e-05, 'epoch': 4.28}\n",
+ "{'loss': 0.0492, 'grad_norm': 2.5406620502471924, 'learning_rate': 2.243781276913811e-05, 'epoch': 4.3}\n",
+ "{'loss': 0.0279, 'grad_norm': 2.160897970199585, 'learning_rate': 2.200591345054267e-05, 'epoch': 4.32}\n",
+ "{'loss': 0.0342, 'grad_norm': 2.3391342163085938, 'learning_rate': 2.157703546475539e-05, 'epoch': 4.34}\n",
+ "{'loss': 0.0332, 'grad_norm': 1.3248311281204224, 'learning_rate': 2.115122509952085e-05, 'epoch': 4.36}\n",
+ "{'loss': 0.0334, 'grad_norm': 2.741152763366699, 'learning_rate': 2.0728528311502976e-05, 'epoch': 4.37}\n",
+ "{'loss': 0.0542, 'grad_norm': 2.237809419631958, 'learning_rate': 2.0308990721324927e-05, 'epoch': 4.39}\n",
+ "{'loss': 0.0344, 'grad_norm': 3.8997409343719482, 'learning_rate': 1.989265760864542e-05, 'epoch': 4.41}\n",
+ "{'loss': 0.0439, 'grad_norm': 0.6022194623947144, 'learning_rate': 1.947957390727185e-05, 'epoch': 4.43}\n",
+ "{'loss': 0.0346, 'grad_norm': 1.2296243906021118, 'learning_rate': 1.906978420031059e-05, 'epoch': 4.44}\n",
+ "{'loss': 0.0209, 'grad_norm': 0.28131213784217834, 'learning_rate': 1.8663332715355396e-05, 'epoch': 4.46}\n",
+ "{'loss': 0.0271, 'grad_norm': 2.75640606880188, 'learning_rate': 1.8260263319713844e-05, 'epoch': 4.48}\n",
+ "{'loss': 0.0408, 'grad_norm': 3.289303779602051, 'learning_rate': 1.7860619515673033e-05, 'epoch': 4.5}\n",
+ "{'loss': 0.0344, 'grad_norm': 1.2157098054885864, 'learning_rate': 1.746444443580433e-05, 'epoch': 4.52}\n",
+ "{'loss': 0.0272, 'grad_norm': 1.5058122873306274, 'learning_rate': 1.7071780838308288e-05, 'epoch': 4.53}\n",
+ "{'loss': 0.0283, 'grad_norm': 1.8522496223449707, 'learning_rate': 1.6682671102399805e-05, 'epoch': 4.55}\n",
+ "{'loss': 0.027, 'grad_norm': 2.126176595687866, 'learning_rate': 1.629715722373423e-05, 'epoch': 4.57}\n",
+ "{'loss': 0.0434, 'grad_norm': 2.065514326095581, 'learning_rate': 1.5915280809874932e-05, 'epoch': 4.59}\n",
+ "{'loss': 0.0427, 'grad_norm': 2.2047812938690186, 'learning_rate': 1.553708307580265e-05, 'epoch': 4.61}\n",
+ "{'loss': 0.0266, 'grad_norm': 2.1723501682281494, 'learning_rate': 1.5162604839467265e-05, 'epoch': 4.62}\n",
+ "{'loss': 0.0201, 'grad_norm': 1.7166253328323364, 'learning_rate': 1.4791886517382413e-05, 'epoch': 4.64}\n",
+ "{'loss': 0.0306, 'grad_norm': 0.5556966066360474, 'learning_rate': 1.4424968120263504e-05, 'epoch': 4.66}\n",
+ "{'loss': 0.0249, 'grad_norm': 1.101198434829712, 'learning_rate': 1.4061889248709343e-05, 'epoch': 4.68}\n",
+ "{'loss': 0.0324, 'grad_norm': 0.6396570801734924, 'learning_rate': 1.370268908892825e-05, 'epoch': 4.69}\n",
+ "{'loss': 0.0303, 'grad_norm': 2.5093636512756348, 'learning_rate': 1.3347406408508695e-05, 'epoch': 4.71}\n",
+ "{'loss': 0.0522, 'grad_norm': 1.5739742517471313, 'learning_rate': 1.2996079552235263e-05, 'epoch': 4.73}\n",
+ "{'loss': 0.0293, 'grad_norm': 0.9539183974266052, 'learning_rate': 1.264874643795021e-05, 'epoch': 4.75}\n",
+ "{'loss': 0.0289, 'grad_norm': 0.5063753724098206, 'learning_rate': 1.230544455246101e-05, 'epoch': 4.77}\n",
+ "{'loss': 0.0457, 'grad_norm': 1.6972631216049194, 'learning_rate': 1.1966210947494583e-05, 'epoch': 4.78}\n",
+ "{'loss': 0.0228, 'grad_norm': 0.8949175477027893, 'learning_rate': 1.1631082235698316e-05, 'epoch': 4.8}\n",
+ "{'loss': 0.0345, 'grad_norm': 1.8337916135787964, 'learning_rate': 1.130009458668863e-05, 'epoch': 4.82}\n",
+ "{'loss': 0.0221, 'grad_norm': 2.356985569000244, 'learning_rate': 1.097328372314721e-05, 'epoch': 4.84}\n",
+ "{'loss': 0.0328, 'grad_norm': 2.9775609970092773, 'learning_rate': 1.0650684916965559e-05, 'epoch': 4.85}\n",
+ "{'loss': 0.0298, 'grad_norm': 2.2749829292297363, 'learning_rate': 1.0332332985438248e-05, 'epoch': 4.87}\n",
+ "{'loss': 0.0411, 'grad_norm': 1.9781012535095215, 'learning_rate': 1.0018262287505086e-05, 'epoch': 4.89}\n",
+ "{'loss': 0.0461, 'grad_norm': 1.8106870651245117, 'learning_rate': 9.708506720042932e-06, 'epoch': 4.91}\n",
+ "{'loss': 0.0354, 'grad_norm': 1.3991378545761108, 'learning_rate': 9.403099714207175e-06, 'epoch': 4.93}\n",
+ "{'loss': 0.0269, 'grad_norm': 0.6455625891685486, 'learning_rate': 9.102074231823727e-06, 'epoch': 4.94}\n",
+ "{'loss': 0.0339, 'grad_norm': 1.2710880041122437, 'learning_rate': 8.805462761831418e-06, 'epoch': 4.96}\n",
+ "{'loss': 0.0334, 'grad_norm': 1.1816545724868774, 'learning_rate': 8.513297316775625e-06, 'epoch': 4.98}\n",
+ "{'loss': 0.0301, 'grad_norm': 1.668415904045105, 'learning_rate': 8.225609429353187e-06, 'epoch': 5.0}\n",
+ " 83%|█████████████████████████████▏ | 2800/3360 [5:16:56<1:03:45, 6.83s/it][INFO|trainer.py:3788] 2024-07-04 20:43:18,672 >> \n",
+ "***** Running Evaluation *****\n",
+ "[INFO|trainer.py:3790] 2024-07-04 20:43:18,672 >> Num examples = 46\n",
+ "[INFO|trainer.py:3793] 2024-07-04 20:43:18,673 >> Batch size = 1\n",
+ "\n",
+ " 0%| | 0/46 [00:00, ?it/s]\u001b[A\n",
+ " 4%|█▉ | 2/46 [00:00<00:03, 13.51it/s]\u001b[A\n",
+ " 9%|███▊ | 4/46 [00:00<00:04, 9.12it/s]\u001b[A\n",
+ " 11%|████▊ | 5/46 [00:00<00:04, 8.61it/s]\u001b[A\n",
+ " 13%|█████▋ | 6/46 [00:00<00:04, 8.44it/s]\u001b[A\n",
+ " 15%|██████▋ | 7/46 [00:00<00:04, 8.01it/s]\u001b[A\n",
+ " 17%|███████▋ | 8/46 [00:00<00:04, 7.64it/s]\u001b[A\n",
+ " 20%|████████▌ | 9/46 [00:01<00:05, 7.15it/s]\u001b[A\n",
+ " 22%|█████████▎ | 10/46 [00:01<00:05, 6.01it/s]\u001b[A\n",
+ " 24%|██████████▎ | 11/46 [00:01<00:07, 4.84it/s]\u001b[A\n",
+ " 26%|███████████▏ | 12/46 [00:01<00:07, 4.34it/s]\u001b[A\n",
+ " 28%|████████████▏ | 13/46 [00:02<00:07, 4.27it/s]\u001b[A\n",
+ " 30%|█████████████ | 14/46 [00:02<00:07, 4.39it/s]\u001b[A\n",
+ " 33%|██████████████ | 15/46 [00:02<00:07, 4.38it/s]\u001b[A\n",
+ " 35%|██████████████▉ | 16/46 [00:02<00:06, 4.41it/s]\u001b[A\n",
+ " 37%|███████████████▉ | 17/46 [00:03<00:06, 4.23it/s]\u001b[A\n",
+ " 39%|████████████████▊ | 18/46 [00:03<00:06, 4.24it/s]\u001b[A\n",
+ " 41%|█████████████████▊ | 19/46 [00:03<00:06, 4.49it/s]\u001b[A\n",
+ " 43%|██████████████████▋ | 20/46 [00:03<00:05, 4.73it/s]\u001b[A\n",
+ " 46%|███████████████████▋ | 21/46 [00:03<00:05, 4.44it/s]\u001b[A\n",
+ " 48%|████████████████████▌ | 22/46 [00:04<00:09, 2.42it/s]\u001b[A\n",
+ " 50%|█████████████████████▌ | 23/46 [00:05<00:14, 1.60it/s]\u001b[A\n",
+ " 52%|██████████████████████▍ | 24/46 [00:06<00:10, 2.01it/s]\u001b[A\n",
+ " 54%|███████████████████████▎ | 25/46 [00:06<00:08, 2.56it/s]\u001b[A\n",
+ " 57%|████████████████████████▎ | 26/46 [00:06<00:06, 3.08it/s]\u001b[A\n",
+ " 59%|█████████████████████████▏ | 27/46 [00:06<00:05, 3.68it/s]\u001b[A\n",
+ " 61%|██████████████████████████▏ | 28/46 [00:06<00:04, 4.17it/s]\u001b[A\n",
+ " 63%|███████████████████████████ | 29/46 [00:06<00:03, 4.56it/s]\u001b[A\n",
+ " 65%|████████████████████████████ | 30/46 [00:07<00:03, 4.55it/s]\u001b[A\n",
+ " 67%|████████████████████████████▉ | 31/46 [00:07<00:03, 4.33it/s]\u001b[A\n",
+ " 70%|█████████████████████████████▉ | 32/46 [00:07<00:03, 3.76it/s]\u001b[A\n",
+ " 72%|██████████████████████████████▊ | 33/46 [00:08<00:04, 2.88it/s]\u001b[A\n",
+ " 74%|███████████████████████████████▊ | 34/46 [00:08<00:04, 2.61it/s]\u001b[A\n",
+ " 76%|████████████████████████████████▋ | 35/46 [00:09<00:05, 2.11it/s]\u001b[A\n",
+ " 78%|█████████████████████████████████▋ | 36/46 [00:09<00:04, 2.31it/s]\u001b[A\n",
+ " 80%|██████████████████████████████████▌ | 37/46 [00:10<00:03, 2.61it/s]\u001b[A\n",
+ " 83%|███████████████████████████████████▌ | 38/46 [00:10<00:02, 2.80it/s]\u001b[A\n",
+ " 85%|████████████████████████████████████▍ | 39/46 [00:10<00:02, 2.93it/s]\u001b[A\n",
+ " 87%|█████████████████████████████████████▍ | 40/46 [00:10<00:01, 3.12it/s]\u001b[A\n",
+ " 89%|██████████████████████████████████████▎ | 41/46 [00:11<00:01, 3.19it/s]\u001b[A\n",
+ " 91%|███████████████████████████████████████▎ | 42/46 [00:11<00:01, 3.09it/s]\u001b[A\n",
+ " 93%|████████████████████████████████████████▏ | 43/46 [00:11<00:00, 3.34it/s]\u001b[A\n",
+ " 96%|█████████████████████████████████████████▏ | 44/46 [00:11<00:00, 3.84it/s]\u001b[A\n",
+ " 98%|██████████████████████████████████████████ | 45/46 [00:12<00:00, 4.50it/s]\u001b[A\n",
+ " \u001b[A\n",
+ "\u001b[A{'eval_loss': 2.715369939804077, 'eval_runtime': 12.5246, 'eval_samples_per_second': 3.673, 'eval_steps_per_second': 3.673, 'epoch': 5.0}\n",
+ " 83%|█████████████████████████████▏ | 2800/3360 [5:17:08<1:03:45, 6.83s/it]\n",
+ "100%|███████████████████████████████████████████| 46/46 [00:12<00:00, 5.17it/s]\u001b[A\n",
+ " \u001b[A[INFO|trainer.py:3478] 2024-07-04 20:43:31,199 >> Saving model checkpoint to saves/qwen2-7b/lora/sft/checkpoint-2800\n",
+ "[INFO|configuration_utils.py:733] 2024-07-04 20:43:32,430 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--unsloth--qwen2-7b-instruct-bnb-4bit/snapshots/8d8ce83e5c9fc23482eeae78027d1fc87bc2edad/config.json\n",
+ "[INFO|configuration_utils.py:800] 2024-07-04 20:43:32,431 >> Model config Qwen2Config {\n",
+ " \"_name_or_path\": \"Qwen/Qwen2-7B-Instruct\",\n",
+ " \"architectures\": [\n",
+ " \"Qwen2ForCausalLM\"\n",
+ " ],\n",
+ " \"attention_dropout\": 0.0,\n",
+ " \"bos_token_id\": 151643,\n",
+ " \"eos_token_id\": 151645,\n",
+ " \"hidden_act\": \"silu\",\n",
+ " \"hidden_size\": 3584,\n",
+ " \"initializer_range\": 0.02,\n",
+ " \"intermediate_size\": 18944,\n",
+ " \"max_position_embeddings\": 32768,\n",
+ " \"max_window_layers\": 28,\n",
+ " \"model_type\": \"qwen2\",\n",
+ " \"num_attention_heads\": 28,\n",
+ " \"num_hidden_layers\": 28,\n",
+ " \"num_key_value_heads\": 4,\n",
+ " \"quantization_config\": {\n",
+ " \"_load_in_4bit\": true,\n",
+ " \"_load_in_8bit\": false,\n",
+ " \"bnb_4bit_compute_dtype\": \"bfloat16\",\n",
+ " \"bnb_4bit_quant_storage\": \"uint8\",\n",
+ " \"bnb_4bit_quant_type\": \"nf4\",\n",
+ " \"bnb_4bit_use_double_quant\": true,\n",
+ " \"llm_int8_enable_fp32_cpu_offload\": false,\n",
+ " \"llm_int8_has_fp16_weight\": false,\n",
+ " \"llm_int8_skip_modules\": null,\n",
+ " \"llm_int8_threshold\": 6.0,\n",
+ " \"load_in_4bit\": true,\n",
+ " \"load_in_8bit\": false,\n",
+ " \"quant_method\": \"bitsandbytes\"\n",
+ " },\n",
+ " \"rms_norm_eps\": 1e-06,\n",
+ " \"rope_theta\": 1000000.0,\n",
+ " \"sliding_window\": 131072,\n",
+ " \"tie_word_embeddings\": false,\n",
+ " \"torch_dtype\": \"bfloat16\",\n",
+ " \"transformers_version\": \"4.42.3\",\n",
+ " \"use_cache\": true,\n",
+ " \"use_sliding_window\": false,\n",
+ " \"vocab_size\": 152064\n",
+ "}\n",
+ "\n",
+ "[INFO|tokenization_utils_base.py:2574] 2024-07-04 20:43:32,551 >> tokenizer config file saved in saves/qwen2-7b/lora/sft/checkpoint-2800/tokenizer_config.json\n",
+ "[INFO|tokenization_utils_base.py:2583] 2024-07-04 20:43:32,551 >> Special tokens file saved in saves/qwen2-7b/lora/sft/checkpoint-2800/special_tokens_map.json\n",
+ "{'loss': 0.0078, 'grad_norm': 1.6351463794708252, 'learning_rate': 7.942430149009161e-06, 'epoch': 5.02}\n",
+ "{'loss': 0.0062, 'grad_norm': 0.11965573579072952, 'learning_rate': 7.663790038585793e-06, 'epoch': 5.03}\n",
+ "{'loss': 0.0105, 'grad_norm': 0.05803072825074196, 'learning_rate': 7.389719171023857e-06, 'epoch': 5.05}\n",
+ "{'loss': 0.0058, 'grad_norm': 0.143271803855896, 'learning_rate': 7.1202471261170245e-06, 'epoch': 5.07}\n",
+ "{'loss': 0.0035, 'grad_norm': 0.17391343414783478, 'learning_rate': 6.855402987319348e-06, 'epoch': 5.09}\n",
+ "{'loss': 0.0072, 'grad_norm': 0.19679808616638184, 'learning_rate': 6.595215338606397e-06, 'epoch': 5.1}\n",
+ "{'loss': 0.0123, 'grad_norm': 0.09687481820583344, 'learning_rate': 6.339712261390213e-06, 'epoch': 5.12}\n",
+ "{'loss': 0.0057, 'grad_norm': 0.069660983979702, 'learning_rate': 6.088921331488568e-06, 'epoch': 5.14}\n",
+ "{'loss': 0.0072, 'grad_norm': 1.3626017570495605, 'learning_rate': 5.8428696161488215e-06, 'epoch': 5.16}\n",
+ "{'loss': 0.0047, 'grad_norm': 2.0419363975524902, 'learning_rate': 5.601583671126531e-06, 'epoch': 5.18}\n",
+ "{'loss': 0.0097, 'grad_norm': 0.2337513566017151, 'learning_rate': 5.365089537819434e-06, 'epoch': 5.19}\n",
+ "{'loss': 0.0042, 'grad_norm': 0.05815720558166504, 'learning_rate': 5.133412740456806e-06, 'epoch': 5.21}\n",
+ "{'loss': 0.008, 'grad_norm': 1.3515617847442627, 'learning_rate': 4.906578283344759e-06, 'epoch': 5.23}\n",
+ "{'loss': 0.0087, 'grad_norm': 0.37659117579460144, 'learning_rate': 4.684610648167503e-06, 'epoch': 5.25}\n",
+ "{'loss': 0.0031, 'grad_norm': 0.33385252952575684, 'learning_rate': 4.467533791345191e-06, 'epoch': 5.27}\n",
+ "{'loss': 0.0067, 'grad_norm': 0.15747712552547455, 'learning_rate': 4.255371141448272e-06, 'epoch': 5.28}\n",
+ "{'loss': 0.007, 'grad_norm': 1.2530337572097778, 'learning_rate': 4.048145596668967e-06, 'epoch': 5.3}\n",
+ "{'loss': 0.0136, 'grad_norm': 2.182263135910034, 'learning_rate': 3.84587952234991e-06, 'epoch': 5.32}\n",
+ "{'loss': 0.0035, 'grad_norm': 1.1545133590698242, 'learning_rate': 3.6485947485702832e-06, 'epoch': 5.34}\n",
+ "{'loss': 0.0061, 'grad_norm': 0.33282843232154846, 'learning_rate': 3.4563125677897932e-06, 'epoch': 5.35}\n",
+ "{'loss': 0.004, 'grad_norm': 0.2662621736526489, 'learning_rate': 3.269053732550581e-06, 'epoch': 5.37}\n",
+ "{'loss': 0.0071, 'grad_norm': 1.1687767505645752, 'learning_rate': 3.086838453237506e-06, 'epoch': 5.39}\n",
+ "{'loss': 0.0082, 'grad_norm': 0.12040398269891739, 'learning_rate': 2.9096863958968268e-06, 'epoch': 5.41}\n",
+ "{'loss': 0.0042, 'grad_norm': 0.22544123232364655, 'learning_rate': 2.737616680113758e-06, 'epoch': 5.43}\n",
+ "{'loss': 0.0056, 'grad_norm': 0.3548804521560669, 'learning_rate': 2.570647876948895e-06, 'epoch': 5.44}\n",
+ "{'loss': 0.0133, 'grad_norm': 0.7295147180557251, 'learning_rate': 2.408798006933882e-06, 'epoch': 5.46}\n",
+ "{'loss': 0.0125, 'grad_norm': 0.05939454585313797, 'learning_rate': 2.252084538126542e-06, 'epoch': 5.48}\n",
+ "{'loss': 0.0064, 'grad_norm': 0.5182624459266663, 'learning_rate': 2.100524384225555e-06, 'epoch': 5.5}\n",
+ "{'loss': 0.0043, 'grad_norm': 0.13460208475589752, 'learning_rate': 1.9541339027450256e-06, 'epoch': 5.52}\n",
+ "{'loss': 0.0066, 'grad_norm': 0.8837604522705078, 'learning_rate': 1.8129288932490274e-06, 'epoch': 5.53}\n",
+ "{'loss': 0.0092, 'grad_norm': 0.332492858171463, 'learning_rate': 1.6769245956464396e-06, 'epoch': 5.55}\n",
+ "{'loss': 0.0048, 'grad_norm': 0.2933903634548187, 'learning_rate': 1.5461356885461075e-06, 'epoch': 5.57}\n",
+ "{'loss': 0.0054, 'grad_norm': 0.371267706155777, 'learning_rate': 1.4205762876726092e-06, 'epoch': 5.59}\n",
+ "{'loss': 0.0083, 'grad_norm': 0.14521144330501556, 'learning_rate': 1.3002599443428243e-06, 'epoch': 5.6}\n",
+ "{'loss': 0.0073, 'grad_norm': 1.345499038696289, 'learning_rate': 1.1851996440033319e-06, 'epoch': 5.62}\n",
+ "{'loss': 0.0064, 'grad_norm': 0.025303443893790245, 'learning_rate': 1.0754078048289374e-06, 'epoch': 5.64}\n",
+ "{'loss': 0.0049, 'grad_norm': 1.9373172521591187, 'learning_rate': 9.708962763824048e-07, 'epoch': 5.66}\n",
+ "{'loss': 0.0063, 'grad_norm': 0.6459546685218811, 'learning_rate': 8.716763383355864e-07, 'epoch': 5.68}\n",
+ "{'loss': 0.005, 'grad_norm': 1.4349000453948975, 'learning_rate': 7.777586992519959e-07, 'epoch': 5.69}\n",
+ "{'loss': 0.0103, 'grad_norm': 0.5553787350654602, 'learning_rate': 6.891534954310885e-07, 'epoch': 5.71}\n",
+ "{'loss': 0.0054, 'grad_norm': 0.19051159918308258, 'learning_rate': 6.058702898142643e-07, 'epoch': 5.73}\n",
+ "{'loss': 0.0059, 'grad_norm': 0.36273324489593506, 'learning_rate': 5.279180709527765e-07, 'epoch': 5.75}\n",
+ "{'loss': 0.0084, 'grad_norm': 0.4064849019050598, 'learning_rate': 4.553052520375911e-07, 'epoch': 5.77}\n",
+ "{'loss': 0.0033, 'grad_norm': 0.2132396250963211, 'learning_rate': 3.8803966999139684e-07, 'epoch': 5.78}\n",
+ "{'loss': 0.0176, 'grad_norm': 2.6782572269439697, 'learning_rate': 3.261285846227868e-07, 'epoch': 5.8}\n",
+ "{'loss': 0.0064, 'grad_norm': 0.27686187624931335, 'learning_rate': 2.6957867784270787e-07, 'epoch': 5.82}\n",
+ "{'loss': 0.0041, 'grad_norm': 0.86066734790802, 'learning_rate': 2.1839605294330933e-07, 'epoch': 5.84}\n",
+ "{'loss': 0.0082, 'grad_norm': 0.16934335231781006, 'learning_rate': 1.725862339392259e-07, 'epoch': 5.85}\n",
+ "{'loss': 0.0047, 'grad_norm': 0.6522320508956909, 'learning_rate': 1.3215416497138754e-07, 'epoch': 5.87}\n",
+ "{'loss': 0.0063, 'grad_norm': 0.5966488718986511, 'learning_rate': 9.710420977340762e-08, 'epoch': 5.89}\n",
+ "{'loss': 0.0038, 'grad_norm': 0.1901843547821045, 'learning_rate': 6.744015120061509e-08, 'epoch': 5.91}\n",
+ "{'loss': 0.0123, 'grad_norm': 2.4536399841308594, 'learning_rate': 4.316519082179227e-08, 'epoch': 5.93}\n",
+ "{'loss': 0.0048, 'grad_norm': 0.5865656733512878, 'learning_rate': 2.4281948573617874e-08, 'epoch': 5.94}\n",
+ "{'loss': 0.006, 'grad_norm': 0.9566450715065002, 'learning_rate': 1.0792462477909882e-08, 'epoch': 5.96}\n",
+ "{'loss': 0.0043, 'grad_norm': 1.3847167491912842, 'learning_rate': 2.6981884216847884e-09, 'epoch': 5.98}\n",
+ "{'loss': 0.0049, 'grad_norm': 1.5407752990722656, 'learning_rate': 0.0, 'epoch': 6.0}\n",
+ "100%|█████████████████████████████████████| 3360/3360 [6:19:44<00:00, 6.09s/it][INFO|trainer.py:3788] 2024-07-04 21:46:06,786 >> \n",
+ "***** Running Evaluation *****\n",
+ "[INFO|trainer.py:3790] 2024-07-04 21:46:06,786 >> Num examples = 46\n",
+ "[INFO|trainer.py:3793] 2024-07-04 21:46:06,786 >> Batch size = 1\n",
+ "\n",
+ " 0%| | 0/46 [00:00, ?it/s]\u001b[A\n",
+ " 4%|█▉ | 2/46 [00:00<00:02, 14.89it/s]\u001b[A\n",
+ " 9%|███▊ | 4/46 [00:00<00:04, 8.77it/s]\u001b[A\n",
+ " 13%|█████▋ | 6/46 [00:00<00:05, 7.73it/s]\u001b[A\n",
+ " 15%|██████▋ | 7/46 [00:00<00:05, 7.51it/s]\u001b[A\n",
+ " 17%|███████▋ | 8/46 [00:01<00:05, 7.23it/s]\u001b[A\n",
+ " 20%|████████▌ | 9/46 [00:01<00:05, 7.01it/s]\u001b[A\n",
+ " 22%|█████████▎ | 10/46 [00:01<00:05, 6.49it/s]\u001b[A\n",
+ " 24%|██████████▎ | 11/46 [00:01<00:06, 5.13it/s]\u001b[A\n",
+ " 26%|███████████▏ | 12/46 [00:02<00:09, 3.59it/s]\u001b[A\n",
+ " 28%|████████████▏ | 13/46 [00:04<00:25, 1.31it/s]\u001b[A\n",
+ " 30%|█████████████ | 14/46 [00:04<00:21, 1.48it/s]\u001b[A\n",
+ " 33%|██████████████ | 15/46 [00:04<00:17, 1.80it/s]\u001b[A\n",
+ " 35%|██████████████▉ | 16/46 [00:05<00:14, 2.05it/s]\u001b[A\n",
+ " 37%|███████████████▉ | 17/46 [00:05<00:12, 2.33it/s]\u001b[A\n",
+ " 39%|████████████████▊ | 18/46 [00:05<00:10, 2.67it/s]\u001b[A\n",
+ " 41%|█████████████████▊ | 19/46 [00:05<00:09, 2.93it/s]\u001b[A\n",
+ " 43%|██████████████████▋ | 20/46 [00:06<00:08, 3.07it/s]\u001b[A\n",
+ " 46%|███████████████████▋ | 21/46 [00:06<00:08, 2.99it/s]\u001b[A\n",
+ " 48%|████████████████████▌ | 22/46 [00:07<00:09, 2.66it/s]\u001b[A\n",
+ " 50%|█████████████████████▌ | 23/46 [00:07<00:09, 2.37it/s]\u001b[A\n",
+ " 52%|██████████████████████▍ | 24/46 [00:08<00:09, 2.37it/s]\u001b[A\n",
+ " 54%|███████████████████████▎ | 25/46 [00:08<00:08, 2.49it/s]\u001b[A\n",
+ " 57%|████████████████████████▎ | 26/46 [00:08<00:07, 2.56it/s]\u001b[A\n",
+ " 59%|█████████████████████████▏ | 27/46 [00:09<00:07, 2.63it/s]\u001b[A\n",
+ " 61%|██████████████████████████▏ | 28/46 [00:09<00:06, 2.76it/s]\u001b[A\n",
+ " 63%|███████████████████████████ | 29/46 [00:09<00:05, 2.96it/s]\u001b[A\n",
+ " 65%|████████████████████████████ | 30/46 [00:09<00:05, 3.06it/s]\u001b[A\n",
+ " 67%|████████████████████████████▉ | 31/46 [00:10<00:04, 3.26it/s]\u001b[A\n",
+ " 70%|█████████████████████████████▉ | 32/46 [00:10<00:04, 3.45it/s]\u001b[A\n",
+ " 72%|██████████████████████████████▊ | 33/46 [00:10<00:03, 3.75it/s]\u001b[A\n",
+ " 74%|███████████████████████████████▊ | 34/46 [00:10<00:02, 4.03it/s]\u001b[A\n",
+ " 76%|████████████████████████████████▋ | 35/46 [00:11<00:02, 4.23it/s]\u001b[A\n",
+ " 78%|█████████████████████████████████▋ | 36/46 [00:11<00:02, 4.50it/s]\u001b[A\n",
+ " 80%|██████████████████████████████████▌ | 37/46 [00:11<00:02, 4.37it/s]\u001b[A\n",
+ " 83%|███████████████████████████████████▌ | 38/46 [00:11<00:01, 4.16it/s]\u001b[A\n",
+ " 85%|████████████████████████████████████▍ | 39/46 [00:12<00:01, 4.24it/s]\u001b[A\n",
+ " 87%|█████████████████████████████████████▍ | 40/46 [00:12<00:01, 4.10it/s]\u001b[A\n",
+ " 89%|██████████████████████████████████████▎ | 41/46 [00:12<00:01, 3.90it/s]\u001b[A\n",
+ " 91%|███████████████████████████████████████▎ | 42/46 [00:12<00:01, 3.65it/s]\u001b[A\n",
+ " 93%|████████████████████████████████████████▏ | 43/46 [00:13<00:00, 3.55it/s]\u001b[A\n",
+ " 96%|█████████████████████████████████████████▏ | 44/46 [00:13<00:00, 3.46it/s]\u001b[A\n",
+ " 98%|██████████████████████████████████████████ | 45/46 [00:13<00:00, 3.14it/s]\u001b[A\n",
+ " \u001b[A\n",
+ "\u001b[A{'eval_loss': 2.9878008365631104, 'eval_runtime': 14.6844, 'eval_samples_per_second': 3.133, 'eval_steps_per_second': 3.133, 'epoch': 6.0}\n",
+ "100%|█████████████████████████████████████| 3360/3360 [6:19:59<00:00, 6.09s/it]\n",
+ "100%|███████████████████████████████████████████| 46/46 [00:14<00:00, 2.44it/s]\u001b[A\n",
+ " \u001b[A[INFO|trainer.py:3478] 2024-07-04 21:46:21,487 >> Saving model checkpoint to saves/qwen2-7b/lora/sft/checkpoint-3360\n",
+ "[INFO|configuration_utils.py:733] 2024-07-04 21:46:23,425 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--unsloth--qwen2-7b-instruct-bnb-4bit/snapshots/8d8ce83e5c9fc23482eeae78027d1fc87bc2edad/config.json\n",
+ "[INFO|configuration_utils.py:800] 2024-07-04 21:46:23,426 >> Model config Qwen2Config {\n",
+ " \"_name_or_path\": \"Qwen/Qwen2-7B-Instruct\",\n",
+ " \"architectures\": [\n",
+ " \"Qwen2ForCausalLM\"\n",
+ " ],\n",
+ " \"attention_dropout\": 0.0,\n",
+ " \"bos_token_id\": 151643,\n",
+ " \"eos_token_id\": 151645,\n",
+ " \"hidden_act\": \"silu\",\n",
+ " \"hidden_size\": 3584,\n",
+ " \"initializer_range\": 0.02,\n",
+ " \"intermediate_size\": 18944,\n",
+ " \"max_position_embeddings\": 32768,\n",
+ " \"max_window_layers\": 28,\n",
+ " \"model_type\": \"qwen2\",\n",
+ " \"num_attention_heads\": 28,\n",
+ " \"num_hidden_layers\": 28,\n",
+ " \"num_key_value_heads\": 4,\n",
+ " \"quantization_config\": {\n",
+ " \"_load_in_4bit\": true,\n",
+ " \"_load_in_8bit\": false,\n",
+ " \"bnb_4bit_compute_dtype\": \"bfloat16\",\n",
+ " \"bnb_4bit_quant_storage\": \"uint8\",\n",
+ " \"bnb_4bit_quant_type\": \"nf4\",\n",
+ " \"bnb_4bit_use_double_quant\": true,\n",
+ " \"llm_int8_enable_fp32_cpu_offload\": false,\n",
+ " \"llm_int8_has_fp16_weight\": false,\n",
+ " \"llm_int8_skip_modules\": null,\n",
+ " \"llm_int8_threshold\": 6.0,\n",
+ " \"load_in_4bit\": true,\n",
+ " \"load_in_8bit\": false,\n",
+ " \"quant_method\": \"bitsandbytes\"\n",
+ " },\n",
+ " \"rms_norm_eps\": 1e-06,\n",
+ " \"rope_theta\": 1000000.0,\n",
+ " \"sliding_window\": 131072,\n",
+ " \"tie_word_embeddings\": false,\n",
+ " \"torch_dtype\": \"bfloat16\",\n",
+ " \"transformers_version\": \"4.42.3\",\n",
+ " \"use_cache\": true,\n",
+ " \"use_sliding_window\": false,\n",
+ " \"vocab_size\": 152064\n",
+ "}\n",
+ "\n",
+ "[INFO|tokenization_utils_base.py:2574] 2024-07-04 21:46:23,565 >> tokenizer config file saved in saves/qwen2-7b/lora/sft/checkpoint-3360/tokenizer_config.json\n",
+ "[INFO|tokenization_utils_base.py:2583] 2024-07-04 21:46:23,565 >> Special tokens file saved in saves/qwen2-7b/lora/sft/checkpoint-3360/special_tokens_map.json\n",
+ "[INFO|:482] 2024-07-04 21:46:23,978 >> \n",
+ "\n",
+ "Training completed. Do not forget to share your model on huggingface.co/models =)\n",
+ "\n",
+ "\n",
+ "{'train_runtime': 22807.0531, 'train_samples_per_second': 1.179, 'train_steps_per_second': 0.147, 'train_loss': 0.5189488330479002, 'epoch': 6.0}\n",
+ "100%|█████████████████████████████████████| 3360/3360 [6:20:01<00:00, 6.79s/it]\n",
+ "[INFO|trainer.py:3478] 2024-07-04 21:46:23,983 >> Saving model checkpoint to saves/qwen2-7b/lora/sft\n",
+ "[INFO|configuration_utils.py:733] 2024-07-04 21:46:25,525 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--unsloth--qwen2-7b-instruct-bnb-4bit/snapshots/8d8ce83e5c9fc23482eeae78027d1fc87bc2edad/config.json\n",
+ "[INFO|configuration_utils.py:800] 2024-07-04 21:46:25,525 >> Model config Qwen2Config {\n",
+ " \"_name_or_path\": \"Qwen/Qwen2-7B-Instruct\",\n",
+ " \"architectures\": [\n",
+ " \"Qwen2ForCausalLM\"\n",
+ " ],\n",
+ " \"attention_dropout\": 0.0,\n",
+ " \"bos_token_id\": 151643,\n",
+ " \"eos_token_id\": 151645,\n",
+ " \"hidden_act\": \"silu\",\n",
+ " \"hidden_size\": 3584,\n",
+ " \"initializer_range\": 0.02,\n",
+ " \"intermediate_size\": 18944,\n",
+ " \"max_position_embeddings\": 32768,\n",
+ " \"max_window_layers\": 28,\n",
+ " \"model_type\": \"qwen2\",\n",
+ " \"num_attention_heads\": 28,\n",
+ " \"num_hidden_layers\": 28,\n",
+ " \"num_key_value_heads\": 4,\n",
+ " \"quantization_config\": {\n",
+ " \"_load_in_4bit\": true,\n",
+ " \"_load_in_8bit\": false,\n",
+ " \"bnb_4bit_compute_dtype\": \"bfloat16\",\n",
+ " \"bnb_4bit_quant_storage\": \"uint8\",\n",
+ " \"bnb_4bit_quant_type\": \"nf4\",\n",
+ " \"bnb_4bit_use_double_quant\": true,\n",
+ " \"llm_int8_enable_fp32_cpu_offload\": false,\n",
+ " \"llm_int8_has_fp16_weight\": false,\n",
+ " \"llm_int8_skip_modules\": null,\n",
+ " \"llm_int8_threshold\": 6.0,\n",
+ " \"load_in_4bit\": true,\n",
+ " \"load_in_8bit\": false,\n",
+ " \"quant_method\": \"bitsandbytes\"\n",
+ " },\n",
+ " \"rms_norm_eps\": 1e-06,\n",
+ " \"rope_theta\": 1000000.0,\n",
+ " \"sliding_window\": 131072,\n",
+ " \"tie_word_embeddings\": false,\n",
+ " \"torch_dtype\": \"bfloat16\",\n",
+ " \"transformers_version\": \"4.42.3\",\n",
+ " \"use_cache\": true,\n",
+ " \"use_sliding_window\": false,\n",
+ " \"vocab_size\": 152064\n",
+ "}\n",
+ "\n",
+ "[INFO|tokenization_utils_base.py:2574] 2024-07-04 21:46:25,650 >> tokenizer config file saved in saves/qwen2-7b/lora/sft/tokenizer_config.json\n",
+ "[INFO|tokenization_utils_base.py:2583] 2024-07-04 21:46:25,650 >> Special tokens file saved in saves/qwen2-7b/lora/sft/special_tokens_map.json\n",
+ "***** train metrics *****\n",
+ " epoch = 5.9973\n",
+ " total_flos = 89914948GF\n",
+ " train_loss = 0.5189\n",
+ " train_runtime = 6:20:07.05\n",
+ " train_samples_per_second = 1.179\n",
+ " train_steps_per_second = 0.147\n",
+ "Figure saved at: saves/qwen2-7b/lora/sft/training_loss.png\n",
+ "Figure saved at: saves/qwen2-7b/lora/sft/training_eval_loss.png\n",
+ "[INFO|trainer.py:3788] 2024-07-04 21:46:26,044 >> \n",
+ "***** Running Evaluation *****\n",
+ "[INFO|trainer.py:3790] 2024-07-04 21:46:26,044 >> Num examples = 46\n",
+ "[INFO|trainer.py:3793] 2024-07-04 21:46:26,045 >> Batch size = 1\n",
+ "100%|███████████████████████████████████████████| 46/46 [00:08<00:00, 5.41it/s]\n",
+ "***** eval metrics *****\n",
+ " epoch = 5.9973\n",
+ " eval_loss = 2.9878\n",
+ " eval_runtime = 0:00:08.78\n",
+ " eval_samples_per_second = 5.234\n",
+ " eval_steps_per_second = 5.234\n",
+ "[INFO|modelcard.py:449] 2024-07-04 21:46:34,837 >> Dropping the following result as it does not have all the necessary fields:\n",
+ "{'task': {'name': 'Causal Language Modeling', 'type': 'text-generation'}}\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: / 0.092 MB of 0.092 MB uploaded\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: Run history:\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: eval/loss ▁▂▃▅▇██\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: eval/runtime █▇█▆▅█▁\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: eval/samples_per_second ▁▂▁▂▃▁█\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: eval/steps_per_second ▁▂▁▂▃▁█\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: train/epoch ▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇███\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: train/global_step ▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇███\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: train/grad_norm ▃▂▃▃▃▄▄▄▆▆▄▅▅▆▅▆▅▅▅▇▄▆▇▅▄▄▄█▃▂▄▄▃▁▁▁▂▁▁▃\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: train/learning_rate ▂▄▅▇██████▇▇▇▇▇▆▆▆▆▅▅▅▄▄▄▃▃▃▃▂▂▂▂▁▁▁▁▁▁▁\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: train/loss ███▇▇▇▇▅▅▅▅▅▅▅▃▃▃▃▂▂▁▁▂▁▁▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: \n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: Run summary:\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: eval/loss 2.9878\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: eval/runtime 8.7891\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: eval/samples_per_second 5.234\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: eval/steps_per_second 5.234\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: total_flos 9.654544053942682e+16\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: train/epoch 5.99732\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: train/global_step 3360\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: train/grad_norm 1.54078\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: train/learning_rate 0.0\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: train/loss 0.0049\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: train_loss 0.51895\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: train_runtime 22807.0531\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: train_samples_per_second 1.179\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: train_steps_per_second 0.147\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: \n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: 🚀 View run \u001b[33mqwen2_7b_lora_sft\u001b[0m at: \u001b[34m\u001b[4mhttps://wandb.ai/inflaton-ai/huggingface/runs/o710838e\u001b[0m\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: ⭐️ View project at: \u001b[34m\u001b[4mhttps://wandb.ai/inflaton-ai/huggingface\u001b[0m\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: Synced 6 W&B file(s), 0 media file(s), 1 artifact file(s) and 0 other file(s)\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: Find logs at: \u001b[35m\u001b[1m./wandb/run-20240704_152618-o710838e/logs\u001b[0m\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m The new W&B backend becomes opt-out in version 0.18.0; try it out with `wandb.require(\"core\")`! See https://wandb.me/wandb-core for more information.\n",
+ "CPU times: user 23min 50s, sys: 8min 47s, total: 32min 37s\n",
+ "Wall time: 6h 56min 32s\n"
+ ]
+ }
+ ],
+ "source": [
+ "%%time\n",
+ "\n",
+ "!./scripts/tune-lf.sh config/qwen2_7b_lora_sft_unsloth.yaml"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 20,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Current Directory:\n",
+ "/home/inflaton/code/projects/courses/llm-finetuning/llama-factory\n",
+ "07/04/2024 21:56:42 - WARNING - llamafactory.hparams.parser - We recommend enable `upcast_layernorm` in quantized training.\n",
+ "07/04/2024 21:56:42 - INFO - llamafactory.hparams.parser - Process rank: 0, device: cuda:0, n_gpu: 1, distributed training: False, compute dtype: torch.bfloat16\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-07-04 21:56:42,789 >> loading file vocab.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/vocab.json\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-07-04 21:56:42,789 >> loading file merges.txt from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/merges.txt\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-07-04 21:56:42,789 >> loading file tokenizer.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/tokenizer.json\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-07-04 21:56:42,789 >> loading file added_tokens.json from cache at None\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-07-04 21:56:42,789 >> loading file special_tokens_map.json from cache at None\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-07-04 21:56:42,789 >> loading file tokenizer_config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/tokenizer_config.json\n",
+ "[WARNING|logging.py:313] 2024-07-04 21:56:42,918 >> Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n",
+ "07/04/2024 21:56:42 - INFO - llamafactory.data.template - Replace eos token: <|im_end|>\n",
+ "07/04/2024 21:56:42 - INFO - llamafactory.data.template - Add <|im_start|> to stop words.\n",
+ "07/04/2024 21:56:42 - INFO - llamafactory.data.loader - Loading dataset alpaca_mac.json...\n",
+ "Converting format of dataset (num_proc=16): 100%|█| 4528/4528 [00:00<00:00, 1521\n",
+ "Running tokenizer on dataset (num_proc=16): 100%|█| 4528/4528 [00:01<00:00, 2757\n",
+ "input_ids:\n",
+ "[151644, 872, 198, 5501, 14683, 279, 2701, 8453, 1467, 1119, 6364, 323, 3410, 1172, 279, 24531, 2213, 11, 4302, 770, 624, 35987, 102895, 99164, 100324, 100717, 100095, 99509, 1773, 151645, 198, 151644, 77091, 198, 17949, 358, 572, 2617, 553, 264, 38835, 44486, 13, 151645]\n",
+ "inputs:\n",
+ "<|im_start|>user\n",
+ "Please translate the following Chinese text into English and provide only the translated content, nothing else.\n",
+ "全仗着狐仙搭救。<|im_end|>\n",
+ "<|im_start|>assistant\n",
+ "Because I was protected by a fox fairy.<|im_end|>\n",
+ "label_ids:\n",
+ "[-100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, 17949, 358, 572, 2617, 553, 264, 38835, 44486, 13, 151645]\n",
+ "labels:\n",
+ "Because I was protected by a fox fairy.<|im_end|>\n",
+ "[INFO|configuration_utils.py:733] 2024-07-04 21:56:47,196 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/config.json\n",
+ "[INFO|configuration_utils.py:800] 2024-07-04 21:56:47,197 >> Model config Qwen2Config {\n",
+ " \"_name_or_path\": \"Qwen/Qwen2-1.5B-Instruct\",\n",
+ " \"architectures\": [\n",
+ " \"Qwen2ForCausalLM\"\n",
+ " ],\n",
+ " \"attention_dropout\": 0.0,\n",
+ " \"bos_token_id\": 151643,\n",
+ " \"eos_token_id\": 151645,\n",
+ " \"hidden_act\": \"silu\",\n",
+ " \"hidden_size\": 1536,\n",
+ " \"initializer_range\": 0.02,\n",
+ " \"intermediate_size\": 8960,\n",
+ " \"max_position_embeddings\": 32768,\n",
+ " \"max_window_layers\": 28,\n",
+ " \"model_type\": \"qwen2\",\n",
+ " \"num_attention_heads\": 12,\n",
+ " \"num_hidden_layers\": 28,\n",
+ " \"num_key_value_heads\": 2,\n",
+ " \"rms_norm_eps\": 1e-06,\n",
+ " \"rope_theta\": 1000000.0,\n",
+ " \"sliding_window\": 32768,\n",
+ " \"tie_word_embeddings\": true,\n",
+ " \"torch_dtype\": \"bfloat16\",\n",
+ " \"transformers_version\": \"4.42.3\",\n",
+ " \"use_cache\": true,\n",
+ " \"use_sliding_window\": false,\n",
+ " \"vocab_size\": 151936\n",
+ "}\n",
+ "\n",
+ "07/04/2024 21:56:47 - INFO - llamafactory.model.model_utils.quantization - Quantizing model to 4 bit with bitsandbytes.\n",
+ "🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.\n",
+ "[INFO|configuration_utils.py:733] 2024-07-04 21:56:48,123 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--unsloth--qwen2-1.5b-instruct-bnb-4bit/snapshots/9f10684b3a26fbf25e50921655353e2e3e599d70/config.json\n",
+ "[INFO|configuration_utils.py:800] 2024-07-04 21:56:48,123 >> Model config Qwen2Config {\n",
+ " \"_name_or_path\": \"unsloth/qwen2-1.5b-instruct-bnb-4bit\",\n",
+ " \"architectures\": [\n",
+ " \"Qwen2ForCausalLM\"\n",
+ " ],\n",
+ " \"attention_dropout\": 0.0,\n",
+ " \"bos_token_id\": 151643,\n",
+ " \"eos_token_id\": 151645,\n",
+ " \"hidden_act\": \"silu\",\n",
+ " \"hidden_size\": 1536,\n",
+ " \"initializer_range\": 0.02,\n",
+ " \"intermediate_size\": 8960,\n",
+ " \"max_position_embeddings\": 32768,\n",
+ " \"max_window_layers\": 28,\n",
+ " \"model_type\": \"qwen2\",\n",
+ " \"num_attention_heads\": 12,\n",
+ " \"num_hidden_layers\": 28,\n",
+ " \"num_key_value_heads\": 2,\n",
+ " \"quantization_config\": {\n",
+ " \"_load_in_4bit\": true,\n",
+ " \"_load_in_8bit\": false,\n",
+ " \"bnb_4bit_compute_dtype\": \"bfloat16\",\n",
+ " \"bnb_4bit_quant_storage\": \"uint8\",\n",
+ " \"bnb_4bit_quant_type\": \"nf4\",\n",
+ " \"bnb_4bit_use_double_quant\": true,\n",
+ " \"llm_int8_enable_fp32_cpu_offload\": false,\n",
+ " \"llm_int8_has_fp16_weight\": false,\n",
+ " \"llm_int8_skip_modules\": null,\n",
+ " \"llm_int8_threshold\": 6.0,\n",
+ " \"load_in_4bit\": true,\n",
+ " \"load_in_8bit\": false,\n",
+ " \"quant_method\": \"bitsandbytes\"\n",
+ " },\n",
+ " \"rms_norm_eps\": 1e-06,\n",
+ " \"rope_theta\": 1000000.0,\n",
+ " \"sliding_window\": 32768,\n",
+ " \"tie_word_embeddings\": true,\n",
+ " \"torch_dtype\": \"bfloat16\",\n",
+ " \"transformers_version\": \"4.42.3\",\n",
+ " \"use_cache\": true,\n",
+ " \"use_sliding_window\": false,\n",
+ " \"vocab_size\": 151936\n",
+ "}\n",
+ "\n",
+ "==((====))== Unsloth: Fast Qwen2 patching release 2024.6\n",
+ " \\\\ /| GPU: NVIDIA GeForce RTX 4080 Laptop GPU. Max memory: 11.994 GB. Platform = Linux.\n",
+ "O^O/ \\_/ \\ Pytorch: 2.3.0+cu121. CUDA = 8.9. CUDA Toolkit = 12.1.\n",
+ "\\ / Bfloat16 = TRUE. Xformers = 0.0.26.post1. FA = False.\n",
+ " \"-____-\" Free Apache license: http://github.com/unslothai/unsloth\n",
+ "[INFO|configuration_utils.py:733] 2024-07-04 21:56:49,865 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--unsloth--qwen2-1.5b-instruct-bnb-4bit/snapshots/9f10684b3a26fbf25e50921655353e2e3e599d70/config.json\n",
+ "[INFO|configuration_utils.py:800] 2024-07-04 21:56:49,865 >> Model config Qwen2Config {\n",
+ " \"_name_or_path\": \"unsloth/qwen2-1.5b-instruct-bnb-4bit\",\n",
+ " \"architectures\": [\n",
+ " \"Qwen2ForCausalLM\"\n",
+ " ],\n",
+ " \"attention_dropout\": 0.0,\n",
+ " \"bos_token_id\": 151643,\n",
+ " \"eos_token_id\": 151645,\n",
+ " \"hidden_act\": \"silu\",\n",
+ " \"hidden_size\": 1536,\n",
+ " \"initializer_range\": 0.02,\n",
+ " \"intermediate_size\": 8960,\n",
+ " \"max_position_embeddings\": 32768,\n",
+ " \"max_window_layers\": 28,\n",
+ " \"model_type\": \"qwen2\",\n",
+ " \"num_attention_heads\": 12,\n",
+ " \"num_hidden_layers\": 28,\n",
+ " \"num_key_value_heads\": 2,\n",
+ " \"quantization_config\": {\n",
+ " \"_load_in_4bit\": true,\n",
+ " \"_load_in_8bit\": false,\n",
+ " \"bnb_4bit_compute_dtype\": \"bfloat16\",\n",
+ " \"bnb_4bit_quant_storage\": \"uint8\",\n",
+ " \"bnb_4bit_quant_type\": \"nf4\",\n",
+ " \"bnb_4bit_use_double_quant\": true,\n",
+ " \"llm_int8_enable_fp32_cpu_offload\": false,\n",
+ " \"llm_int8_has_fp16_weight\": false,\n",
+ " \"llm_int8_skip_modules\": null,\n",
+ " \"llm_int8_threshold\": 6.0,\n",
+ " \"load_in_4bit\": true,\n",
+ " \"load_in_8bit\": false,\n",
+ " \"quant_method\": \"bitsandbytes\"\n",
+ " },\n",
+ " \"rms_norm_eps\": 1e-06,\n",
+ " \"rope_theta\": 1000000.0,\n",
+ " \"sliding_window\": 32768,\n",
+ " \"tie_word_embeddings\": true,\n",
+ " \"torch_dtype\": \"bfloat16\",\n",
+ " \"transformers_version\": \"4.42.3\",\n",
+ " \"use_cache\": true,\n",
+ " \"use_sliding_window\": false,\n",
+ " \"vocab_size\": 151936\n",
+ "}\n",
+ "\n",
+ "[INFO|configuration_utils.py:733] 2024-07-04 21:56:50,495 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--unsloth--qwen2-1.5b-instruct-bnb-4bit/snapshots/9f10684b3a26fbf25e50921655353e2e3e599d70/config.json\n",
+ "[INFO|configuration_utils.py:800] 2024-07-04 21:56:50,496 >> Model config Qwen2Config {\n",
+ " \"_name_or_path\": \"unsloth/qwen2-1.5b-instruct-bnb-4bit\",\n",
+ " \"architectures\": [\n",
+ " \"Qwen2ForCausalLM\"\n",
+ " ],\n",
+ " \"attention_dropout\": 0.0,\n",
+ " \"bos_token_id\": 151643,\n",
+ " \"eos_token_id\": 151645,\n",
+ " \"hidden_act\": \"silu\",\n",
+ " \"hidden_size\": 1536,\n",
+ " \"initializer_range\": 0.02,\n",
+ " \"intermediate_size\": 8960,\n",
+ " \"max_position_embeddings\": 32768,\n",
+ " \"max_window_layers\": 28,\n",
+ " \"model_type\": \"qwen2\",\n",
+ " \"num_attention_heads\": 12,\n",
+ " \"num_hidden_layers\": 28,\n",
+ " \"num_key_value_heads\": 2,\n",
+ " \"quantization_config\": {\n",
+ " \"_load_in_4bit\": true,\n",
+ " \"_load_in_8bit\": false,\n",
+ " \"bnb_4bit_compute_dtype\": \"bfloat16\",\n",
+ " \"bnb_4bit_quant_storage\": \"uint8\",\n",
+ " \"bnb_4bit_quant_type\": \"nf4\",\n",
+ " \"bnb_4bit_use_double_quant\": true,\n",
+ " \"llm_int8_enable_fp32_cpu_offload\": false,\n",
+ " \"llm_int8_has_fp16_weight\": false,\n",
+ " \"llm_int8_skip_modules\": null,\n",
+ " \"llm_int8_threshold\": 6.0,\n",
+ " \"load_in_4bit\": true,\n",
+ " \"load_in_8bit\": false,\n",
+ " \"quant_method\": \"bitsandbytes\"\n",
+ " },\n",
+ " \"rms_norm_eps\": 1e-06,\n",
+ " \"rope_theta\": 1000000.0,\n",
+ " \"sliding_window\": 32768,\n",
+ " \"tie_word_embeddings\": true,\n",
+ " \"torch_dtype\": \"bfloat16\",\n",
+ " \"transformers_version\": \"4.42.3\",\n",
+ " \"use_cache\": true,\n",
+ " \"use_sliding_window\": false,\n",
+ " \"vocab_size\": 151936\n",
+ "}\n",
+ "\n",
+ "[INFO|modeling_utils.py:3556] 2024-07-04 21:56:50,707 >> loading weights file model.safetensors from cache at /home/inflaton/.cache/huggingface/hub/models--unsloth--qwen2-1.5b-instruct-bnb-4bit/snapshots/9f10684b3a26fbf25e50921655353e2e3e599d70/model.safetensors\n",
+ "[INFO|modeling_utils.py:1531] 2024-07-04 21:56:56,626 >> Instantiating Qwen2ForCausalLM model under default dtype torch.bfloat16.\n",
+ "[INFO|configuration_utils.py:1000] 2024-07-04 21:56:56,631 >> Generate config GenerationConfig {\n",
+ " \"bos_token_id\": 151643,\n",
+ " \"eos_token_id\": 151645\n",
+ "}\n",
+ "\n",
+ "[INFO|modeling_utils.py:4364] 2024-07-04 21:58:31,535 >> All model checkpoint weights were used when initializing Qwen2ForCausalLM.\n",
+ "\n",
+ "[INFO|modeling_utils.py:4372] 2024-07-04 21:58:31,535 >> All the weights of Qwen2ForCausalLM were initialized from the model checkpoint at unsloth/qwen2-1.5b-instruct-bnb-4bit.\n",
+ "If your task is similar to the task the model of the checkpoint was trained on, you can already use Qwen2ForCausalLM for predictions without further training.\n",
+ "[INFO|configuration_utils.py:955] 2024-07-04 21:58:32,073 >> loading configuration file generation_config.json from cache at /home/inflaton/.cache/huggingface/hub/models--unsloth--qwen2-1.5b-instruct-bnb-4bit/snapshots/9f10684b3a26fbf25e50921655353e2e3e599d70/generation_config.json\n",
+ "[INFO|configuration_utils.py:1000] 2024-07-04 21:58:32,073 >> Generate config GenerationConfig {\n",
+ " \"bos_token_id\": 151643,\n",
+ " \"do_sample\": true,\n",
+ " \"eos_token_id\": [\n",
+ " 151645,\n",
+ " 151643\n",
+ " ],\n",
+ " \"pad_token_id\": 151643,\n",
+ " \"repetition_penalty\": 1.1,\n",
+ " \"temperature\": 0.7,\n",
+ " \"top_k\": 20,\n",
+ " \"top_p\": 0.8\n",
+ "}\n",
+ "\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-07-04 21:58:33,489 >> loading file vocab.json from cache at /home/inflaton/.cache/huggingface/hub/models--unsloth--qwen2-1.5b-instruct-bnb-4bit/snapshots/9f10684b3a26fbf25e50921655353e2e3e599d70/vocab.json\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-07-04 21:58:33,489 >> loading file merges.txt from cache at /home/inflaton/.cache/huggingface/hub/models--unsloth--qwen2-1.5b-instruct-bnb-4bit/snapshots/9f10684b3a26fbf25e50921655353e2e3e599d70/merges.txt\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-07-04 21:58:33,489 >> loading file added_tokens.json from cache at /home/inflaton/.cache/huggingface/hub/models--unsloth--qwen2-1.5b-instruct-bnb-4bit/snapshots/9f10684b3a26fbf25e50921655353e2e3e599d70/added_tokens.json\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-07-04 21:58:33,490 >> loading file special_tokens_map.json from cache at /home/inflaton/.cache/huggingface/hub/models--unsloth--qwen2-1.5b-instruct-bnb-4bit/snapshots/9f10684b3a26fbf25e50921655353e2e3e599d70/special_tokens_map.json\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-07-04 21:58:33,490 >> loading file tokenizer_config.json from cache at /home/inflaton/.cache/huggingface/hub/models--unsloth--qwen2-1.5b-instruct-bnb-4bit/snapshots/9f10684b3a26fbf25e50921655353e2e3e599d70/tokenizer_config.json\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-07-04 21:58:33,490 >> loading file tokenizer.json from cache at /home/inflaton/.cache/huggingface/hub/models--unsloth--qwen2-1.5b-instruct-bnb-4bit/snapshots/9f10684b3a26fbf25e50921655353e2e3e599d70/tokenizer.json\n",
+ "[WARNING|logging.py:313] 2024-07-04 21:58:33,937 >> Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-07-04 21:58:34,912 >> loading file vocab.json from cache at /home/inflaton/.cache/huggingface/hub/models--unsloth--qwen2-1.5b-instruct-bnb-4bit/snapshots/9f10684b3a26fbf25e50921655353e2e3e599d70/vocab.json\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-07-04 21:58:34,912 >> loading file merges.txt from cache at /home/inflaton/.cache/huggingface/hub/models--unsloth--qwen2-1.5b-instruct-bnb-4bit/snapshots/9f10684b3a26fbf25e50921655353e2e3e599d70/merges.txt\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-07-04 21:58:34,912 >> loading file tokenizer.json from cache at /home/inflaton/.cache/huggingface/hub/models--unsloth--qwen2-1.5b-instruct-bnb-4bit/snapshots/9f10684b3a26fbf25e50921655353e2e3e599d70/tokenizer.json\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-07-04 21:58:34,912 >> loading file added_tokens.json from cache at /home/inflaton/.cache/huggingface/hub/models--unsloth--qwen2-1.5b-instruct-bnb-4bit/snapshots/9f10684b3a26fbf25e50921655353e2e3e599d70/added_tokens.json\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-07-04 21:58:34,912 >> loading file special_tokens_map.json from cache at /home/inflaton/.cache/huggingface/hub/models--unsloth--qwen2-1.5b-instruct-bnb-4bit/snapshots/9f10684b3a26fbf25e50921655353e2e3e599d70/special_tokens_map.json\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-07-04 21:58:34,912 >> loading file tokenizer_config.json from cache at /home/inflaton/.cache/huggingface/hub/models--unsloth--qwen2-1.5b-instruct-bnb-4bit/snapshots/9f10684b3a26fbf25e50921655353e2e3e599d70/tokenizer_config.json\n",
+ "[WARNING|logging.py:313] 2024-07-04 21:58:35,100 >> Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n",
+ "07/04/2024 21:58:35 - INFO - llamafactory.model.model_utils.checkpointing - Gradient checkpointing enabled.\n",
+ "07/04/2024 21:58:35 - INFO - llamafactory.model.adapter - Upcasting trainable params to float32.\n",
+ "07/04/2024 21:58:35 - INFO - llamafactory.model.adapter - Fine-tuning method: LoRA\n",
+ "07/04/2024 21:58:35 - INFO - llamafactory.model.model_utils.misc - Found linear modules: v_proj,k_proj,down_proj,gate_proj,q_proj,o_proj,up_proj\n",
+ "[WARNING|logging.py:328] 2024-07-04 21:58:36,612 >> Unsloth 2024.6 patched 28 layers with 0 QKV layers, 28 O layers and 28 MLP layers.\n",
+ "07/04/2024 21:58:37 - INFO - llamafactory.model.loader - trainable params: 9,232,384 || all params: 1,786,320,384 || trainable%: 0.5168\n",
+ "[INFO|trainer.py:642] 2024-07-04 21:58:37,463 >> Using auto half precision backend\n",
+ "07/04/2024 21:58:37 - WARNING - llamafactory.train.callbacks - Previous trainer log in this folder will be deleted.\n",
+ "07/04/2024 21:58:37 - INFO - llamafactory.train.trainer_utils - Using LoRA+ optimizer with loraplus lr ratio 16.00.\n",
+ "[WARNING|:223] 2024-07-04 21:58:37,613 >> ==((====))== Unsloth - 2x faster free finetuning | Num GPUs = 1\n",
+ " \\\\ /| Num examples = 4,482 | Num Epochs = 6\n",
+ "O^O/ \\_/ \\ Batch size per device = 1 | Gradient Accumulation steps = 8\n",
+ "\\ / Total batch size = 8 | Total steps = 3,360\n",
+ " \"-____-\" Number of trainable parameters = 9,232,384\n",
+ "[INFO|integration_utils.py:750] 2024-07-04 21:58:38,026 >> Automatic Weights & Biases logging enabled, to disable set os.environ[\"WANDB_DISABLED\"] = \"true\"\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33minflaton-sg\u001b[0m (\u001b[33minflaton-ai\u001b[0m). Use \u001b[1m`wandb login --relogin`\u001b[0m to force relogin\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: Tracking run with wandb version 0.17.4\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: Run data is saved locally in \u001b[35m\u001b[1m/home/inflaton/code/projects/courses/llm-finetuning/llama-factory/wandb/run-20240704_215839-4fbnqsea\u001b[0m\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: Run \u001b[1m`wandb offline`\u001b[0m to turn off syncing.\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: Syncing run \u001b[33mqwen2_1.5b_lora_sft\u001b[0m\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: ⭐️ View project at \u001b[34m\u001b[4mhttps://wandb.ai/inflaton-ai/huggingface\u001b[0m\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: 🚀 View run at \u001b[34m\u001b[4mhttps://wandb.ai/inflaton-ai/huggingface/runs/4fbnqsea\u001b[0m\n",
+ "{'loss': 2.2167, 'grad_norm': 1.7105902433395386, 'learning_rate': 2.9761904761904763e-06, 'epoch': 0.02}\n",
+ "{'loss': 2.2613, 'grad_norm': 2.196908712387085, 'learning_rate': 5.9523809523809525e-06, 'epoch': 0.04}\n",
+ "{'loss': 2.0707, 'grad_norm': 0.9740070104598999, 'learning_rate': 8.92857142857143e-06, 'epoch': 0.05}\n",
+ "{'loss': 1.9514, 'grad_norm': 1.8389497995376587, 'learning_rate': 1.1904761904761905e-05, 'epoch': 0.07}\n",
+ "{'loss': 2.1174, 'grad_norm': 1.03471839427948, 'learning_rate': 1.4880952380952381e-05, 'epoch': 0.09}\n",
+ "{'loss': 1.8992, 'grad_norm': 1.198785662651062, 'learning_rate': 1.785714285714286e-05, 'epoch': 0.11}\n",
+ "{'loss': 2.0404, 'grad_norm': 1.114922285079956, 'learning_rate': 2.0833333333333336e-05, 'epoch': 0.12}\n",
+ "{'loss': 1.8447, 'grad_norm': 1.1239877939224243, 'learning_rate': 2.380952380952381e-05, 'epoch': 0.14}\n",
+ "{'loss': 1.9283, 'grad_norm': 1.5919139385223389, 'learning_rate': 2.6785714285714288e-05, 'epoch': 0.16}\n",
+ "{'loss': 1.9026, 'grad_norm': 0.998127818107605, 'learning_rate': 2.9761904761904762e-05, 'epoch': 0.18}\n",
+ "{'loss': 1.8932, 'grad_norm': 1.1430412530899048, 'learning_rate': 3.273809523809524e-05, 'epoch': 0.2}\n",
+ "{'loss': 1.8906, 'grad_norm': 1.0670546293258667, 'learning_rate': 3.571428571428572e-05, 'epoch': 0.21}\n",
+ "{'loss': 1.8343, 'grad_norm': 1.4356828927993774, 'learning_rate': 3.8690476190476195e-05, 'epoch': 0.23}\n",
+ "{'loss': 1.8725, 'grad_norm': 1.7043449878692627, 'learning_rate': 4.166666666666667e-05, 'epoch': 0.25}\n",
+ "{'loss': 1.7689, 'grad_norm': 1.09099543094635, 'learning_rate': 4.464285714285715e-05, 'epoch': 0.27}\n",
+ "{'loss': 1.7491, 'grad_norm': 0.9564052224159241, 'learning_rate': 4.761904761904762e-05, 'epoch': 0.29}\n",
+ "{'loss': 1.8979, 'grad_norm': 1.0268529653549194, 'learning_rate': 5.05952380952381e-05, 'epoch': 0.3}\n",
+ "{'loss': 1.695, 'grad_norm': 1.2585980892181396, 'learning_rate': 5.3571428571428575e-05, 'epoch': 0.32}\n",
+ "{'loss': 1.803, 'grad_norm': 2.146714448928833, 'learning_rate': 5.6547619047619046e-05, 'epoch': 0.34}\n",
+ "{'loss': 1.9439, 'grad_norm': 1.163086175918579, 'learning_rate': 5.9523809523809524e-05, 'epoch': 0.36}\n",
+ "{'loss': 1.8679, 'grad_norm': 1.2789676189422607, 'learning_rate': 6.25e-05, 'epoch': 0.37}\n",
+ "{'loss': 1.7942, 'grad_norm': 1.5350133180618286, 'learning_rate': 6.547619047619048e-05, 'epoch': 0.39}\n",
+ "{'loss': 1.7723, 'grad_norm': 1.333762526512146, 'learning_rate': 6.845238095238096e-05, 'epoch': 0.41}\n",
+ "{'loss': 1.9781, 'grad_norm': 1.342468500137329, 'learning_rate': 7.142857142857143e-05, 'epoch': 0.43}\n",
+ "{'loss': 1.8381, 'grad_norm': 1.785408616065979, 'learning_rate': 7.440476190476191e-05, 'epoch': 0.45}\n",
+ "{'loss': 1.77, 'grad_norm': 1.5936214923858643, 'learning_rate': 7.738095238095239e-05, 'epoch': 0.46}\n",
+ "{'loss': 1.8368, 'grad_norm': 1.7655868530273438, 'learning_rate': 8.035714285714287e-05, 'epoch': 0.48}\n",
+ "{'loss': 1.838, 'grad_norm': 1.5333795547485352, 'learning_rate': 8.333333333333334e-05, 'epoch': 0.5}\n",
+ "{'loss': 1.6551, 'grad_norm': 1.4578733444213867, 'learning_rate': 8.630952380952382e-05, 'epoch': 0.52}\n",
+ "{'loss': 1.8046, 'grad_norm': 1.649754524230957, 'learning_rate': 8.92857142857143e-05, 'epoch': 0.54}\n",
+ "{'loss': 1.8364, 'grad_norm': 1.618801236152649, 'learning_rate': 9.226190476190478e-05, 'epoch': 0.55}\n",
+ "{'loss': 1.6749, 'grad_norm': 2.321563243865967, 'learning_rate': 9.523809523809524e-05, 'epoch': 0.57}\n",
+ "{'loss': 1.7095, 'grad_norm': 1.7713825702667236, 'learning_rate': 9.821428571428572e-05, 'epoch': 0.59}\n",
+ "{'loss': 1.7458, 'grad_norm': 2.338412046432495, 'learning_rate': 9.999956828659095e-05, 'epoch': 0.61}\n",
+ "{'loss': 1.7693, 'grad_norm': 2.676462173461914, 'learning_rate': 9.999471159635539e-05, 'epoch': 0.62}\n",
+ "{'loss': 1.702, 'grad_norm': 1.777328610420227, 'learning_rate': 9.998445910004082e-05, 'epoch': 0.64}\n",
+ "{'loss': 1.8997, 'grad_norm': 2.657947063446045, 'learning_rate': 9.996881190417393e-05, 'epoch': 0.66}\n",
+ "{'loss': 1.8264, 'grad_norm': 1.7980377674102783, 'learning_rate': 9.994777169751806e-05, 'epoch': 0.68}\n",
+ "{'loss': 1.5464, 'grad_norm': 1.6675528287887573, 'learning_rate': 9.992134075089084e-05, 'epoch': 0.7}\n",
+ "{'loss': 1.7621, 'grad_norm': 2.088773727416992, 'learning_rate': 9.988952191691925e-05, 'epoch': 0.71}\n",
+ "{'loss': 1.7907, 'grad_norm': 1.8195936679840088, 'learning_rate': 9.985231862973168e-05, 'epoch': 0.73}\n",
+ "{'loss': 1.8215, 'grad_norm': 1.8611762523651123, 'learning_rate': 9.980973490458728e-05, 'epoch': 0.75}\n",
+ "{'loss': 1.7694, 'grad_norm': 2.018522024154663, 'learning_rate': 9.976177533744261e-05, 'epoch': 0.77}\n",
+ "{'loss': 1.741, 'grad_norm': 2.393932342529297, 'learning_rate': 9.97084451044556e-05, 'epoch': 0.79}\n",
+ "{'loss': 1.6568, 'grad_norm': 1.8010462522506714, 'learning_rate': 9.964974996142698e-05, 'epoch': 0.8}\n",
+ "{'loss': 1.7109, 'grad_norm': 1.6937175989151, 'learning_rate': 9.958569624317893e-05, 'epoch': 0.82}\n",
+ "{'loss': 1.7973, 'grad_norm': 2.7904672622680664, 'learning_rate': 9.951629086287151e-05, 'epoch': 0.84}\n",
+ "{'loss': 1.7033, 'grad_norm': 1.759727954864502, 'learning_rate': 9.944154131125642e-05, 'epoch': 0.86}\n",
+ "{'loss': 1.7797, 'grad_norm': 1.7603638172149658, 'learning_rate': 9.936145565586871e-05, 'epoch': 0.87}\n",
+ "{'loss': 1.9387, 'grad_norm': 1.8501616716384888, 'learning_rate': 9.927604254015585e-05, 'epoch': 0.89}\n",
+ "{'loss': 1.8734, 'grad_norm': 1.8340226411819458, 'learning_rate': 9.918531118254507e-05, 'epoch': 0.91}\n",
+ "{'loss': 1.7725, 'grad_norm': 2.32716703414917, 'learning_rate': 9.90892713754483e-05, 'epoch': 0.93}\n",
+ "{'loss': 1.7641, 'grad_norm': 2.673140048980713, 'learning_rate': 9.898793348420536e-05, 'epoch': 0.95}\n",
+ "{'loss': 1.7813, 'grad_norm': 1.9481444358825684, 'learning_rate': 9.888130844596524e-05, 'epoch': 0.96}\n",
+ "{'loss': 1.7363, 'grad_norm': 2.068895101547241, 'learning_rate': 9.876940776850569e-05, 'epoch': 0.98}\n",
+ "{'loss': 1.725, 'grad_norm': 1.8741337060928345, 'learning_rate': 9.865224352899119e-05, 'epoch': 1.0}\n",
+ " 17%|██████▎ | 560/3360 [20:10<1:47:08, 2.30s/it][INFO|trainer.py:3788] 2024-07-04 22:18:54,222 >> \n",
+ "***** Running Evaluation *****\n",
+ "[INFO|trainer.py:3790] 2024-07-04 22:18:54,223 >> Num examples = 46\n",
+ "[INFO|trainer.py:3793] 2024-07-04 22:18:54,223 >> Batch size = 1\n",
+ "\n",
+ " 0%| | 0/46 [00:00, ?it/s]\u001b[A\n",
+ " 7%|██▊ | 3/46 [00:00<00:02, 16.12it/s]\u001b[A\n",
+ " 11%|████▊ | 5/46 [00:00<00:02, 14.12it/s]\u001b[A\n",
+ " 15%|██████▋ | 7/46 [00:00<00:02, 13.49it/s]\u001b[A\n",
+ " 20%|████████▌ | 9/46 [00:00<00:02, 12.62it/s]\u001b[A\n",
+ " 24%|██████████▎ | 11/46 [00:00<00:03, 10.63it/s]\u001b[A\n",
+ " 28%|████████████▏ | 13/46 [00:01<00:03, 9.63it/s]\u001b[A\n",
+ " 33%|██████████████ | 15/46 [00:01<00:03, 9.49it/s]\u001b[A\n",
+ " 35%|██████████████▉ | 16/46 [00:01<00:03, 9.30it/s]\u001b[A\n",
+ " 37%|███████████████▉ | 17/46 [00:01<00:03, 8.91it/s]\u001b[A\n",
+ " 39%|████████████████▊ | 18/46 [00:01<00:03, 8.44it/s]\u001b[A\n",
+ " 41%|█████████████████▊ | 19/46 [00:01<00:03, 8.19it/s]\u001b[A\n",
+ " 43%|██████████████████▋ | 20/46 [00:02<00:03, 8.03it/s]\u001b[A\n",
+ " 46%|███████████████████▋ | 21/46 [00:02<00:03, 7.75it/s]\u001b[A\n",
+ " 48%|████████████████████▌ | 22/46 [00:02<00:03, 7.66it/s]\u001b[A\n",
+ " 50%|█████████████████████▌ | 23/46 [00:02<00:03, 7.52it/s]\u001b[A\n",
+ " 52%|██████████████████████▍ | 24/46 [00:02<00:02, 7.59it/s]\u001b[A\n",
+ " 57%|████████████████████████▎ | 26/46 [00:02<00:02, 8.62it/s]\u001b[A\n",
+ " 59%|█████████████████████████▏ | 27/46 [00:02<00:02, 8.84it/s]\u001b[A\n",
+ " 63%|███████████████████████████ | 29/46 [00:03<00:01, 9.63it/s]\u001b[A\n",
+ " 67%|████████████████████████████▉ | 31/46 [00:03<00:01, 10.47it/s]\u001b[A\n",
+ " 72%|██████████████████████████████▊ | 33/46 [00:03<00:01, 10.89it/s]\u001b[A\n",
+ " 76%|████████████████████████████████▋ | 35/46 [00:03<00:00, 11.74it/s]\u001b[A\n",
+ " 80%|██████████████████████████████████▌ | 37/46 [00:03<00:00, 12.31it/s]\u001b[A\n",
+ " 85%|████████████████████████████████████▍ | 39/46 [00:03<00:00, 12.75it/s]\u001b[A\n",
+ " 89%|██████████████████████████████████████▎ | 41/46 [00:03<00:00, 12.89it/s]\u001b[A\n",
+ " 93%|████████████████████████████████████████▏ | 43/46 [00:04<00:00, 13.07it/s]\u001b[A\n",
+ " \u001b[A\n",
+ "\u001b[A{'eval_loss': 1.7500004768371582, 'eval_runtime': 4.4502, 'eval_samples_per_second': 10.337, 'eval_steps_per_second': 10.337, 'epoch': 1.0}\n",
+ " 17%|██████▎ | 560/3360 [20:14<1:47:08, 2.30s/it]\n",
+ "100%|███████████████████████████████████████████| 46/46 [00:04<00:00, 13.30it/s]\u001b[A\n",
+ " \u001b[A[INFO|trainer.py:3478] 2024-07-04 22:18:58,675 >> Saving model checkpoint to saves/qwen2-1.5b/lora/sft/checkpoint-560\n",
+ "[INFO|configuration_utils.py:733] 2024-07-04 22:18:59,836 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--unsloth--qwen2-1.5b-instruct-bnb-4bit/snapshots/9f10684b3a26fbf25e50921655353e2e3e599d70/config.json\n",
+ "[INFO|configuration_utils.py:800] 2024-07-04 22:18:59,838 >> Model config Qwen2Config {\n",
+ " \"_name_or_path\": \"Qwen/Qwen2-1.5B-Instruct\",\n",
+ " \"architectures\": [\n",
+ " \"Qwen2ForCausalLM\"\n",
+ " ],\n",
+ " \"attention_dropout\": 0.0,\n",
+ " \"bos_token_id\": 151643,\n",
+ " \"eos_token_id\": 151645,\n",
+ " \"hidden_act\": \"silu\",\n",
+ " \"hidden_size\": 1536,\n",
+ " \"initializer_range\": 0.02,\n",
+ " \"intermediate_size\": 8960,\n",
+ " \"max_position_embeddings\": 32768,\n",
+ " \"max_window_layers\": 28,\n",
+ " \"model_type\": \"qwen2\",\n",
+ " \"num_attention_heads\": 12,\n",
+ " \"num_hidden_layers\": 28,\n",
+ " \"num_key_value_heads\": 2,\n",
+ " \"quantization_config\": {\n",
+ " \"_load_in_4bit\": true,\n",
+ " \"_load_in_8bit\": false,\n",
+ " \"bnb_4bit_compute_dtype\": \"bfloat16\",\n",
+ " \"bnb_4bit_quant_storage\": \"uint8\",\n",
+ " \"bnb_4bit_quant_type\": \"nf4\",\n",
+ " \"bnb_4bit_use_double_quant\": true,\n",
+ " \"llm_int8_enable_fp32_cpu_offload\": false,\n",
+ " \"llm_int8_has_fp16_weight\": false,\n",
+ " \"llm_int8_skip_modules\": null,\n",
+ " \"llm_int8_threshold\": 6.0,\n",
+ " \"load_in_4bit\": true,\n",
+ " \"load_in_8bit\": false,\n",
+ " \"quant_method\": \"bitsandbytes\"\n",
+ " },\n",
+ " \"rms_norm_eps\": 1e-06,\n",
+ " \"rope_theta\": 1000000.0,\n",
+ " \"sliding_window\": 32768,\n",
+ " \"tie_word_embeddings\": true,\n",
+ " \"torch_dtype\": \"bfloat16\",\n",
+ " \"transformers_version\": \"4.42.3\",\n",
+ " \"use_cache\": true,\n",
+ " \"use_sliding_window\": false,\n",
+ " \"vocab_size\": 151936\n",
+ "}\n",
+ "\n",
+ "[INFO|tokenization_utils_base.py:2574] 2024-07-04 22:18:59,966 >> tokenizer config file saved in saves/qwen2-1.5b/lora/sft/checkpoint-560/tokenizer_config.json\n",
+ "[INFO|tokenization_utils_base.py:2583] 2024-07-04 22:18:59,967 >> Special tokens file saved in saves/qwen2-1.5b/lora/sft/checkpoint-560/special_tokens_map.json\n",
+ "{'loss': 1.3163, 'grad_norm': 2.1074018478393555, 'learning_rate': 9.852982837266955e-05, 'epoch': 1.02}\n",
+ "{'loss': 1.1057, 'grad_norm': 2.284273147583008, 'learning_rate': 9.840217551150706e-05, 'epoch': 1.04}\n",
+ "{'loss': 1.3071, 'grad_norm': 1.8365180492401123, 'learning_rate': 9.826929872276255e-05, 'epoch': 1.05}\n",
+ "{'loss': 1.2093, 'grad_norm': 2.285874843597412, 'learning_rate': 9.81312123475006e-05, 'epoch': 1.07}\n",
+ "{'loss': 1.1653, 'grad_norm': 2.244819402694702, 'learning_rate': 9.798793128904356e-05, 'epoch': 1.09}\n",
+ "{'loss': 1.276, 'grad_norm': 2.5032386779785156, 'learning_rate': 9.78394710113631e-05, 'epoch': 1.11}\n",
+ "{'loss': 1.2116, 'grad_norm': 2.2835264205932617, 'learning_rate': 9.768584753741134e-05, 'epoch': 1.12}\n",
+ "{'loss': 1.1953, 'grad_norm': 3.390573740005493, 'learning_rate': 9.752707744739145e-05, 'epoch': 1.14}\n",
+ "{'loss': 1.2537, 'grad_norm': 2.312870502471924, 'learning_rate': 9.736317787696816e-05, 'epoch': 1.16}\n",
+ "{'loss': 1.1042, 'grad_norm': 2.252488851547241, 'learning_rate': 9.719416651541839e-05, 'epoch': 1.18}\n",
+ "{'loss': 1.1397, 'grad_norm': 1.93602454662323, 'learning_rate': 9.702006160372209e-05, 'epoch': 1.2}\n",
+ "{'loss': 1.1822, 'grad_norm': 2.3258895874023438, 'learning_rate': 9.684088193259355e-05, 'epoch': 1.21}\n",
+ "{'loss': 1.1777, 'grad_norm': 2.362091064453125, 'learning_rate': 9.665664684045333e-05, 'epoch': 1.23}\n",
+ "{'loss': 1.2182, 'grad_norm': 2.36861515045166, 'learning_rate': 9.646737621134112e-05, 'epoch': 1.25}\n",
+ "{'loss': 1.181, 'grad_norm': 3.928402900695801, 'learning_rate': 9.627309047276974e-05, 'epoch': 1.27}\n",
+ "{'loss': 1.3375, 'grad_norm': 3.1305952072143555, 'learning_rate': 9.607381059352038e-05, 'epoch': 1.29}\n",
+ "{'loss': 1.2559, 'grad_norm': 2.16672682762146, 'learning_rate': 9.586955808137958e-05, 'epoch': 1.3}\n",
+ "{'loss': 1.26, 'grad_norm': 2.531378984451294, 'learning_rate': 9.566035498081784e-05, 'epoch': 1.32}\n",
+ "{'loss': 1.2656, 'grad_norm': 2.2649450302124023, 'learning_rate': 9.544622387061055e-05, 'epoch': 1.34}\n",
+ "{'loss': 1.0581, 'grad_norm': 2.7688372135162354, 'learning_rate': 9.522718786140097e-05, 'epoch': 1.36}\n",
+ "{'loss': 1.2188, 'grad_norm': 3.3669986724853516, 'learning_rate': 9.500327059320606e-05, 'epoch': 1.37}\n",
+ "{'loss': 1.1538, 'grad_norm': 3.8478970527648926, 'learning_rate': 9.477449623286505e-05, 'epoch': 1.39}\n",
+ "{'loss': 1.0648, 'grad_norm': 2.5197343826293945, 'learning_rate': 9.454088947143116e-05, 'epoch': 1.41}\n",
+ "{'loss': 1.2997, 'grad_norm': 3.149819850921631, 'learning_rate': 9.430247552150673e-05, 'epoch': 1.43}\n",
+ "{'loss': 1.1716, 'grad_norm': 2.626891851425171, 'learning_rate': 9.405928011452211e-05, 'epoch': 1.45}\n",
+ "{'loss': 1.2123, 'grad_norm': 2.029723644256592, 'learning_rate': 9.381132949795861e-05, 'epoch': 1.46}\n",
+ "{'loss': 1.3143, 'grad_norm': 2.6693994998931885, 'learning_rate': 9.35586504325155e-05, 'epoch': 1.48}\n",
+ "{'loss': 1.2098, 'grad_norm': 2.4133574962615967, 'learning_rate': 9.330127018922194e-05, 'epoch': 1.5}\n",
+ "{'loss': 1.1153, 'grad_norm': 2.2110259532928467, 'learning_rate': 9.303921654649362e-05, 'epoch': 1.52}\n",
+ "{'loss': 1.2865, 'grad_norm': 2.425077438354492, 'learning_rate': 9.277251778713474e-05, 'epoch': 1.54}\n",
+ "{'loss': 1.2322, 'grad_norm': 2.287026882171631, 'learning_rate': 9.250120269528546e-05, 'epoch': 1.55}\n",
+ "{'loss': 1.1479, 'grad_norm': 2.3768105506896973, 'learning_rate': 9.22253005533154e-05, 'epoch': 1.57}\n",
+ "{'loss': 1.2783, 'grad_norm': 3.2799324989318848, 'learning_rate': 9.194484113866313e-05, 'epoch': 1.59}\n",
+ "{'loss': 1.3401, 'grad_norm': 2.8332979679107666, 'learning_rate': 9.165985472062246e-05, 'epoch': 1.61}\n",
+ "{'loss': 1.0948, 'grad_norm': 2.450061321258545, 'learning_rate': 9.137037205707552e-05, 'epoch': 1.62}\n",
+ "{'loss': 1.1901, 'grad_norm': 2.617992401123047, 'learning_rate': 9.107642439117321e-05, 'epoch': 1.64}\n",
+ "{'loss': 1.2412, 'grad_norm': 2.531679391860962, 'learning_rate': 9.077804344796302e-05, 'epoch': 1.66}\n",
+ "{'loss': 1.1875, 'grad_norm': 2.6147513389587402, 'learning_rate': 9.04752614309652e-05, 'epoch': 1.68}\n",
+ "{'loss': 1.1308, 'grad_norm': 3.1184866428375244, 'learning_rate': 9.01681110186971e-05, 'epoch': 1.7}\n",
+ "{'loss': 1.2466, 'grad_norm': 2.7524633407592773, 'learning_rate': 8.985662536114613e-05, 'epoch': 1.71}\n",
+ "{'loss': 1.1582, 'grad_norm': 2.410403251647949, 'learning_rate': 8.954083807619208e-05, 'epoch': 1.73}\n",
+ "{'loss': 1.2996, 'grad_norm': 3.132530927658081, 'learning_rate': 8.922078324597879e-05, 'epoch': 1.75}\n",
+ "{'loss': 1.3292, 'grad_norm': 3.093569755554199, 'learning_rate': 8.889649541323574e-05, 'epoch': 1.77}\n",
+ "{'loss': 1.2493, 'grad_norm': 2.8685665130615234, 'learning_rate': 8.856800957755e-05, 'epoch': 1.78}\n",
+ "{'loss': 1.2413, 'grad_norm': 3.4880857467651367, 'learning_rate': 8.823536119158864e-05, 'epoch': 1.8}\n",
+ "{'loss': 1.2145, 'grad_norm': 3.321408271789551, 'learning_rate': 8.789858615727265e-05, 'epoch': 1.82}\n",
+ "{'loss': 1.1431, 'grad_norm': 2.608922243118286, 'learning_rate': 8.755772082190194e-05, 'epoch': 1.84}\n",
+ "{'loss': 1.2395, 'grad_norm': 2.6196181774139404, 'learning_rate': 8.721280197423258e-05, 'epoch': 1.86}\n",
+ "{'loss': 1.0924, 'grad_norm': 3.0364978313446045, 'learning_rate': 8.68638668405062e-05, 'epoch': 1.87}\n",
+ "{'loss': 1.2218, 'grad_norm': 3.5102291107177734, 'learning_rate': 8.651095308043232e-05, 'epoch': 1.89}\n",
+ "{'loss': 1.2639, 'grad_norm': 4.278683662414551, 'learning_rate': 8.61540987831238e-05, 'epoch': 1.91}\n",
+ "{'loss': 1.2978, 'grad_norm': 3.729332208633423, 'learning_rate': 8.579334246298593e-05, 'epoch': 1.93}\n",
+ "{'loss': 1.1956, 'grad_norm': 3.6756839752197266, 'learning_rate': 8.542872305555978e-05, 'epoch': 1.95}\n",
+ "{'loss': 1.1345, 'grad_norm': 2.913640022277832, 'learning_rate': 8.50602799133199e-05, 'epoch': 1.96}\n",
+ "{'loss': 1.217, 'grad_norm': 2.75384259223938, 'learning_rate': 8.468805280142709e-05, 'epoch': 1.98}\n",
+ "{'loss': 1.2316, 'grad_norm': 3.1801509857177734, 'learning_rate': 8.43120818934367e-05, 'epoch': 2.0}\n",
+ " 33%|████████████▎ | 1120/3360 [41:10<1:24:27, 2.26s/it][INFO|trainer.py:3788] 2024-07-04 22:39:54,830 >> \n",
+ "***** Running Evaluation *****\n",
+ "[INFO|trainer.py:3790] 2024-07-04 22:39:54,830 >> Num examples = 46\n",
+ "[INFO|trainer.py:3793] 2024-07-04 22:39:54,830 >> Batch size = 1\n",
+ "\n",
+ " 0%| | 0/46 [00:00, ?it/s]\u001b[A\n",
+ " 7%|██▊ | 3/46 [00:00<00:02, 19.32it/s]\u001b[A\n",
+ " 11%|████▊ | 5/46 [00:00<00:02, 14.57it/s]\u001b[A\n",
+ " 15%|██████▋ | 7/46 [00:00<00:03, 12.84it/s]\u001b[A\n",
+ " 20%|████████▌ | 9/46 [00:00<00:02, 12.95it/s]\u001b[A\n",
+ " 24%|██████████▎ | 11/46 [00:00<00:02, 12.99it/s]\u001b[A\n",
+ " 28%|████████████▏ | 13/46 [00:00<00:02, 13.33it/s]\u001b[A\n",
+ " 33%|██████████████ | 15/46 [00:01<00:02, 13.58it/s]\u001b[A\n",
+ " 37%|███████████████▉ | 17/46 [00:01<00:02, 13.47it/s]\u001b[A\n",
+ " 41%|█████████████████▊ | 19/46 [00:01<00:01, 13.51it/s]\u001b[A\n",
+ " 46%|███████████████████▋ | 21/46 [00:01<00:01, 13.40it/s]\u001b[A\n",
+ " 50%|█████████████████████▌ | 23/46 [00:01<00:01, 13.16it/s]\u001b[A\n",
+ " 54%|███████████████████████▎ | 25/46 [00:01<00:01, 13.31it/s]\u001b[A\n",
+ " 59%|█████████████████████████▏ | 27/46 [00:02<00:01, 13.34it/s]\u001b[A\n",
+ " 63%|███████████████████████████ | 29/46 [00:02<00:01, 13.10it/s]\u001b[A\n",
+ " 67%|████████████████████████████▉ | 31/46 [00:02<00:01, 13.09it/s]\u001b[A\n",
+ " 72%|██████████████████████████████▊ | 33/46 [00:02<00:01, 12.12it/s]\u001b[A\n",
+ " 76%|████████████████████████████████▋ | 35/46 [00:02<00:00, 11.79it/s]\u001b[A\n",
+ " 80%|██████████████████████████████████▌ | 37/46 [00:02<00:00, 11.96it/s]\u001b[A\n",
+ " 85%|████████████████████████████████████▍ | 39/46 [00:03<00:00, 11.44it/s]\u001b[A\n",
+ " 89%|██████████████████████████████████████▎ | 41/46 [00:03<00:00, 11.61it/s]\u001b[A\n",
+ " 93%|████████████████████████████████████████▏ | 43/46 [00:03<00:00, 11.34it/s]\u001b[A\n",
+ " \u001b[A\n",
+ "\u001b[A{'eval_loss': 1.8573294878005981, 'eval_runtime': 3.7539, 'eval_samples_per_second': 12.254, 'eval_steps_per_second': 12.254, 'epoch': 2.0}\n",
+ " 33%|████████████▎ | 1120/3360 [41:14<1:24:27, 2.26s/it]\n",
+ "100%|███████████████████████████████████████████| 46/46 [00:03<00:00, 11.28it/s]\u001b[A\n",
+ " \u001b[A[INFO|trainer.py:3478] 2024-07-04 22:39:58,587 >> Saving model checkpoint to saves/qwen2-1.5b/lora/sft/checkpoint-1120\n",
+ "[INFO|configuration_utils.py:733] 2024-07-04 22:39:59,689 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--unsloth--qwen2-1.5b-instruct-bnb-4bit/snapshots/9f10684b3a26fbf25e50921655353e2e3e599d70/config.json\n",
+ "[INFO|configuration_utils.py:800] 2024-07-04 22:39:59,690 >> Model config Qwen2Config {\n",
+ " \"_name_or_path\": \"Qwen/Qwen2-1.5B-Instruct\",\n",
+ " \"architectures\": [\n",
+ " \"Qwen2ForCausalLM\"\n",
+ " ],\n",
+ " \"attention_dropout\": 0.0,\n",
+ " \"bos_token_id\": 151643,\n",
+ " \"eos_token_id\": 151645,\n",
+ " \"hidden_act\": \"silu\",\n",
+ " \"hidden_size\": 1536,\n",
+ " \"initializer_range\": 0.02,\n",
+ " \"intermediate_size\": 8960,\n",
+ " \"max_position_embeddings\": 32768,\n",
+ " \"max_window_layers\": 28,\n",
+ " \"model_type\": \"qwen2\",\n",
+ " \"num_attention_heads\": 12,\n",
+ " \"num_hidden_layers\": 28,\n",
+ " \"num_key_value_heads\": 2,\n",
+ " \"quantization_config\": {\n",
+ " \"_load_in_4bit\": true,\n",
+ " \"_load_in_8bit\": false,\n",
+ " \"bnb_4bit_compute_dtype\": \"bfloat16\",\n",
+ " \"bnb_4bit_quant_storage\": \"uint8\",\n",
+ " \"bnb_4bit_quant_type\": \"nf4\",\n",
+ " \"bnb_4bit_use_double_quant\": true,\n",
+ " \"llm_int8_enable_fp32_cpu_offload\": false,\n",
+ " \"llm_int8_has_fp16_weight\": false,\n",
+ " \"llm_int8_skip_modules\": null,\n",
+ " \"llm_int8_threshold\": 6.0,\n",
+ " \"load_in_4bit\": true,\n",
+ " \"load_in_8bit\": false,\n",
+ " \"quant_method\": \"bitsandbytes\"\n",
+ " },\n",
+ " \"rms_norm_eps\": 1e-06,\n",
+ " \"rope_theta\": 1000000.0,\n",
+ " \"sliding_window\": 32768,\n",
+ " \"tie_word_embeddings\": true,\n",
+ " \"torch_dtype\": \"bfloat16\",\n",
+ " \"transformers_version\": \"4.42.3\",\n",
+ " \"use_cache\": true,\n",
+ " \"use_sliding_window\": false,\n",
+ " \"vocab_size\": 151936\n",
+ "}\n",
+ "\n",
+ "[INFO|tokenization_utils_base.py:2574] 2024-07-04 22:39:59,739 >> tokenizer config file saved in saves/qwen2-1.5b/lora/sft/checkpoint-1120/tokenizer_config.json\n",
+ "[INFO|tokenization_utils_base.py:2583] 2024-07-04 22:39:59,740 >> Special tokens file saved in saves/qwen2-1.5b/lora/sft/checkpoint-1120/special_tokens_map.json\n",
+ "{'loss': 0.6282, 'grad_norm': 2.980609893798828, 'learning_rate': 8.393240776696274e-05, 'epoch': 2.02}\n",
+ "{'loss': 0.516, 'grad_norm': 2.145615577697754, 'learning_rate': 8.354907139929851e-05, 'epoch': 2.03}\n",
+ "{'loss': 0.5637, 'grad_norm': 2.9827773571014404, 'learning_rate': 8.316211416299397e-05, 'epoch': 2.05}\n",
+ "{'loss': 0.5459, 'grad_norm': 6.992089748382568, 'learning_rate': 8.27715778213905e-05, 'epoch': 2.07}\n",
+ "{'loss': 0.4226, 'grad_norm': 2.0110838413238525, 'learning_rate': 8.237750452411353e-05, 'epoch': 2.09}\n",
+ "{'loss': 0.5595, 'grad_norm': 1.9566326141357422, 'learning_rate': 8.197993680252334e-05, 'epoch': 2.11}\n",
+ "{'loss': 0.6434, 'grad_norm': 3.0069830417633057, 'learning_rate': 8.157891756512488e-05, 'epoch': 2.12}\n",
+ "{'loss': 0.5572, 'grad_norm': 2.4360501766204834, 'learning_rate': 8.117449009293668e-05, 'epoch': 2.14}\n",
+ "{'loss': 0.5111, 'grad_norm': 3.1125354766845703, 'learning_rate': 8.076669803481965e-05, 'epoch': 2.16}\n",
+ "{'loss': 0.5006, 'grad_norm': 3.5583136081695557, 'learning_rate': 8.035558540276618e-05, 'epoch': 2.18}\n",
+ "{'loss': 0.5521, 'grad_norm': 2.597862482070923, 'learning_rate': 7.994119656715002e-05, 'epoch': 2.2}\n",
+ "{'loss': 0.6284, 'grad_norm': 3.2273318767547607, 'learning_rate': 7.952357625193749e-05, 'epoch': 2.21}\n",
+ "{'loss': 0.6074, 'grad_norm': 3.255476474761963, 'learning_rate': 7.91027695298606e-05, 'epoch': 2.23}\n",
+ "{'loss': 0.5721, 'grad_norm': 2.2420713901519775, 'learning_rate': 7.86788218175523e-05, 'epoch': 2.25}\n",
+ "{'loss': 0.5287, 'grad_norm': 3.241563558578491, 'learning_rate': 7.8251778870645e-05, 'epoch': 2.27}\n",
+ "{'loss': 0.5853, 'grad_norm': 3.7581467628479004, 'learning_rate': 7.782168677883206e-05, 'epoch': 2.28}\n",
+ "{'loss': 0.6142, 'grad_norm': 4.938629627227783, 'learning_rate': 7.738859196089358e-05, 'epoch': 2.3}\n",
+ "{'loss': 0.6064, 'grad_norm': 3.4490935802459717, 'learning_rate': 7.695254115968648e-05, 'epoch': 2.32}\n",
+ "{'loss': 0.6328, 'grad_norm': 3.473822832107544, 'learning_rate': 7.651358143709972e-05, 'epoch': 2.34}\n",
+ "{'loss': 0.6386, 'grad_norm': 3.5730648040771484, 'learning_rate': 7.60717601689749e-05, 'epoch': 2.36}\n",
+ "{'loss': 0.5591, 'grad_norm': 3.024034023284912, 'learning_rate': 7.562712503999327e-05, 'epoch': 2.37}\n",
+ "{'loss': 0.7168, 'grad_norm': 3.799771547317505, 'learning_rate': 7.517972403852905e-05, 'epoch': 2.39}\n",
+ "{'loss': 0.5869, 'grad_norm': 3.3111960887908936, 'learning_rate': 7.472960545147038e-05, 'epoch': 2.41}\n",
+ "{'loss': 0.5025, 'grad_norm': 3.5263280868530273, 'learning_rate': 7.427681785900761e-05, 'epoch': 2.43}\n",
+ "{'loss': 0.5964, 'grad_norm': 3.572462797164917, 'learning_rate': 7.382141012939034e-05, 'epoch': 2.45}\n",
+ "{'loss': 0.5491, 'grad_norm': 3.038294792175293, 'learning_rate': 7.33634314136531e-05, 'epoch': 2.46}\n",
+ "{'loss': 0.6004, 'grad_norm': 7.641390800476074, 'learning_rate': 7.290293114031061e-05, 'epoch': 2.48}\n",
+ "{'loss': 0.6356, 'grad_norm': 3.8366777896881104, 'learning_rate': 7.243995901002312e-05, 'epoch': 2.5}\n",
+ "{'loss': 0.5982, 'grad_norm': 3.146303176879883, 'learning_rate': 7.197456499023225e-05, 'epoch': 2.52}\n",
+ "{'loss': 0.6127, 'grad_norm': 4.154056072235107, 'learning_rate': 7.150679930976825e-05, 'epoch': 2.53}\n",
+ "{'loss': 0.5952, 'grad_norm': 2.470127582550049, 'learning_rate': 7.103671245342887e-05, 'epoch': 2.55}\n",
+ "{'loss': 0.4994, 'grad_norm': 5.2111053466796875, 'learning_rate': 7.056435515653059e-05, 'epoch': 2.57}\n",
+ "{'loss': 0.6969, 'grad_norm': 3.3999710083007812, 'learning_rate': 7.008977839943299e-05, 'epoch': 2.59}\n",
+ "{'loss': 0.6066, 'grad_norm': 3.942821979522705, 'learning_rate': 6.961303340203653e-05, 'epoch': 2.61}\n",
+ "{'loss': 0.6744, 'grad_norm': 3.511596918106079, 'learning_rate': 6.91341716182545e-05, 'epoch': 2.62}\n",
+ "{'loss': 0.5972, 'grad_norm': 2.605888605117798, 'learning_rate': 6.86532447304597e-05, 'epoch': 2.64}\n",
+ "{'loss': 0.6058, 'grad_norm': 3.500854730606079, 'learning_rate': 6.817030464390656e-05, 'epoch': 2.66}\n",
+ "{'loss': 0.6422, 'grad_norm': 2.9531426429748535, 'learning_rate': 6.768540348112907e-05, 'epoch': 2.68}\n",
+ "{'loss': 0.5311, 'grad_norm': 4.346620559692383, 'learning_rate': 6.719859357631535e-05, 'epoch': 2.7}\n",
+ "{'loss': 0.4986, 'grad_norm': 4.6267900466918945, 'learning_rate': 6.670992746965938e-05, 'epoch': 2.71}\n",
+ "{'loss': 0.6576, 'grad_norm': 4.185196876525879, 'learning_rate': 6.621945790169036e-05, 'epoch': 2.73}\n",
+ "{'loss': 0.6437, 'grad_norm': 3.265991687774658, 'learning_rate': 6.572723780758069e-05, 'epoch': 2.75}\n",
+ "{'loss': 0.574, 'grad_norm': 4.036723613739014, 'learning_rate': 6.523332031143272e-05, 'epoch': 2.77}\n",
+ "{'loss': 0.5839, 'grad_norm': 3.2608094215393066, 'learning_rate': 6.473775872054521e-05, 'epoch': 2.78}\n",
+ "{'loss': 0.5044, 'grad_norm': 3.3588390350341797, 'learning_rate': 6.424060651966007e-05, 'epoch': 2.8}\n",
+ "{'loss': 0.5707, 'grad_norm': 3.363955020904541, 'learning_rate': 6.374191736518974e-05, 'epoch': 2.82}\n",
+ "{'loss': 0.5785, 'grad_norm': 3.4573071002960205, 'learning_rate': 6.324174507942637e-05, 'epoch': 2.84}\n",
+ "{'loss': 0.5755, 'grad_norm': 4.1820855140686035, 'learning_rate': 6.274014364473274e-05, 'epoch': 2.86}\n",
+ "{'loss': 0.7532, 'grad_norm': 2.9372756481170654, 'learning_rate': 6.22371671977162e-05, 'epoch': 2.87}\n",
+ "{'loss': 0.6447, 'grad_norm': 4.2755632400512695, 'learning_rate': 6.173287002338577e-05, 'epoch': 2.89}\n",
+ "{'loss': 0.6018, 'grad_norm': 4.274354934692383, 'learning_rate': 6.122730654929334e-05, 'epoch': 2.91}\n",
+ "{'loss': 0.5677, 'grad_norm': 4.0272393226623535, 'learning_rate': 6.072053133965938e-05, 'epoch': 2.93}\n",
+ "{'loss': 0.6344, 'grad_norm': 3.0991122722625732, 'learning_rate': 6.021259908948402e-05, 'epoch': 2.95}\n",
+ "{'loss': 0.6559, 'grad_norm': 3.816575527191162, 'learning_rate': 5.970356461864391e-05, 'epoch': 2.96}\n",
+ "{'loss': 0.5647, 'grad_norm': 3.187918186187744, 'learning_rate': 5.919348286597569e-05, 'epoch': 2.98}\n",
+ "{'loss': 0.6381, 'grad_norm': 3.6101670265197754, 'learning_rate': 5.868240888334653e-05, 'epoch': 3.0}\n",
+ " 50%|█████████████████▌ | 1680/3360 [1:12:00<2:09:10, 4.61s/it][INFO|trainer.py:3788] 2024-07-04 23:10:44,677 >> \n",
+ "***** Running Evaluation *****\n",
+ "[INFO|trainer.py:3790] 2024-07-04 23:10:44,677 >> Num examples = 46\n",
+ "[INFO|trainer.py:3793] 2024-07-04 23:10:44,677 >> Batch size = 1\n",
+ "\n",
+ " 0%| | 0/46 [00:00, ?it/s]\u001b[A\n",
+ " 4%|█▉ | 2/46 [00:00<00:03, 13.97it/s]\u001b[A\n",
+ " 9%|███▊ | 4/46 [00:00<00:05, 8.02it/s]\u001b[A\n",
+ " 11%|████▊ | 5/46 [00:00<00:05, 7.56it/s]\u001b[A\n",
+ " 13%|█████▋ | 6/46 [00:00<00:05, 6.98it/s]\u001b[A\n",
+ " 15%|██████▋ | 7/46 [00:00<00:05, 7.22it/s]\u001b[A\n",
+ " 17%|███████▋ | 8/46 [00:01<00:05, 6.73it/s]\u001b[A\n",
+ " 20%|████████▌ | 9/46 [00:01<00:05, 6.69it/s]\u001b[A\n",
+ " 22%|█████████▎ | 10/46 [00:01<00:05, 6.45it/s]\u001b[A\n",
+ " 24%|██████████▎ | 11/46 [00:01<00:04, 7.07it/s]\u001b[A\n",
+ " 26%|███████████▏ | 12/46 [00:01<00:04, 7.70it/s]\u001b[A\n",
+ " 28%|████████████▏ | 13/46 [00:01<00:04, 8.19it/s]\u001b[A\n",
+ " 30%|█████████████ | 14/46 [00:01<00:03, 8.08it/s]\u001b[A\n",
+ " 35%|██████████████▉ | 16/46 [00:02<00:03, 8.79it/s]\u001b[A\n",
+ " 37%|███████████████▉ | 17/46 [00:02<00:03, 8.65it/s]\u001b[A\n",
+ " 39%|████████████████▊ | 18/46 [00:02<00:03, 8.03it/s]\u001b[A\n",
+ " 43%|██████████████████▋ | 20/46 [00:02<00:02, 9.13it/s]\u001b[A\n",
+ " 48%|████████████████████▌ | 22/46 [00:02<00:02, 9.34it/s]\u001b[A\n",
+ " 50%|█████████████████████▌ | 23/46 [00:02<00:02, 9.05it/s]\u001b[A\n",
+ " 52%|██████████████████████▍ | 24/46 [00:02<00:02, 9.08it/s]\u001b[A\n",
+ " 54%|███████████████████████▎ | 25/46 [00:03<00:02, 8.51it/s]\u001b[A\n",
+ " 57%|████████████████████████▎ | 26/46 [00:03<00:02, 8.09it/s]\u001b[A\n",
+ " 59%|█████████████████████████▏ | 27/46 [00:03<00:02, 7.71it/s]\u001b[A\n",
+ " 61%|██████████████████████████▏ | 28/46 [00:03<00:02, 7.18it/s]\u001b[A\n",
+ " 63%|███████████████████████████ | 29/46 [00:03<00:02, 6.16it/s]\u001b[A\n",
+ " 65%|████████████████████████████ | 30/46 [00:03<00:02, 5.74it/s]\u001b[A\n",
+ " 67%|████████████████████████████▉ | 31/46 [00:04<00:02, 5.64it/s]\u001b[A\n",
+ " 70%|█████████████████████████████▉ | 32/46 [00:04<00:02, 6.10it/s]\u001b[A\n",
+ " 72%|██████████████████████████████▊ | 33/46 [00:04<00:02, 5.99it/s]\u001b[A\n",
+ " 74%|███████████████████████████████▊ | 34/46 [00:04<00:01, 6.17it/s]\u001b[A\n",
+ " 76%|████████████████████████████████▋ | 35/46 [00:04<00:01, 6.58it/s]\u001b[A\n",
+ " 78%|█████████████████████████████████▋ | 36/46 [00:04<00:01, 6.82it/s]\u001b[A\n",
+ " 80%|██████████████████████████████████▌ | 37/46 [00:05<00:01, 6.79it/s]\u001b[A\n",
+ " 83%|███████████████████████████████████▌ | 38/46 [00:05<00:01, 6.98it/s]\u001b[A\n",
+ " 85%|████████████████████████████████████▍ | 39/46 [00:05<00:00, 7.14it/s]\u001b[A\n",
+ " 87%|█████████████████████████████████████▍ | 40/46 [00:05<00:00, 6.82it/s]\u001b[A\n",
+ " 89%|██████████████████████████████████████▎ | 41/46 [00:05<00:00, 6.82it/s]\u001b[A\n",
+ " 91%|███████████████████████████████████████▎ | 42/46 [00:05<00:00, 7.10it/s]\u001b[A\n",
+ " 93%|████████████████████████████████████████▏ | 43/46 [00:05<00:00, 6.85it/s]\u001b[A\n",
+ " 96%|█████████████████████████████████████████▏ | 44/46 [00:05<00:00, 7.08it/s]\u001b[A\n",
+ " 98%|██████████████████████████████████████████ | 45/46 [00:06<00:00, 7.02it/s]\u001b[A\n",
+ " \u001b[A\n",
+ "\u001b[A{'eval_loss': 2.19065260887146, 'eval_runtime': 6.452, 'eval_samples_per_second': 7.13, 'eval_steps_per_second': 7.13, 'epoch': 3.0}\n",
+ " 50%|█████████████████▌ | 1680/3360 [1:12:07<2:09:10, 4.61s/it]\n",
+ "100%|███████████████████████████████████████████| 46/46 [00:06<00:00, 7.19it/s]\u001b[A\n",
+ " \u001b[A[INFO|trainer.py:3478] 2024-07-04 23:10:51,132 >> Saving model checkpoint to saves/qwen2-1.5b/lora/sft/checkpoint-1680\n",
+ "[INFO|configuration_utils.py:733] 2024-07-04 23:10:52,385 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--unsloth--qwen2-1.5b-instruct-bnb-4bit/snapshots/9f10684b3a26fbf25e50921655353e2e3e599d70/config.json\n",
+ "[INFO|configuration_utils.py:800] 2024-07-04 23:10:52,387 >> Model config Qwen2Config {\n",
+ " \"_name_or_path\": \"Qwen/Qwen2-1.5B-Instruct\",\n",
+ " \"architectures\": [\n",
+ " \"Qwen2ForCausalLM\"\n",
+ " ],\n",
+ " \"attention_dropout\": 0.0,\n",
+ " \"bos_token_id\": 151643,\n",
+ " \"eos_token_id\": 151645,\n",
+ " \"hidden_act\": \"silu\",\n",
+ " \"hidden_size\": 1536,\n",
+ " \"initializer_range\": 0.02,\n",
+ " \"intermediate_size\": 8960,\n",
+ " \"max_position_embeddings\": 32768,\n",
+ " \"max_window_layers\": 28,\n",
+ " \"model_type\": \"qwen2\",\n",
+ " \"num_attention_heads\": 12,\n",
+ " \"num_hidden_layers\": 28,\n",
+ " \"num_key_value_heads\": 2,\n",
+ " \"quantization_config\": {\n",
+ " \"_load_in_4bit\": true,\n",
+ " \"_load_in_8bit\": false,\n",
+ " \"bnb_4bit_compute_dtype\": \"bfloat16\",\n",
+ " \"bnb_4bit_quant_storage\": \"uint8\",\n",
+ " \"bnb_4bit_quant_type\": \"nf4\",\n",
+ " \"bnb_4bit_use_double_quant\": true,\n",
+ " \"llm_int8_enable_fp32_cpu_offload\": false,\n",
+ " \"llm_int8_has_fp16_weight\": false,\n",
+ " \"llm_int8_skip_modules\": null,\n",
+ " \"llm_int8_threshold\": 6.0,\n",
+ " \"load_in_4bit\": true,\n",
+ " \"load_in_8bit\": false,\n",
+ " \"quant_method\": \"bitsandbytes\"\n",
+ " },\n",
+ " \"rms_norm_eps\": 1e-06,\n",
+ " \"rope_theta\": 1000000.0,\n",
+ " \"sliding_window\": 32768,\n",
+ " \"tie_word_embeddings\": true,\n",
+ " \"torch_dtype\": \"bfloat16\",\n",
+ " \"transformers_version\": \"4.42.3\",\n",
+ " \"use_cache\": true,\n",
+ " \"use_sliding_window\": false,\n",
+ " \"vocab_size\": 151936\n",
+ "}\n",
+ "\n",
+ "[INFO|tokenization_utils_base.py:2574] 2024-07-04 23:10:52,534 >> tokenizer config file saved in saves/qwen2-1.5b/lora/sft/checkpoint-1680/tokenizer_config.json\n",
+ "[INFO|tokenization_utils_base.py:2583] 2024-07-04 23:10:52,535 >> Special tokens file saved in saves/qwen2-1.5b/lora/sft/checkpoint-1680/special_tokens_map.json\n",
+ "{'loss': 0.3795, 'grad_norm': 3.6095104217529297, 'learning_rate': 5.8170397829712485e-05, 'epoch': 3.02}\n",
+ "{'loss': 0.1885, 'grad_norm': 2.648378610610962, 'learning_rate': 5.765750496516547e-05, 'epoch': 3.03}\n",
+ "{'loss': 0.2431, 'grad_norm': 3.102599859237671, 'learning_rate': 5.714378564496901e-05, 'epoch': 3.05}\n",
+ "{'loss': 0.2407, 'grad_norm': 1.7132669687271118, 'learning_rate': 5.6629295313583974e-05, 'epoch': 3.07}\n",
+ "{'loss': 0.1859, 'grad_norm': 2.363086462020874, 'learning_rate': 5.611408949868457e-05, 'epoch': 3.09}\n",
+ "{'loss': 0.2814, 'grad_norm': 2.7699074745178223, 'learning_rate': 5.559822380516539e-05, 'epoch': 3.11}\n",
+ "{'loss': 0.2066, 'grad_norm': 2.309485912322998, 'learning_rate': 5.5081753909140096e-05, 'epoch': 3.12}\n",
+ "{'loss': 0.2561, 'grad_norm': 3.8177757263183594, 'learning_rate': 5.456473555193242e-05, 'epoch': 3.14}\n",
+ "{'loss': 0.2839, 'grad_norm': 5.046483039855957, 'learning_rate': 5.404722453406017e-05, 'epoch': 3.16}\n",
+ "{'loss': 0.2309, 'grad_norm': 3.3046510219573975, 'learning_rate': 5.3529276709212816e-05, 'epoch': 3.18}\n",
+ "{'loss': 0.2678, 'grad_norm': 3.739877939224243, 'learning_rate': 5.30109479782233e-05, 'epoch': 3.2}\n",
+ "{'loss': 0.2305, 'grad_norm': 3.0891871452331543, 'learning_rate': 5.249229428303486e-05, 'epoch': 3.21}\n",
+ "{'loss': 0.3009, 'grad_norm': 2.0775339603424072, 'learning_rate': 5.197337160066331e-05, 'epoch': 3.23}\n",
+ "{'loss': 0.1974, 'grad_norm': 4.094172477722168, 'learning_rate': 5.145423593715557e-05, 'epoch': 3.25}\n",
+ "{'loss': 0.2613, 'grad_norm': 3.4857871532440186, 'learning_rate': 5.0934943321545115e-05, 'epoch': 3.27}\n",
+ "{'loss': 0.1759, 'grad_norm': 5.555017948150635, 'learning_rate': 5.041554979980486e-05, 'epoch': 3.28}\n",
+ "{'loss': 0.2755, 'grad_norm': 5.37070894241333, 'learning_rate': 4.9896111428798254e-05, 'epoch': 3.3}\n",
+ "{'loss': 0.3013, 'grad_norm': 3.0473411083221436, 'learning_rate': 4.9376684270229254e-05, 'epoch': 3.32}\n",
+ "{'loss': 0.2713, 'grad_norm': 2.421534299850464, 'learning_rate': 4.8857324384591653e-05, 'epoch': 3.34}\n",
+ "{'loss': 0.2342, 'grad_norm': 3.430769205093384, 'learning_rate': 4.8338087825118675e-05, 'epoch': 3.36}\n",
+ "{'loss': 0.2836, 'grad_norm': 3.117511510848999, 'learning_rate': 4.781903063173321e-05, 'epoch': 3.37}\n",
+ "{'loss': 0.2305, 'grad_norm': 2.2710249423980713, 'learning_rate': 4.730020882499964e-05, 'epoch': 3.39}\n",
+ "{'loss': 0.2707, 'grad_norm': 2.8062386512756348, 'learning_rate': 4.678167840007767e-05, 'epoch': 3.41}\n",
+ "{'loss': 0.2347, 'grad_norm': 3.199958324432373, 'learning_rate': 4.626349532067879e-05, 'epoch': 3.43}\n",
+ "{'loss': 0.2987, 'grad_norm': 2.9405529499053955, 'learning_rate': 4.574571551302647e-05, 'epoch': 3.44}\n",
+ "{'loss': 0.2748, 'grad_norm': 2.3248393535614014, 'learning_rate': 4.522839485981994e-05, 'epoch': 3.46}\n",
+ "{'loss': 0.2595, 'grad_norm': 2.7082927227020264, 'learning_rate': 4.471158919420312e-05, 'epoch': 3.48}\n",
+ "{'loss': 0.2452, 'grad_norm': 2.636992931365967, 'learning_rate': 4.4195354293738484e-05, 'epoch': 3.5}\n",
+ "{'loss': 0.2322, 'grad_norm': 2.870598554611206, 'learning_rate': 4.367974587438733e-05, 'epoch': 3.52}\n",
+ "{'loss': 0.2822, 'grad_norm': 2.3464884757995605, 'learning_rate': 4.316481958449634e-05, 'epoch': 3.53}\n",
+ "{'loss': 0.2228, 'grad_norm': 4.499746322631836, 'learning_rate': 4.2650630998791615e-05, 'epoch': 3.55}\n",
+ "{'loss': 0.2826, 'grad_norm': 3.5622456073760986, 'learning_rate': 4.213723561238074e-05, 'epoch': 3.57}\n",
+ "{'loss': 0.2505, 'grad_norm': 2.92927622795105, 'learning_rate': 4.162468883476319e-05, 'epoch': 3.59}\n",
+ "{'loss': 0.2715, 'grad_norm': 4.32992696762085, 'learning_rate': 4.111304598385018e-05, 'epoch': 3.61}\n",
+ "{'loss': 0.2382, 'grad_norm': 3.33722186088562, 'learning_rate': 4.060236227999441e-05, 'epoch': 3.62}\n",
+ "{'loss': 0.2219, 'grad_norm': 3.15584135055542, 'learning_rate': 4.0092692840030134e-05, 'epoch': 3.64}\n",
+ "{'loss': 0.2593, 'grad_norm': 2.6653778553009033, 'learning_rate': 3.9584092671324606e-05, 'epoch': 3.66}\n",
+ "{'loss': 0.2825, 'grad_norm': 2.261251449584961, 'learning_rate': 3.907661666584131e-05, 'epoch': 3.68}\n",
+ "{'loss': 0.2472, 'grad_norm': 2.40474796295166, 'learning_rate': 3.857031959421553e-05, 'epoch': 3.69}\n",
+ "{'loss': 0.2667, 'grad_norm': 3.5820109844207764, 'learning_rate': 3.806525609984312e-05, 'epoch': 3.71}\n",
+ "{'loss': 0.2426, 'grad_norm': 4.061399459838867, 'learning_rate': 3.7561480692983006e-05, 'epoch': 3.73}\n",
+ "{'loss': 0.3113, 'grad_norm': 3.3326733112335205, 'learning_rate': 3.705904774487396e-05, 'epoch': 3.75}\n",
+ "{'loss': 0.2658, 'grad_norm': 3.946682929992676, 'learning_rate': 3.655801148186655e-05, 'epoch': 3.77}\n",
+ "{'loss': 0.2051, 'grad_norm': 1.952369213104248, 'learning_rate': 3.6058425979570485e-05, 'epoch': 3.78}\n",
+ "{'loss': 0.249, 'grad_norm': 3.139533519744873, 'learning_rate': 3.556034515701852e-05, 'epoch': 3.8}\n",
+ "{'loss': 0.2398, 'grad_norm': 2.4751150608062744, 'learning_rate': 3.506382277084696e-05, 'epoch': 3.82}\n",
+ "{'loss': 0.2659, 'grad_norm': 1.9120585918426514, 'learning_rate': 3.4568912409493945e-05, 'epoch': 3.84}\n",
+ "{'loss': 0.1847, 'grad_norm': 2.8865163326263428, 'learning_rate': 3.4075667487415785e-05, 'epoch': 3.86}\n",
+ "{'loss': 0.2245, 'grad_norm': 3.2274515628814697, 'learning_rate': 3.358414123932195e-05, 'epoch': 3.87}\n",
+ "{'loss': 0.2643, 'grad_norm': 2.924294948577881, 'learning_rate': 3.3094386714429724e-05, 'epoch': 3.89}\n",
+ "{'loss': 0.252, 'grad_norm': 3.187256336212158, 'learning_rate': 3.2606456770738636e-05, 'epoch': 3.91}\n",
+ "{'loss': 0.1969, 'grad_norm': 2.353398084640503, 'learning_rate': 3.212040406932569e-05, 'epoch': 3.93}\n",
+ "{'loss': 0.2, 'grad_norm': 2.357897996902466, 'learning_rate': 3.163628106866172e-05, 'epoch': 3.94}\n",
+ "{'loss': 0.2773, 'grad_norm': 3.165809392929077, 'learning_rate': 3.115414001894974e-05, 'epoch': 3.96}\n",
+ "{'loss': 0.2495, 'grad_norm': 3.546583414077759, 'learning_rate': 3.067403295648566e-05, 'epoch': 3.98}\n",
+ "{'loss': 0.2513, 'grad_norm': 3.0604918003082275, 'learning_rate': 3.019601169804216e-05, 'epoch': 4.0}\n",
+ " 67%|███████████████████████▎ | 2240/3360 [1:48:51<1:17:16, 4.14s/it][INFO|trainer.py:3788] 2024-07-04 23:47:35,277 >> \n",
+ "***** Running Evaluation *****\n",
+ "[INFO|trainer.py:3790] 2024-07-04 23:47:35,278 >> Num examples = 46\n",
+ "[INFO|trainer.py:3793] 2024-07-04 23:47:35,278 >> Batch size = 1\n",
+ "\n",
+ " 0%| | 0/46 [00:00, ?it/s]\u001b[A\n",
+ " 4%|█▉ | 2/46 [00:00<00:02, 17.70it/s]\u001b[A\n",
+ " 9%|███▊ | 4/46 [00:00<00:03, 11.55it/s]\u001b[A\n",
+ " 13%|█████▋ | 6/46 [00:00<00:03, 11.20it/s]\u001b[A\n",
+ " 17%|███████▋ | 8/46 [00:00<00:03, 10.88it/s]\u001b[A\n",
+ " 22%|█████████▎ | 10/46 [00:00<00:03, 10.72it/s]\u001b[A\n",
+ " 26%|███████████▏ | 12/46 [00:01<00:03, 10.60it/s]\u001b[A\n",
+ " 30%|█████████████ | 14/46 [00:01<00:03, 9.99it/s]\u001b[A\n",
+ " 35%|██████████████▉ | 16/46 [00:01<00:03, 9.65it/s]\u001b[A\n",
+ " 37%|███████████████▉ | 17/46 [00:01<00:03, 9.65it/s]\u001b[A\n",
+ " 39%|████████████████▊ | 18/46 [00:01<00:02, 9.40it/s]\u001b[A\n",
+ " 41%|█████████████████▊ | 19/46 [00:01<00:02, 9.48it/s]\u001b[A\n",
+ " 43%|██████████████████▋ | 20/46 [00:01<00:02, 9.53it/s]\u001b[A\n",
+ " 46%|███████████████████▋ | 21/46 [00:02<00:02, 9.54it/s]\u001b[A\n",
+ " 48%|████████████████████▌ | 22/46 [00:02<00:02, 9.41it/s]\u001b[A\n",
+ " 50%|█████████████████████▌ | 23/46 [00:02<00:02, 9.45it/s]\u001b[A\n",
+ " 52%|██████████████████████▍ | 24/46 [00:02<00:02, 9.51it/s]\u001b[A\n",
+ " 54%|███████████████████████▎ | 25/46 [00:02<00:02, 9.48it/s]\u001b[A\n",
+ " 57%|████████████████████████▎ | 26/46 [00:02<00:02, 9.35it/s]\u001b[A\n",
+ " 59%|█████████████████████████▏ | 27/46 [00:02<00:02, 8.71it/s]\u001b[A\n",
+ " 61%|██████████████████████████▏ | 28/46 [00:02<00:02, 8.77it/s]\u001b[A\n",
+ " 63%|███████████████████████████ | 29/46 [00:02<00:01, 8.66it/s]\u001b[A\n",
+ " 65%|████████████████████████████ | 30/46 [00:03<00:01, 8.43it/s]\u001b[A\n",
+ " 67%|████████████████████████████▉ | 31/46 [00:03<00:01, 8.09it/s]\u001b[A\n",
+ " 70%|█████████████████████████████▉ | 32/46 [00:03<00:01, 7.41it/s]\u001b[A\n",
+ " 72%|██████████████████████████████▊ | 33/46 [00:03<00:01, 7.04it/s]\u001b[A\n",
+ " 74%|███████████████████████████████▊ | 34/46 [00:03<00:01, 6.71it/s]\u001b[A\n",
+ " 76%|████████████████████████████████▋ | 35/46 [00:03<00:01, 6.51it/s]\u001b[A\n",
+ " 78%|█████████████████████████████████▋ | 36/46 [00:04<00:01, 6.30it/s]\u001b[A\n",
+ " 80%|██████████████████████████████████▌ | 37/46 [00:04<00:01, 4.95it/s]\u001b[A\n",
+ " 83%|███████████████████████████████████▌ | 38/46 [00:04<00:01, 4.03it/s]\u001b[A\n",
+ " 85%|████████████████████████████████████▍ | 39/46 [00:05<00:02, 3.03it/s]\u001b[A\n",
+ " 87%|█████████████████████████████████████▍ | 40/46 [00:05<00:01, 3.01it/s]\u001b[A\n",
+ " 89%|██████████████████████████████████████▎ | 41/46 [00:05<00:01, 3.11it/s]\u001b[A\n",
+ " 91%|███████████████████████████████████████▎ | 42/46 [00:06<00:01, 3.36it/s]\u001b[A\n",
+ " 93%|████████████████████████████████████████▏ | 43/46 [00:06<00:00, 3.57it/s]\u001b[A\n",
+ " 96%|█████████████████████████████████████████▏ | 44/46 [00:06<00:00, 3.79it/s]\u001b[A\n",
+ " 98%|██████████████████████████████████████████ | 45/46 [00:06<00:00, 3.73it/s]\u001b[A\n",
+ " \u001b[A\n",
+ "\u001b[A{'eval_loss': 2.564648389816284, 'eval_runtime': 7.2063, 'eval_samples_per_second': 6.383, 'eval_steps_per_second': 6.383, 'epoch': 4.0}\n",
+ " 67%|███████████████████████▎ | 2240/3360 [1:48:58<1:17:16, 4.14s/it]\n",
+ "100%|███████████████████████████████████████████| 46/46 [00:07<00:00, 4.11it/s]\u001b[A\n",
+ " \u001b[A[INFO|trainer.py:3478] 2024-07-04 23:47:42,489 >> Saving model checkpoint to saves/qwen2-1.5b/lora/sft/checkpoint-2240\n",
+ "[INFO|configuration_utils.py:733] 2024-07-04 23:47:44,213 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--unsloth--qwen2-1.5b-instruct-bnb-4bit/snapshots/9f10684b3a26fbf25e50921655353e2e3e599d70/config.json\n",
+ "[INFO|configuration_utils.py:800] 2024-07-04 23:47:44,213 >> Model config Qwen2Config {\n",
+ " \"_name_or_path\": \"Qwen/Qwen2-1.5B-Instruct\",\n",
+ " \"architectures\": [\n",
+ " \"Qwen2ForCausalLM\"\n",
+ " ],\n",
+ " \"attention_dropout\": 0.0,\n",
+ " \"bos_token_id\": 151643,\n",
+ " \"eos_token_id\": 151645,\n",
+ " \"hidden_act\": \"silu\",\n",
+ " \"hidden_size\": 1536,\n",
+ " \"initializer_range\": 0.02,\n",
+ " \"intermediate_size\": 8960,\n",
+ " \"max_position_embeddings\": 32768,\n",
+ " \"max_window_layers\": 28,\n",
+ " \"model_type\": \"qwen2\",\n",
+ " \"num_attention_heads\": 12,\n",
+ " \"num_hidden_layers\": 28,\n",
+ " \"num_key_value_heads\": 2,\n",
+ " \"quantization_config\": {\n",
+ " \"_load_in_4bit\": true,\n",
+ " \"_load_in_8bit\": false,\n",
+ " \"bnb_4bit_compute_dtype\": \"bfloat16\",\n",
+ " \"bnb_4bit_quant_storage\": \"uint8\",\n",
+ " \"bnb_4bit_quant_type\": \"nf4\",\n",
+ " \"bnb_4bit_use_double_quant\": true,\n",
+ " \"llm_int8_enable_fp32_cpu_offload\": false,\n",
+ " \"llm_int8_has_fp16_weight\": false,\n",
+ " \"llm_int8_skip_modules\": null,\n",
+ " \"llm_int8_threshold\": 6.0,\n",
+ " \"load_in_4bit\": true,\n",
+ " \"load_in_8bit\": false,\n",
+ " \"quant_method\": \"bitsandbytes\"\n",
+ " },\n",
+ " \"rms_norm_eps\": 1e-06,\n",
+ " \"rope_theta\": 1000000.0,\n",
+ " \"sliding_window\": 32768,\n",
+ " \"tie_word_embeddings\": true,\n",
+ " \"torch_dtype\": \"bfloat16\",\n",
+ " \"transformers_version\": \"4.42.3\",\n",
+ " \"use_cache\": true,\n",
+ " \"use_sliding_window\": false,\n",
+ " \"vocab_size\": 151936\n",
+ "}\n",
+ "\n",
+ "[INFO|tokenization_utils_base.py:2574] 2024-07-04 23:47:44,277 >> tokenizer config file saved in saves/qwen2-1.5b/lora/sft/checkpoint-2240/tokenizer_config.json\n",
+ "[INFO|tokenization_utils_base.py:2583] 2024-07-04 23:47:44,277 >> Special tokens file saved in saves/qwen2-1.5b/lora/sft/checkpoint-2240/special_tokens_map.json\n",
+ "{'loss': 0.1188, 'grad_norm': 1.1784201860427856, 'learning_rate': 2.9720127835276256e-05, 'epoch': 4.02}\n",
+ "{'loss': 0.0602, 'grad_norm': 1.9491609334945679, 'learning_rate': 2.9246432729161055e-05, 'epoch': 4.03}\n",
+ "{'loss': 0.1191, 'grad_norm': 8.893132209777832, 'learning_rate': 2.8774977504442647e-05, 'epoch': 4.05}\n",
+ "{'loss': 0.0814, 'grad_norm': 2.4567410945892334, 'learning_rate': 2.8305813044122097e-05, 'epoch': 4.07}\n",
+ "{'loss': 0.0717, 'grad_norm': 1.0190716981887817, 'learning_rate': 2.7838989983964065e-05, 'epoch': 4.09}\n",
+ "{'loss': 0.1036, 'grad_norm': 2.9603097438812256, 'learning_rate': 2.737455870703155e-05, 'epoch': 4.11}\n",
+ "{'loss': 0.0639, 'grad_norm': 0.43291687965393066, 'learning_rate': 2.6912569338248315e-05, 'epoch': 4.12}\n",
+ "{'loss': 0.1147, 'grad_norm': 16.320343017578125, 'learning_rate': 2.645307173898901e-05, 'epoch': 4.14}\n",
+ "{'loss': 0.083, 'grad_norm': 2.4415814876556396, 'learning_rate': 2.5996115501697694e-05, 'epoch': 4.16}\n",
+ "{'loss': 0.0894, 'grad_norm': 2.5519323348999023, 'learning_rate': 2.5541749944535554e-05, 'epoch': 4.18}\n",
+ "{'loss': 0.0634, 'grad_norm': 1.0128456354141235, 'learning_rate': 2.5090024106057962e-05, 'epoch': 4.19}\n",
+ "{'loss': 0.0978, 'grad_norm': 0.7708680629730225, 'learning_rate': 2.464098673992205e-05, 'epoch': 4.21}\n",
+ "{'loss': 0.0895, 'grad_norm': 2.129037618637085, 'learning_rate': 2.4194686309624663e-05, 'epoch': 4.23}\n",
+ "{'loss': 0.0986, 'grad_norm': 2.0388691425323486, 'learning_rate': 2.3751170983272e-05, 'epoch': 4.25}\n",
+ "{'loss': 0.1058, 'grad_norm': 2.9288082122802734, 'learning_rate': 2.3310488628380757e-05, 'epoch': 4.27}\n",
+ "{'loss': 0.1175, 'grad_norm': 4.13016414642334, 'learning_rate': 2.2872686806712035e-05, 'epoch': 4.28}\n",
+ "{'loss': 0.1101, 'grad_norm': 2.0640783309936523, 'learning_rate': 2.243781276913811e-05, 'epoch': 4.3}\n",
+ "{'loss': 0.0602, 'grad_norm': 2.8615546226501465, 'learning_rate': 2.200591345054267e-05, 'epoch': 4.32}\n",
+ "{'loss': 0.1019, 'grad_norm': 3.2558248043060303, 'learning_rate': 2.157703546475539e-05, 'epoch': 4.34}\n",
+ "{'loss': 0.0819, 'grad_norm': 2.1427247524261475, 'learning_rate': 2.115122509952085e-05, 'epoch': 4.36}\n",
+ "{'loss': 0.0767, 'grad_norm': 7.249903202056885, 'learning_rate': 2.0728528311502976e-05, 'epoch': 4.37}\n",
+ "{'loss': 0.0823, 'grad_norm': 2.022773027420044, 'learning_rate': 2.0308990721324927e-05, 'epoch': 4.39}\n",
+ "{'loss': 0.0797, 'grad_norm': 3.4550766944885254, 'learning_rate': 1.989265760864542e-05, 'epoch': 4.41}\n",
+ "{'loss': 0.0927, 'grad_norm': 1.1615883111953735, 'learning_rate': 1.947957390727185e-05, 'epoch': 4.43}\n",
+ "{'loss': 0.0782, 'grad_norm': 3.103994607925415, 'learning_rate': 1.906978420031059e-05, 'epoch': 4.44}\n",
+ "{'loss': 0.0575, 'grad_norm': 1.6370556354522705, 'learning_rate': 1.8663332715355396e-05, 'epoch': 4.46}\n",
+ "{'loss': 0.1022, 'grad_norm': 1.106717824935913, 'learning_rate': 1.8260263319713844e-05, 'epoch': 4.48}\n",
+ "{'loss': 0.1071, 'grad_norm': 3.171022415161133, 'learning_rate': 1.7860619515673033e-05, 'epoch': 4.5}\n",
+ "{'loss': 0.1038, 'grad_norm': 1.9004364013671875, 'learning_rate': 1.746444443580433e-05, 'epoch': 4.52}\n",
+ "{'loss': 0.0836, 'grad_norm': 1.7966681718826294, 'learning_rate': 1.7071780838308288e-05, 'epoch': 4.53}\n",
+ "{'loss': 0.0773, 'grad_norm': 2.2593512535095215, 'learning_rate': 1.6682671102399805e-05, 'epoch': 4.55}\n",
+ "{'loss': 0.0671, 'grad_norm': 2.4209578037261963, 'learning_rate': 1.629715722373423e-05, 'epoch': 4.57}\n",
+ "{'loss': 0.0869, 'grad_norm': 3.6910362243652344, 'learning_rate': 1.5915280809874932e-05, 'epoch': 4.59}\n",
+ "{'loss': 0.0713, 'grad_norm': 2.8420000076293945, 'learning_rate': 1.553708307580265e-05, 'epoch': 4.61}\n",
+ "{'loss': 0.0886, 'grad_norm': 1.897133469581604, 'learning_rate': 1.5162604839467265e-05, 'epoch': 4.62}\n",
+ "{'loss': 0.0804, 'grad_norm': 2.0078957080841064, 'learning_rate': 1.4791886517382413e-05, 'epoch': 4.64}\n",
+ "{'loss': 0.0828, 'grad_norm': 2.6949617862701416, 'learning_rate': 1.4424968120263504e-05, 'epoch': 4.66}\n",
+ "{'loss': 0.0906, 'grad_norm': 2.1701433658599854, 'learning_rate': 1.4061889248709343e-05, 'epoch': 4.68}\n",
+ "{'loss': 0.0854, 'grad_norm': 2.741436004638672, 'learning_rate': 1.370268908892825e-05, 'epoch': 4.69}\n",
+ "{'loss': 0.0847, 'grad_norm': 1.9649664163589478, 'learning_rate': 1.3347406408508695e-05, 'epoch': 4.71}\n",
+ "{'loss': 0.1074, 'grad_norm': 2.995682716369629, 'learning_rate': 1.2996079552235263e-05, 'epoch': 4.73}\n",
+ "{'loss': 0.0675, 'grad_norm': 1.7899149656295776, 'learning_rate': 1.264874643795021e-05, 'epoch': 4.75}\n",
+ "{'loss': 0.0736, 'grad_norm': 3.165422201156616, 'learning_rate': 1.230544455246101e-05, 'epoch': 4.77}\n",
+ "{'loss': 0.0949, 'grad_norm': 3.376789093017578, 'learning_rate': 1.1966210947494583e-05, 'epoch': 4.78}\n",
+ "{'loss': 0.0774, 'grad_norm': 0.7393803000450134, 'learning_rate': 1.1631082235698316e-05, 'epoch': 4.8}\n",
+ "{'loss': 0.0685, 'grad_norm': 4.275796890258789, 'learning_rate': 1.130009458668863e-05, 'epoch': 4.82}\n",
+ "{'loss': 0.0642, 'grad_norm': 1.65122652053833, 'learning_rate': 1.097328372314721e-05, 'epoch': 4.84}\n",
+ "{'loss': 0.0855, 'grad_norm': 1.4425795078277588, 'learning_rate': 1.0650684916965559e-05, 'epoch': 4.85}\n",
+ "{'loss': 0.0883, 'grad_norm': 2.1447832584381104, 'learning_rate': 1.0332332985438248e-05, 'epoch': 4.87}\n",
+ "{'loss': 0.1137, 'grad_norm': 2.644052743911743, 'learning_rate': 1.0018262287505086e-05, 'epoch': 4.89}\n",
+ "{'loss': 0.1026, 'grad_norm': 0.3625916838645935, 'learning_rate': 9.708506720042932e-06, 'epoch': 4.91}\n",
+ "{'loss': 0.0708, 'grad_norm': 0.9670233130455017, 'learning_rate': 9.403099714207175e-06, 'epoch': 4.93}\n",
+ "{'loss': 0.0886, 'grad_norm': 1.222226619720459, 'learning_rate': 9.102074231823727e-06, 'epoch': 4.94}\n",
+ "{'loss': 0.0913, 'grad_norm': 1.5419262647628784, 'learning_rate': 8.805462761831418e-06, 'epoch': 4.96}\n",
+ "{'loss': 0.105, 'grad_norm': 1.7759844064712524, 'learning_rate': 8.513297316775625e-06, 'epoch': 4.98}\n",
+ "{'loss': 0.0818, 'grad_norm': 1.2991019487380981, 'learning_rate': 8.225609429353187e-06, 'epoch': 5.0}\n",
+ " 83%|██████████████████████████████▊ | 2800/3360 [2:24:53<36:03, 3.86s/it][INFO|trainer.py:3788] 2024-07-05 00:23:37,381 >> \n",
+ "***** Running Evaluation *****\n",
+ "[INFO|trainer.py:3790] 2024-07-05 00:23:37,382 >> Num examples = 46\n",
+ "[INFO|trainer.py:3793] 2024-07-05 00:23:37,382 >> Batch size = 1\n",
+ "\n",
+ " 0%| | 0/46 [00:00, ?it/s]\u001b[A\n",
+ " 7%|██▊ | 3/46 [00:00<00:02, 15.46it/s]\u001b[A\n",
+ " 11%|████▊ | 5/46 [00:00<00:03, 12.42it/s]\u001b[A\n",
+ " 15%|██████▋ | 7/46 [00:00<00:03, 11.09it/s]\u001b[A\n",
+ " 20%|████████▌ | 9/46 [00:00<00:03, 10.00it/s]\u001b[A\n",
+ " 24%|██████████▎ | 11/46 [00:01<00:03, 9.99it/s]\u001b[A\n",
+ " 28%|████████████▏ | 13/46 [00:01<00:03, 9.77it/s]\u001b[A\n",
+ " 30%|█████████████ | 14/46 [00:01<00:03, 9.58it/s]\u001b[A\n",
+ " 33%|██████████████ | 15/46 [00:01<00:03, 9.25it/s]\u001b[A\n",
+ " 35%|██████████████▉ | 16/46 [00:01<00:03, 8.92it/s]\u001b[A\n",
+ " 37%|███████████████▉ | 17/46 [00:01<00:03, 8.72it/s]\u001b[A\n",
+ " 39%|████████████████▊ | 18/46 [00:01<00:03, 8.10it/s]\u001b[A\n",
+ " 41%|█████████████████▊ | 19/46 [00:02<00:03, 7.66it/s]\u001b[A\n",
+ " 43%|██████████████████▋ | 20/46 [00:02<00:03, 7.21it/s]\u001b[A\n",
+ " 46%|███████████████████▋ | 21/46 [00:02<00:03, 7.20it/s]\u001b[A\n",
+ " 48%|████████████████████▌ | 22/46 [00:02<00:03, 6.78it/s]\u001b[A\n",
+ " 50%|█████████████████████▌ | 23/46 [00:02<00:03, 6.77it/s]\u001b[A\n",
+ " 52%|██████████████████████▍ | 24/46 [00:02<00:03, 6.86it/s]\u001b[A\n",
+ " 54%|███████████████████████▎ | 25/46 [00:02<00:03, 6.94it/s]\u001b[A\n",
+ " 57%|████████████████████████▎ | 26/46 [00:03<00:02, 6.87it/s]\u001b[A\n",
+ " 59%|█████████████████████████▏ | 27/46 [00:03<00:02, 6.55it/s]\u001b[A\n",
+ " 61%|██████████████████████████▏ | 28/46 [00:03<00:02, 6.19it/s]\u001b[A\n",
+ " 63%|███████████████████████████ | 29/46 [00:03<00:02, 6.08it/s]\u001b[A\n",
+ " 65%|████████████████████████████ | 30/46 [00:03<00:02, 5.95it/s]\u001b[A\n",
+ " 67%|████████████████████████████▉ | 31/46 [00:03<00:02, 5.75it/s]\u001b[A\n",
+ " 70%|█████████████████████████████▉ | 32/46 [00:04<00:02, 5.04it/s]\u001b[A\n",
+ " 72%|██████████████████████████████▊ | 33/46 [00:04<00:02, 4.73it/s]\u001b[A\n",
+ " 74%|███████████████████████████████▊ | 34/46 [00:04<00:02, 4.79it/s]\u001b[A\n",
+ " 76%|████████████████████████████████▋ | 35/46 [00:04<00:02, 4.60it/s]\u001b[A\n",
+ " 78%|█████████████████████████████████▋ | 36/46 [00:05<00:01, 5.26it/s]\u001b[A\n",
+ " 80%|██████████████████████████████████▌ | 37/46 [00:05<00:01, 5.95it/s]\u001b[A\n",
+ " 83%|███████████████████████████████████▌ | 38/46 [00:05<00:01, 6.72it/s]\u001b[A\n",
+ " 85%|████████████████████████████████████▍ | 39/46 [00:05<00:00, 7.27it/s]\u001b[A\n",
+ " 87%|█████████████████████████████████████▍ | 40/46 [00:05<00:00, 7.76it/s]\u001b[A\n",
+ " 91%|███████████████████████████████████████▎ | 42/46 [00:05<00:00, 8.19it/s]\u001b[A\n",
+ " 96%|█████████████████████████████████████████▏ | 44/46 [00:05<00:00, 8.96it/s]\u001b[A\n",
+ " \u001b[A\n",
+ "\u001b[A{'eval_loss': 3.000229835510254, 'eval_runtime': 6.222, 'eval_samples_per_second': 7.393, 'eval_steps_per_second': 7.393, 'epoch': 5.0}\n",
+ " 83%|██████████████████████████████▊ | 2800/3360 [2:24:59<36:03, 3.86s/it]\n",
+ "100%|███████████████████████████████████████████| 46/46 [00:06<00:00, 8.63it/s]\u001b[A\n",
+ " \u001b[A[INFO|trainer.py:3478] 2024-07-05 00:23:43,607 >> Saving model checkpoint to saves/qwen2-1.5b/lora/sft/checkpoint-2800\n",
+ "[INFO|configuration_utils.py:733] 2024-07-05 00:23:45,000 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--unsloth--qwen2-1.5b-instruct-bnb-4bit/snapshots/9f10684b3a26fbf25e50921655353e2e3e599d70/config.json\n",
+ "[INFO|configuration_utils.py:800] 2024-07-05 00:23:45,001 >> Model config Qwen2Config {\n",
+ " \"_name_or_path\": \"Qwen/Qwen2-1.5B-Instruct\",\n",
+ " \"architectures\": [\n",
+ " \"Qwen2ForCausalLM\"\n",
+ " ],\n",
+ " \"attention_dropout\": 0.0,\n",
+ " \"bos_token_id\": 151643,\n",
+ " \"eos_token_id\": 151645,\n",
+ " \"hidden_act\": \"silu\",\n",
+ " \"hidden_size\": 1536,\n",
+ " \"initializer_range\": 0.02,\n",
+ " \"intermediate_size\": 8960,\n",
+ " \"max_position_embeddings\": 32768,\n",
+ " \"max_window_layers\": 28,\n",
+ " \"model_type\": \"qwen2\",\n",
+ " \"num_attention_heads\": 12,\n",
+ " \"num_hidden_layers\": 28,\n",
+ " \"num_key_value_heads\": 2,\n",
+ " \"quantization_config\": {\n",
+ " \"_load_in_4bit\": true,\n",
+ " \"_load_in_8bit\": false,\n",
+ " \"bnb_4bit_compute_dtype\": \"bfloat16\",\n",
+ " \"bnb_4bit_quant_storage\": \"uint8\",\n",
+ " \"bnb_4bit_quant_type\": \"nf4\",\n",
+ " \"bnb_4bit_use_double_quant\": true,\n",
+ " \"llm_int8_enable_fp32_cpu_offload\": false,\n",
+ " \"llm_int8_has_fp16_weight\": false,\n",
+ " \"llm_int8_skip_modules\": null,\n",
+ " \"llm_int8_threshold\": 6.0,\n",
+ " \"load_in_4bit\": true,\n",
+ " \"load_in_8bit\": false,\n",
+ " \"quant_method\": \"bitsandbytes\"\n",
+ " },\n",
+ " \"rms_norm_eps\": 1e-06,\n",
+ " \"rope_theta\": 1000000.0,\n",
+ " \"sliding_window\": 32768,\n",
+ " \"tie_word_embeddings\": true,\n",
+ " \"torch_dtype\": \"bfloat16\",\n",
+ " \"transformers_version\": \"4.42.3\",\n",
+ " \"use_cache\": true,\n",
+ " \"use_sliding_window\": false,\n",
+ " \"vocab_size\": 151936\n",
+ "}\n",
+ "\n",
+ "[INFO|tokenization_utils_base.py:2574] 2024-07-05 00:23:45,087 >> tokenizer config file saved in saves/qwen2-1.5b/lora/sft/checkpoint-2800/tokenizer_config.json\n",
+ "[INFO|tokenization_utils_base.py:2583] 2024-07-05 00:23:45,087 >> Special tokens file saved in saves/qwen2-1.5b/lora/sft/checkpoint-2800/special_tokens_map.json\n",
+ "{'loss': 0.0391, 'grad_norm': 1.8985695838928223, 'learning_rate': 7.942430149009161e-06, 'epoch': 5.02}\n",
+ "{'loss': 0.0262, 'grad_norm': 0.18104498088359833, 'learning_rate': 7.663790038585793e-06, 'epoch': 5.03}\n",
+ "{'loss': 0.0369, 'grad_norm': 0.4857228696346283, 'learning_rate': 7.389719171023857e-06, 'epoch': 5.05}\n",
+ "{'loss': 0.0285, 'grad_norm': 0.5048622488975525, 'learning_rate': 7.1202471261170245e-06, 'epoch': 5.07}\n",
+ "{'loss': 0.0239, 'grad_norm': 1.3091479539871216, 'learning_rate': 6.855402987319348e-06, 'epoch': 5.09}\n",
+ "{'loss': 0.0315, 'grad_norm': 0.7383649945259094, 'learning_rate': 6.595215338606397e-06, 'epoch': 5.1}\n",
+ "{'loss': 0.0227, 'grad_norm': 0.46847808361053467, 'learning_rate': 6.339712261390213e-06, 'epoch': 5.12}\n",
+ "{'loss': 0.0286, 'grad_norm': 2.871511936187744, 'learning_rate': 6.088921331488568e-06, 'epoch': 5.14}\n",
+ "{'loss': 0.0215, 'grad_norm': 0.5253076553344727, 'learning_rate': 5.8428696161488215e-06, 'epoch': 5.16}\n",
+ "{'loss': 0.0212, 'grad_norm': 0.7373698949813843, 'learning_rate': 5.601583671126531e-06, 'epoch': 5.18}\n",
+ "{'loss': 0.0468, 'grad_norm': 1.2003121376037598, 'learning_rate': 5.365089537819434e-06, 'epoch': 5.19}\n",
+ "{'loss': 0.0269, 'grad_norm': 0.1384514421224594, 'learning_rate': 5.133412740456806e-06, 'epoch': 5.21}\n",
+ "{'loss': 0.016, 'grad_norm': 0.6597172617912292, 'learning_rate': 4.906578283344759e-06, 'epoch': 5.23}\n",
+ "{'loss': 0.0273, 'grad_norm': 1.3373147249221802, 'learning_rate': 4.684610648167503e-06, 'epoch': 5.25}\n",
+ "{'loss': 0.022, 'grad_norm': 1.9218050241470337, 'learning_rate': 4.467533791345191e-06, 'epoch': 5.27}\n",
+ "{'loss': 0.0266, 'grad_norm': 0.33371880650520325, 'learning_rate': 4.255371141448272e-06, 'epoch': 5.28}\n",
+ "{'loss': 0.0246, 'grad_norm': 0.3639131486415863, 'learning_rate': 4.048145596668967e-06, 'epoch': 5.3}\n",
+ "{'loss': 0.04, 'grad_norm': 0.7324997186660767, 'learning_rate': 3.84587952234991e-06, 'epoch': 5.32}\n",
+ "{'loss': 0.02, 'grad_norm': 1.7712045907974243, 'learning_rate': 3.6485947485702832e-06, 'epoch': 5.34}\n",
+ "{'loss': 0.0304, 'grad_norm': 1.001847267150879, 'learning_rate': 3.4563125677897932e-06, 'epoch': 5.35}\n",
+ "{'loss': 0.0251, 'grad_norm': 1.4244178533554077, 'learning_rate': 3.269053732550581e-06, 'epoch': 5.37}\n",
+ "{'loss': 0.0201, 'grad_norm': 0.938901960849762, 'learning_rate': 3.086838453237506e-06, 'epoch': 5.39}\n",
+ "{'loss': 0.017, 'grad_norm': 0.722439706325531, 'learning_rate': 2.9096863958968268e-06, 'epoch': 5.41}\n",
+ "{'loss': 0.0278, 'grad_norm': 0.9856802225112915, 'learning_rate': 2.737616680113758e-06, 'epoch': 5.43}\n",
+ "{'loss': 0.0275, 'grad_norm': 1.7459590435028076, 'learning_rate': 2.570647876948895e-06, 'epoch': 5.44}\n",
+ "{'loss': 0.0419, 'grad_norm': 15.734712600708008, 'learning_rate': 2.408798006933882e-06, 'epoch': 5.46}\n",
+ "{'loss': 0.0498, 'grad_norm': 0.5652347207069397, 'learning_rate': 2.252084538126542e-06, 'epoch': 5.48}\n",
+ "{'loss': 0.0281, 'grad_norm': 0.6292805075645447, 'learning_rate': 2.100524384225555e-06, 'epoch': 5.5}\n",
+ "{'loss': 0.025, 'grad_norm': 1.3762198686599731, 'learning_rate': 1.9541339027450256e-06, 'epoch': 5.52}\n",
+ "{'loss': 0.0228, 'grad_norm': 0.6231855154037476, 'learning_rate': 1.8129288932490274e-06, 'epoch': 5.53}\n",
+ "{'loss': 0.021, 'grad_norm': 0.2345045506954193, 'learning_rate': 1.6769245956464396e-06, 'epoch': 5.55}\n",
+ "{'loss': 0.0314, 'grad_norm': 0.8907411694526672, 'learning_rate': 1.5461356885461075e-06, 'epoch': 5.57}\n",
+ "{'loss': 0.0324, 'grad_norm': 0.8636724948883057, 'learning_rate': 1.4205762876726092e-06, 'epoch': 5.59}\n",
+ "{'loss': 0.0306, 'grad_norm': 1.4055633544921875, 'learning_rate': 1.3002599443428243e-06, 'epoch': 5.6}\n",
+ "{'loss': 0.0276, 'grad_norm': 0.9670897722244263, 'learning_rate': 1.1851996440033319e-06, 'epoch': 5.62}\n",
+ "{'loss': 0.0328, 'grad_norm': 0.16922369599342346, 'learning_rate': 1.0754078048289374e-06, 'epoch': 5.64}\n",
+ "{'loss': 0.031, 'grad_norm': 1.8827847242355347, 'learning_rate': 9.708962763824048e-07, 'epoch': 5.66}\n",
+ "{'loss': 0.0214, 'grad_norm': 0.40066924691200256, 'learning_rate': 8.716763383355864e-07, 'epoch': 5.68}\n",
+ "{'loss': 0.0272, 'grad_norm': 0.28809547424316406, 'learning_rate': 7.777586992519959e-07, 'epoch': 5.69}\n",
+ "{'loss': 0.0253, 'grad_norm': 1.053158164024353, 'learning_rate': 6.891534954310885e-07, 'epoch': 5.71}\n",
+ "{'loss': 0.025, 'grad_norm': 0.2853540778160095, 'learning_rate': 6.058702898142643e-07, 'epoch': 5.73}\n",
+ "{'loss': 0.0354, 'grad_norm': 1.2035536766052246, 'learning_rate': 5.279180709527765e-07, 'epoch': 5.75}\n",
+ "{'loss': 0.0276, 'grad_norm': 0.9827560782432556, 'learning_rate': 4.553052520375911e-07, 'epoch': 5.77}\n",
+ "{'loss': 0.0209, 'grad_norm': 0.42196208238601685, 'learning_rate': 3.8803966999139684e-07, 'epoch': 5.78}\n",
+ "{'loss': 0.0265, 'grad_norm': 1.0920729637145996, 'learning_rate': 3.261285846227868e-07, 'epoch': 5.8}\n",
+ "{'loss': 0.0218, 'grad_norm': 0.4562773108482361, 'learning_rate': 2.6957867784270787e-07, 'epoch': 5.82}\n",
+ "{'loss': 0.0229, 'grad_norm': 1.235041618347168, 'learning_rate': 2.1839605294330933e-07, 'epoch': 5.84}\n",
+ "{'loss': 0.0371, 'grad_norm': 0.8272603154182434, 'learning_rate': 1.725862339392259e-07, 'epoch': 5.85}\n",
+ "{'loss': 0.0187, 'grad_norm': 0.5107071399688721, 'learning_rate': 1.3215416497138754e-07, 'epoch': 5.87}\n",
+ "{'loss': 0.0347, 'grad_norm': 1.0998457670211792, 'learning_rate': 9.710420977340762e-08, 'epoch': 5.89}\n",
+ "{'loss': 0.027, 'grad_norm': 1.8781795501708984, 'learning_rate': 6.744015120061509e-08, 'epoch': 5.91}\n",
+ "{'loss': 0.0351, 'grad_norm': 0.9750437140464783, 'learning_rate': 4.316519082179227e-08, 'epoch': 5.93}\n",
+ "{'loss': 0.0209, 'grad_norm': 1.2990669012069702, 'learning_rate': 2.4281948573617874e-08, 'epoch': 5.94}\n",
+ "{'loss': 0.0354, 'grad_norm': 1.9354966878890991, 'learning_rate': 1.0792462477909882e-08, 'epoch': 5.96}\n",
+ "{'loss': 0.0381, 'grad_norm': 1.044374704360962, 'learning_rate': 2.6981884216847884e-09, 'epoch': 5.98}\n",
+ "{'loss': 0.0228, 'grad_norm': 0.6751245856285095, 'learning_rate': 0.0, 'epoch': 6.0}\n",
+ "100%|█████████████████████████████████████| 3360/3360 [3:00:43<00:00, 3.75s/it][INFO|trainer.py:3788] 2024-07-05 00:59:27,574 >> \n",
+ "***** Running Evaluation *****\n",
+ "[INFO|trainer.py:3790] 2024-07-05 00:59:27,574 >> Num examples = 46\n",
+ "[INFO|trainer.py:3793] 2024-07-05 00:59:27,574 >> Batch size = 1\n",
+ "\n",
+ " 0%| | 0/46 [00:00, ?it/s]\u001b[A\n",
+ " 7%|██▊ | 3/46 [00:00<00:02, 15.75it/s]\u001b[A\n",
+ " 11%|████▊ | 5/46 [00:00<00:03, 12.78it/s]\u001b[A\n",
+ " 15%|██████▋ | 7/46 [00:00<00:03, 11.87it/s]\u001b[A\n",
+ " 20%|████████▌ | 9/46 [00:00<00:03, 11.54it/s]\u001b[A\n",
+ " 24%|██████████▎ | 11/46 [00:00<00:03, 11.21it/s]\u001b[A\n",
+ " 28%|████████████▏ | 13/46 [00:01<00:03, 10.94it/s]\u001b[A\n",
+ " 33%|██████████████ | 15/46 [00:01<00:02, 10.42it/s]\u001b[A\n",
+ " 37%|███████████████▉ | 17/46 [00:01<00:02, 10.38it/s]\u001b[A\n",
+ " 41%|█████████████████▊ | 19/46 [00:01<00:02, 9.50it/s]\u001b[A\n",
+ " 43%|██████████████████▋ | 20/46 [00:01<00:02, 9.13it/s]\u001b[A\n",
+ " 46%|███████████████████▋ | 21/46 [00:02<00:02, 9.21it/s]\u001b[A\n",
+ " 48%|████████████████████▌ | 22/46 [00:02<00:02, 9.18it/s]\u001b[A\n",
+ " 50%|█████████████████████▌ | 23/46 [00:02<00:02, 9.02it/s]\u001b[A\n",
+ " 52%|██████████████████████▍ | 24/46 [00:02<00:02, 8.78it/s]\u001b[A\n",
+ " 54%|███████████████████████▎ | 25/46 [00:02<00:02, 8.65it/s]\u001b[A\n",
+ " 57%|████████████████████████▎ | 26/46 [00:02<00:02, 8.41it/s]\u001b[A\n",
+ " 59%|█████████████████████████▏ | 27/46 [00:02<00:02, 7.43it/s]\u001b[A\n",
+ " 61%|██████████████████████████▏ | 28/46 [00:02<00:02, 7.47it/s]\u001b[A\n",
+ " 63%|███████████████████████████ | 29/46 [00:03<00:02, 7.36it/s]\u001b[A\n",
+ " 65%|████████████████████████████ | 30/46 [00:03<00:02, 7.17it/s]\u001b[A\n",
+ " 67%|████████████████████████████▉ | 31/46 [00:03<00:02, 6.67it/s]\u001b[A\n",
+ " 70%|█████████████████████████████▉ | 32/46 [00:03<00:02, 5.32it/s]\u001b[A\n",
+ " 72%|██████████████████████████████▊ | 33/46 [00:03<00:02, 4.79it/s]\u001b[A\n",
+ " 74%|███████████████████████████████▊ | 34/46 [00:04<00:02, 4.36it/s]\u001b[A\n",
+ " 76%|████████████████████████████████▋ | 35/46 [00:04<00:02, 4.02it/s]\u001b[A\n",
+ " 78%|█████████████████████████████████▋ | 36/46 [00:04<00:02, 4.26it/s]\u001b[A\n",
+ " 80%|██████████████████████████████████▌ | 37/46 [00:04<00:01, 4.76it/s]\u001b[A\n",
+ " 83%|███████████████████████████████████▌ | 38/46 [00:04<00:01, 5.19it/s]\u001b[A\n",
+ " 85%|████████████████████████████████████▍ | 39/46 [00:05<00:01, 5.53it/s]\u001b[A\n",
+ " 87%|█████████████████████████████████████▍ | 40/46 [00:05<00:01, 5.88it/s]\u001b[A\n",
+ " 89%|██████████████████████████████████████▎ | 41/46 [00:05<00:00, 6.21it/s]\u001b[A\n",
+ " 91%|███████████████████████████████████████▎ | 42/46 [00:05<00:00, 6.42it/s]\u001b[A\n",
+ " 93%|████████████████████████████████████████▏ | 43/46 [00:05<00:00, 6.63it/s]\u001b[A\n",
+ " 96%|█████████████████████████████████████████▏ | 44/46 [00:05<00:00, 6.74it/s]\u001b[A\n",
+ " 98%|██████████████████████████████████████████ | 45/46 [00:06<00:00, 6.80it/s]\u001b[A\n",
+ " \u001b[A\n",
+ "\u001b[A{'eval_loss': 3.4013702869415283, 'eval_runtime': 6.2741, 'eval_samples_per_second': 7.332, 'eval_steps_per_second': 7.332, 'epoch': 6.0}\n",
+ "100%|█████████████████████████████████████| 3360/3360 [3:00:49<00:00, 3.75s/it]\n",
+ "100%|███████████████████████████████████████████| 46/46 [00:06<00:00, 6.66it/s]\u001b[A\n",
+ " \u001b[A[INFO|trainer.py:3478] 2024-07-05 00:59:33,853 >> Saving model checkpoint to saves/qwen2-1.5b/lora/sft/checkpoint-3360\n",
+ "[INFO|configuration_utils.py:733] 2024-07-05 00:59:35,314 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--unsloth--qwen2-1.5b-instruct-bnb-4bit/snapshots/9f10684b3a26fbf25e50921655353e2e3e599d70/config.json\n",
+ "[INFO|configuration_utils.py:800] 2024-07-05 00:59:35,316 >> Model config Qwen2Config {\n",
+ " \"_name_or_path\": \"Qwen/Qwen2-1.5B-Instruct\",\n",
+ " \"architectures\": [\n",
+ " \"Qwen2ForCausalLM\"\n",
+ " ],\n",
+ " \"attention_dropout\": 0.0,\n",
+ " \"bos_token_id\": 151643,\n",
+ " \"eos_token_id\": 151645,\n",
+ " \"hidden_act\": \"silu\",\n",
+ " \"hidden_size\": 1536,\n",
+ " \"initializer_range\": 0.02,\n",
+ " \"intermediate_size\": 8960,\n",
+ " \"max_position_embeddings\": 32768,\n",
+ " \"max_window_layers\": 28,\n",
+ " \"model_type\": \"qwen2\",\n",
+ " \"num_attention_heads\": 12,\n",
+ " \"num_hidden_layers\": 28,\n",
+ " \"num_key_value_heads\": 2,\n",
+ " \"quantization_config\": {\n",
+ " \"_load_in_4bit\": true,\n",
+ " \"_load_in_8bit\": false,\n",
+ " \"bnb_4bit_compute_dtype\": \"bfloat16\",\n",
+ " \"bnb_4bit_quant_storage\": \"uint8\",\n",
+ " \"bnb_4bit_quant_type\": \"nf4\",\n",
+ " \"bnb_4bit_use_double_quant\": true,\n",
+ " \"llm_int8_enable_fp32_cpu_offload\": false,\n",
+ " \"llm_int8_has_fp16_weight\": false,\n",
+ " \"llm_int8_skip_modules\": null,\n",
+ " \"llm_int8_threshold\": 6.0,\n",
+ " \"load_in_4bit\": true,\n",
+ " \"load_in_8bit\": false,\n",
+ " \"quant_method\": \"bitsandbytes\"\n",
+ " },\n",
+ " \"rms_norm_eps\": 1e-06,\n",
+ " \"rope_theta\": 1000000.0,\n",
+ " \"sliding_window\": 32768,\n",
+ " \"tie_word_embeddings\": true,\n",
+ " \"torch_dtype\": \"bfloat16\",\n",
+ " \"transformers_version\": \"4.42.3\",\n",
+ " \"use_cache\": true,\n",
+ " \"use_sliding_window\": false,\n",
+ " \"vocab_size\": 151936\n",
+ "}\n",
+ "\n",
+ "[INFO|tokenization_utils_base.py:2574] 2024-07-05 00:59:35,381 >> tokenizer config file saved in saves/qwen2-1.5b/lora/sft/checkpoint-3360/tokenizer_config.json\n",
+ "[INFO|tokenization_utils_base.py:2583] 2024-07-05 00:59:35,382 >> Special tokens file saved in saves/qwen2-1.5b/lora/sft/checkpoint-3360/special_tokens_map.json\n",
+ "[INFO|:482] 2024-07-05 00:59:35,695 >> \n",
+ "\n",
+ "Training completed. Do not forget to share your model on huggingface.co/models =)\n",
+ "\n",
+ "\n",
+ "{'train_runtime': 10857.6726, 'train_samples_per_second': 2.477, 'train_steps_per_second': 0.309, 'train_loss': 0.6667878782021858, 'epoch': 6.0}\n",
+ "100%|█████████████████████████████████████| 3360/3360 [3:00:51<00:00, 3.23s/it]\n",
+ "[INFO|trainer.py:3478] 2024-07-05 00:59:35,700 >> Saving model checkpoint to saves/qwen2-1.5b/lora/sft\n",
+ "[INFO|configuration_utils.py:733] 2024-07-05 00:59:36,890 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--unsloth--qwen2-1.5b-instruct-bnb-4bit/snapshots/9f10684b3a26fbf25e50921655353e2e3e599d70/config.json\n",
+ "[INFO|configuration_utils.py:800] 2024-07-05 00:59:36,891 >> Model config Qwen2Config {\n",
+ " \"_name_or_path\": \"Qwen/Qwen2-1.5B-Instruct\",\n",
+ " \"architectures\": [\n",
+ " \"Qwen2ForCausalLM\"\n",
+ " ],\n",
+ " \"attention_dropout\": 0.0,\n",
+ " \"bos_token_id\": 151643,\n",
+ " \"eos_token_id\": 151645,\n",
+ " \"hidden_act\": \"silu\",\n",
+ " \"hidden_size\": 1536,\n",
+ " \"initializer_range\": 0.02,\n",
+ " \"intermediate_size\": 8960,\n",
+ " \"max_position_embeddings\": 32768,\n",
+ " \"max_window_layers\": 28,\n",
+ " \"model_type\": \"qwen2\",\n",
+ " \"num_attention_heads\": 12,\n",
+ " \"num_hidden_layers\": 28,\n",
+ " \"num_key_value_heads\": 2,\n",
+ " \"quantization_config\": {\n",
+ " \"_load_in_4bit\": true,\n",
+ " \"_load_in_8bit\": false,\n",
+ " \"bnb_4bit_compute_dtype\": \"bfloat16\",\n",
+ " \"bnb_4bit_quant_storage\": \"uint8\",\n",
+ " \"bnb_4bit_quant_type\": \"nf4\",\n",
+ " \"bnb_4bit_use_double_quant\": true,\n",
+ " \"llm_int8_enable_fp32_cpu_offload\": false,\n",
+ " \"llm_int8_has_fp16_weight\": false,\n",
+ " \"llm_int8_skip_modules\": null,\n",
+ " \"llm_int8_threshold\": 6.0,\n",
+ " \"load_in_4bit\": true,\n",
+ " \"load_in_8bit\": false,\n",
+ " \"quant_method\": \"bitsandbytes\"\n",
+ " },\n",
+ " \"rms_norm_eps\": 1e-06,\n",
+ " \"rope_theta\": 1000000.0,\n",
+ " \"sliding_window\": 32768,\n",
+ " \"tie_word_embeddings\": true,\n",
+ " \"torch_dtype\": \"bfloat16\",\n",
+ " \"transformers_version\": \"4.42.3\",\n",
+ " \"use_cache\": true,\n",
+ " \"use_sliding_window\": false,\n",
+ " \"vocab_size\": 151936\n",
+ "}\n",
+ "\n",
+ "[INFO|tokenization_utils_base.py:2574] 2024-07-05 00:59:36,947 >> tokenizer config file saved in saves/qwen2-1.5b/lora/sft/tokenizer_config.json\n",
+ "[INFO|tokenization_utils_base.py:2583] 2024-07-05 00:59:36,947 >> Special tokens file saved in saves/qwen2-1.5b/lora/sft/special_tokens_map.json\n",
+ "***** train metrics *****\n",
+ " epoch = 5.9973\n",
+ " total_flos = 19692141GF\n",
+ " train_loss = 0.6668\n",
+ " train_runtime = 3:00:57.67\n",
+ " train_samples_per_second = 2.477\n",
+ " train_steps_per_second = 0.309\n",
+ "Figure saved at: saves/qwen2-1.5b/lora/sft/training_loss.png\n",
+ "Figure saved at: saves/qwen2-1.5b/lora/sft/training_eval_loss.png\n",
+ "[INFO|trainer.py:3788] 2024-07-05 00:59:37,341 >> \n",
+ "***** Running Evaluation *****\n",
+ "[INFO|trainer.py:3790] 2024-07-05 00:59:37,341 >> Num examples = 46\n",
+ "[INFO|trainer.py:3793] 2024-07-05 00:59:37,341 >> Batch size = 1\n",
+ "100%|███████████████████████████████████████████| 46/46 [00:05<00:00, 7.96it/s]\n",
+ "***** eval metrics *****\n",
+ " epoch = 5.9973\n",
+ " eval_loss = 3.4014\n",
+ " eval_runtime = 0:00:05.94\n",
+ " eval_samples_per_second = 7.742\n",
+ " eval_steps_per_second = 7.742\n",
+ "[INFO|modelcard.py:449] 2024-07-05 00:59:43,285 >> Dropping the following result as it does not have all the necessary fields:\n",
+ "{'task': {'name': 'Causal Language Modeling', 'type': 'text-generation'}}\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: | 0.091 MB of 0.091 MB uploaded\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: Run history:\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: eval/loss ▁▁▃▄▆██\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: eval/runtime ▂▁▆█▆▆▅\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: eval/samples_per_second ▆█▂▁▂▂▃\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: eval/steps_per_second ▆█▂▁▂▂▃\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: train/epoch ▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇███\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: train/global_step ▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇███\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: train/grad_norm ▂▁▁▂▂▂▂▂▂▂▂▂▂▂▂▃▃▂▂▃▂▂▂▂▂▂▂█▃▁▂▂▁▁▁▁▁▁▁▁\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: train/learning_rate ▂▄▅▇██████▇▇▇▇▇▆▆▆▆▅▅▅▄▄▄▃▃▃▃▂▂▂▂▁▁▁▁▁▁▁\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: train/loss ████▇▇▇▅▆▆▅▅▅▅▃▃▃▃▃▃▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: \n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: Run summary:\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: eval/loss 3.40137\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: eval/runtime 5.9413\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: eval/samples_per_second 7.742\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: eval/steps_per_second 7.742\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: total_flos 2.114427607798579e+16\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: train/epoch 5.99732\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: train/global_step 3360\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: train/grad_norm 0.67512\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: train/learning_rate 0.0\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: train/loss 0.0228\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: train_loss 0.66679\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: train_runtime 10857.6726\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: train_samples_per_second 2.477\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: train_steps_per_second 0.309\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: \n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: 🚀 View run \u001b[33mqwen2_1.5b_lora_sft\u001b[0m at: \u001b[34m\u001b[4mhttps://wandb.ai/inflaton-ai/huggingface/runs/4fbnqsea\u001b[0m\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: ⭐️ View project at: \u001b[34m\u001b[4mhttps://wandb.ai/inflaton-ai/huggingface\u001b[0m\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: Synced 6 W&B file(s), 0 media file(s), 1 artifact file(s) and 0 other file(s)\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: Find logs at: \u001b[35m\u001b[1m./wandb/run-20240704_215839-4fbnqsea/logs\u001b[0m\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m The new W&B backend becomes opt-out in version 0.18.0; try it out with `wandb.require(\"core\")`! See https://wandb.me/wandb-core for more information.\n",
+ "CPU times: user 3min 32s, sys: 1min 10s, total: 4min 43s\n",
+ "Wall time: 3h 3min 14s\n"
+ ]
+ }
+ ],
+ "source": [
+ "%%time\n",
+ "\n",
+ "!./scripts/tune-lf.sh config/qwen2_1.5b_lora_sft_unsloth.yaml"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 22,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Current Directory:\n",
+ "/home/inflaton/code/projects/courses/llm-finetuning/llama-factory\n",
+ "07/05/2024 06:15:40 - WARNING - llamafactory.hparams.parser - We recommend enable `upcast_layernorm` in quantized training.\n",
+ "07/05/2024 06:15:40 - INFO - llamafactory.hparams.parser - Process rank: 0, device: cuda:0, n_gpu: 1, distributed training: False, compute dtype: torch.bfloat16\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-07-05 06:15:40,695 >> loading file vocab.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/vocab.json\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-07-05 06:15:40,695 >> loading file merges.txt from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/merges.txt\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-07-05 06:15:40,695 >> loading file tokenizer.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/tokenizer.json\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-07-05 06:15:40,695 >> loading file added_tokens.json from cache at None\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-07-05 06:15:40,695 >> loading file special_tokens_map.json from cache at None\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-07-05 06:15:40,695 >> loading file tokenizer_config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/tokenizer_config.json\n",
+ "[WARNING|logging.py:313] 2024-07-05 06:15:40,871 >> Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n",
+ "07/05/2024 06:15:40 - INFO - llamafactory.data.template - Replace eos token: <|im_end|>\n",
+ "07/05/2024 06:15:40 - INFO - llamafactory.data.template - Add <|im_start|> to stop words.\n",
+ "07/05/2024 06:15:40 - INFO - llamafactory.data.loader - Loading dataset alpaca_mac.json...\n",
+ "Converting format of dataset (num_proc=16): 100%|█| 4528/4528 [00:00<00:00, 1717\n",
+ "Running tokenizer on dataset (num_proc=16): 100%|█| 4528/4528 [00:01<00:00, 2570\n",
+ "input_ids:\n",
+ "[151644, 872, 198, 5501, 14683, 279, 2701, 8453, 1467, 1119, 6364, 323, 3410, 1172, 279, 24531, 2213, 11, 4302, 770, 624, 35987, 102895, 99164, 100324, 100717, 100095, 99509, 1773, 151645, 198, 151644, 77091, 198, 17949, 358, 572, 2617, 553, 264, 38835, 44486, 13, 151645]\n",
+ "inputs:\n",
+ "<|im_start|>user\n",
+ "Please translate the following Chinese text into English and provide only the translated content, nothing else.\n",
+ "全仗着狐仙搭救。<|im_end|>\n",
+ "<|im_start|>assistant\n",
+ "Because I was protected by a fox fairy.<|im_end|>\n",
+ "label_ids:\n",
+ "[-100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, 17949, 358, 572, 2617, 553, 264, 38835, 44486, 13, 151645]\n",
+ "labels:\n",
+ "Because I was protected by a fox fairy.<|im_end|>\n",
+ "[INFO|configuration_utils.py:733] 2024-07-05 06:15:44,437 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/config.json\n",
+ "[INFO|configuration_utils.py:800] 2024-07-05 06:15:44,438 >> Model config Qwen2Config {\n",
+ " \"_name_or_path\": \"Qwen/Qwen2-0.5B-Instruct\",\n",
+ " \"architectures\": [\n",
+ " \"Qwen2ForCausalLM\"\n",
+ " ],\n",
+ " \"attention_dropout\": 0.0,\n",
+ " \"bos_token_id\": 151643,\n",
+ " \"eos_token_id\": 151645,\n",
+ " \"hidden_act\": \"silu\",\n",
+ " \"hidden_size\": 896,\n",
+ " \"initializer_range\": 0.02,\n",
+ " \"intermediate_size\": 4864,\n",
+ " \"max_position_embeddings\": 32768,\n",
+ " \"max_window_layers\": 24,\n",
+ " \"model_type\": \"qwen2\",\n",
+ " \"num_attention_heads\": 14,\n",
+ " \"num_hidden_layers\": 24,\n",
+ " \"num_key_value_heads\": 2,\n",
+ " \"rms_norm_eps\": 1e-06,\n",
+ " \"rope_theta\": 1000000.0,\n",
+ " \"sliding_window\": 32768,\n",
+ " \"tie_word_embeddings\": true,\n",
+ " \"torch_dtype\": \"bfloat16\",\n",
+ " \"transformers_version\": \"4.42.3\",\n",
+ " \"use_cache\": true,\n",
+ " \"use_sliding_window\": false,\n",
+ " \"vocab_size\": 151936\n",
+ "}\n",
+ "\n",
+ "07/05/2024 06:15:44 - INFO - llamafactory.model.model_utils.quantization - Quantizing model to 4 bit with bitsandbytes.\n",
+ "🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.\n",
+ "[INFO|configuration_utils.py:733] 2024-07-05 06:15:45,429 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--unsloth--qwen2-0.5b-instruct-bnb-4bit/snapshots/c3b24ce4827d69f5c3bde9aba00047774069ab72/config.json\n",
+ "[INFO|configuration_utils.py:800] 2024-07-05 06:15:45,430 >> Model config Qwen2Config {\n",
+ " \"_name_or_path\": \"unsloth/qwen2-0.5b-instruct-bnb-4bit\",\n",
+ " \"architectures\": [\n",
+ " \"Qwen2ForCausalLM\"\n",
+ " ],\n",
+ " \"attention_dropout\": 0.0,\n",
+ " \"bos_token_id\": 151643,\n",
+ " \"eos_token_id\": 151645,\n",
+ " \"hidden_act\": \"silu\",\n",
+ " \"hidden_size\": 896,\n",
+ " \"initializer_range\": 0.02,\n",
+ " \"intermediate_size\": 4864,\n",
+ " \"max_position_embeddings\": 32768,\n",
+ " \"max_window_layers\": 24,\n",
+ " \"model_type\": \"qwen2\",\n",
+ " \"num_attention_heads\": 14,\n",
+ " \"num_hidden_layers\": 24,\n",
+ " \"num_key_value_heads\": 2,\n",
+ " \"quantization_config\": {\n",
+ " \"_load_in_4bit\": true,\n",
+ " \"_load_in_8bit\": false,\n",
+ " \"bnb_4bit_compute_dtype\": \"bfloat16\",\n",
+ " \"bnb_4bit_quant_storage\": \"uint8\",\n",
+ " \"bnb_4bit_quant_type\": \"nf4\",\n",
+ " \"bnb_4bit_use_double_quant\": true,\n",
+ " \"llm_int8_enable_fp32_cpu_offload\": false,\n",
+ " \"llm_int8_has_fp16_weight\": false,\n",
+ " \"llm_int8_skip_modules\": null,\n",
+ " \"llm_int8_threshold\": 6.0,\n",
+ " \"load_in_4bit\": true,\n",
+ " \"load_in_8bit\": false,\n",
+ " \"quant_method\": \"bitsandbytes\"\n",
+ " },\n",
+ " \"rms_norm_eps\": 1e-06,\n",
+ " \"rope_theta\": 1000000.0,\n",
+ " \"sliding_window\": 32768,\n",
+ " \"tie_word_embeddings\": true,\n",
+ " \"torch_dtype\": \"bfloat16\",\n",
+ " \"transformers_version\": \"4.42.3\",\n",
+ " \"use_cache\": true,\n",
+ " \"use_sliding_window\": false,\n",
+ " \"vocab_size\": 151936\n",
+ "}\n",
+ "\n",
+ "==((====))== Unsloth: Fast Qwen2 patching release 2024.7\n",
+ " \\\\ /| GPU: NVIDIA GeForce RTX 4080 Laptop GPU. Max memory: 11.994 GB. Platform = Linux.\n",
+ "O^O/ \\_/ \\ Pytorch: 2.3.0+cu121. CUDA = 8.9. CUDA Toolkit = 12.1.\n",
+ "\\ / Bfloat16 = TRUE. FA [Xformers = 0.0.26.post1. FA2 = False]\n",
+ " \"-____-\" Free Apache license: http://github.com/unslothai/unsloth\n",
+ "[INFO|configuration_utils.py:733] 2024-07-05 06:15:46,517 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--unsloth--qwen2-0.5b-instruct-bnb-4bit/snapshots/c3b24ce4827d69f5c3bde9aba00047774069ab72/config.json\n",
+ "[INFO|configuration_utils.py:800] 2024-07-05 06:15:46,517 >> Model config Qwen2Config {\n",
+ " \"_name_or_path\": \"unsloth/qwen2-0.5b-instruct-bnb-4bit\",\n",
+ " \"architectures\": [\n",
+ " \"Qwen2ForCausalLM\"\n",
+ " ],\n",
+ " \"attention_dropout\": 0.0,\n",
+ " \"bos_token_id\": 151643,\n",
+ " \"eos_token_id\": 151645,\n",
+ " \"hidden_act\": \"silu\",\n",
+ " \"hidden_size\": 896,\n",
+ " \"initializer_range\": 0.02,\n",
+ " \"intermediate_size\": 4864,\n",
+ " \"max_position_embeddings\": 32768,\n",
+ " \"max_window_layers\": 24,\n",
+ " \"model_type\": \"qwen2\",\n",
+ " \"num_attention_heads\": 14,\n",
+ " \"num_hidden_layers\": 24,\n",
+ " \"num_key_value_heads\": 2,\n",
+ " \"quantization_config\": {\n",
+ " \"_load_in_4bit\": true,\n",
+ " \"_load_in_8bit\": false,\n",
+ " \"bnb_4bit_compute_dtype\": \"bfloat16\",\n",
+ " \"bnb_4bit_quant_storage\": \"uint8\",\n",
+ " \"bnb_4bit_quant_type\": \"nf4\",\n",
+ " \"bnb_4bit_use_double_quant\": true,\n",
+ " \"llm_int8_enable_fp32_cpu_offload\": false,\n",
+ " \"llm_int8_has_fp16_weight\": false,\n",
+ " \"llm_int8_skip_modules\": null,\n",
+ " \"llm_int8_threshold\": 6.0,\n",
+ " \"load_in_4bit\": true,\n",
+ " \"load_in_8bit\": false,\n",
+ " \"quant_method\": \"bitsandbytes\"\n",
+ " },\n",
+ " \"rms_norm_eps\": 1e-06,\n",
+ " \"rope_theta\": 1000000.0,\n",
+ " \"sliding_window\": 32768,\n",
+ " \"tie_word_embeddings\": true,\n",
+ " \"torch_dtype\": \"bfloat16\",\n",
+ " \"transformers_version\": \"4.42.3\",\n",
+ " \"use_cache\": true,\n",
+ " \"use_sliding_window\": false,\n",
+ " \"vocab_size\": 151936\n",
+ "}\n",
+ "\n",
+ "[INFO|configuration_utils.py:733] 2024-07-05 06:15:47,071 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--unsloth--qwen2-0.5b-instruct-bnb-4bit/snapshots/c3b24ce4827d69f5c3bde9aba00047774069ab72/config.json\n",
+ "[INFO|configuration_utils.py:800] 2024-07-05 06:15:47,071 >> Model config Qwen2Config {\n",
+ " \"_name_or_path\": \"unsloth/qwen2-0.5b-instruct-bnb-4bit\",\n",
+ " \"architectures\": [\n",
+ " \"Qwen2ForCausalLM\"\n",
+ " ],\n",
+ " \"attention_dropout\": 0.0,\n",
+ " \"bos_token_id\": 151643,\n",
+ " \"eos_token_id\": 151645,\n",
+ " \"hidden_act\": \"silu\",\n",
+ " \"hidden_size\": 896,\n",
+ " \"initializer_range\": 0.02,\n",
+ " \"intermediate_size\": 4864,\n",
+ " \"max_position_embeddings\": 32768,\n",
+ " \"max_window_layers\": 24,\n",
+ " \"model_type\": \"qwen2\",\n",
+ " \"num_attention_heads\": 14,\n",
+ " \"num_hidden_layers\": 24,\n",
+ " \"num_key_value_heads\": 2,\n",
+ " \"quantization_config\": {\n",
+ " \"_load_in_4bit\": true,\n",
+ " \"_load_in_8bit\": false,\n",
+ " \"bnb_4bit_compute_dtype\": \"bfloat16\",\n",
+ " \"bnb_4bit_quant_storage\": \"uint8\",\n",
+ " \"bnb_4bit_quant_type\": \"nf4\",\n",
+ " \"bnb_4bit_use_double_quant\": true,\n",
+ " \"llm_int8_enable_fp32_cpu_offload\": false,\n",
+ " \"llm_int8_has_fp16_weight\": false,\n",
+ " \"llm_int8_skip_modules\": null,\n",
+ " \"llm_int8_threshold\": 6.0,\n",
+ " \"load_in_4bit\": true,\n",
+ " \"load_in_8bit\": false,\n",
+ " \"quant_method\": \"bitsandbytes\"\n",
+ " },\n",
+ " \"rms_norm_eps\": 1e-06,\n",
+ " \"rope_theta\": 1000000.0,\n",
+ " \"sliding_window\": 32768,\n",
+ " \"tie_word_embeddings\": true,\n",
+ " \"torch_dtype\": \"bfloat16\",\n",
+ " \"transformers_version\": \"4.42.3\",\n",
+ " \"use_cache\": true,\n",
+ " \"use_sliding_window\": false,\n",
+ " \"vocab_size\": 151936\n",
+ "}\n",
+ "\n",
+ "[INFO|modeling_utils.py:3556] 2024-07-05 06:15:47,115 >> loading weights file model.safetensors from cache at /home/inflaton/.cache/huggingface/hub/models--unsloth--qwen2-0.5b-instruct-bnb-4bit/snapshots/c3b24ce4827d69f5c3bde9aba00047774069ab72/model.safetensors\n",
+ "[INFO|modeling_utils.py:1531] 2024-07-05 06:15:48,951 >> Instantiating Qwen2ForCausalLM model under default dtype torch.bfloat16.\n",
+ "[INFO|configuration_utils.py:1000] 2024-07-05 06:15:48,969 >> Generate config GenerationConfig {\n",
+ " \"bos_token_id\": 151643,\n",
+ " \"eos_token_id\": 151645\n",
+ "}\n",
+ "\n",
+ "[INFO|modeling_utils.py:4364] 2024-07-05 06:16:14,443 >> All model checkpoint weights were used when initializing Qwen2ForCausalLM.\n",
+ "\n",
+ "[INFO|modeling_utils.py:4372] 2024-07-05 06:16:14,443 >> All the weights of Qwen2ForCausalLM were initialized from the model checkpoint at unsloth/qwen2-0.5b-instruct-bnb-4bit.\n",
+ "If your task is similar to the task the model of the checkpoint was trained on, you can already use Qwen2ForCausalLM for predictions without further training.\n",
+ "[INFO|configuration_utils.py:955] 2024-07-05 06:16:14,971 >> loading configuration file generation_config.json from cache at /home/inflaton/.cache/huggingface/hub/models--unsloth--qwen2-0.5b-instruct-bnb-4bit/snapshots/c3b24ce4827d69f5c3bde9aba00047774069ab72/generation_config.json\n",
+ "[INFO|configuration_utils.py:1000] 2024-07-05 06:16:14,971 >> Generate config GenerationConfig {\n",
+ " \"bos_token_id\": 151643,\n",
+ " \"do_sample\": true,\n",
+ " \"eos_token_id\": [\n",
+ " 151645,\n",
+ " 151643\n",
+ " ],\n",
+ " \"pad_token_id\": 151643,\n",
+ " \"repetition_penalty\": 1.1,\n",
+ " \"temperature\": 0.7,\n",
+ " \"top_k\": 20,\n",
+ " \"top_p\": 0.8\n",
+ "}\n",
+ "\n",
+ "07/05/2024 06:16:18 - INFO - llamafactory.model.model_utils.checkpointing - Gradient checkpointing enabled.\n",
+ "07/05/2024 06:16:18 - INFO - llamafactory.model.adapter - Upcasting trainable params to float32.\n",
+ "07/05/2024 06:16:18 - INFO - llamafactory.model.adapter - Fine-tuning method: LoRA\n",
+ "07/05/2024 06:16:18 - INFO - llamafactory.model.model_utils.misc - Found linear modules: gate_proj,q_proj,k_proj,up_proj,down_proj,o_proj,v_proj\n",
+ "[WARNING|logging.py:328] 2024-07-05 06:16:19,091 >> Unsloth 2024.7 patched 24 layers with 0 QKV layers, 24 O layers and 24 MLP layers.\n",
+ "07/05/2024 06:16:19 - INFO - llamafactory.model.loader - trainable params: 4,399,104 || all params: 634,566,528 || trainable%: 0.6932\n",
+ "[INFO|trainer.py:642] 2024-07-05 06:16:19,940 >> Using auto half precision backend\n",
+ "07/05/2024 06:16:19 - WARNING - llamafactory.train.callbacks - Previous trainer log in this folder will be deleted.\n",
+ "07/05/2024 06:16:20 - INFO - llamafactory.train.trainer_utils - Using LoRA+ optimizer with loraplus lr ratio 16.00.\n",
+ "[WARNING|:223] 2024-07-05 06:16:20,129 >> ==((====))== Unsloth - 2x faster free finetuning | Num GPUs = 1\n",
+ " \\\\ /| Num examples = 4,482 | Num Epochs = 6\n",
+ "O^O/ \\_/ \\ Batch size per device = 1 | Gradient Accumulation steps = 8\n",
+ "\\ / Total batch size = 8 | Total steps = 3,360\n",
+ " \"-____-\" Number of trainable parameters = 4,399,104\n",
+ "[INFO|integration_utils.py:750] 2024-07-05 06:16:20,818 >> Automatic Weights & Biases logging enabled, to disable set os.environ[\"WANDB_DISABLED\"] = \"true\"\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33minflaton-sg\u001b[0m (\u001b[33minflaton-ai\u001b[0m). Use \u001b[1m`wandb login --relogin`\u001b[0m to force relogin\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: Tracking run with wandb version 0.17.4\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: Run data is saved locally in \u001b[35m\u001b[1m/home/inflaton/code/projects/courses/llm-finetuning/llama-factory/wandb/run-20240705_061623-3amepb0m\u001b[0m\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: Run \u001b[1m`wandb offline`\u001b[0m to turn off syncing.\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: Syncing run \u001b[33mqwen2_0.5b_lora_sft\u001b[0m\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: ⭐️ View project at \u001b[34m\u001b[4mhttps://wandb.ai/inflaton-ai/huggingface\u001b[0m\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: 🚀 View run at \u001b[34m\u001b[4mhttps://wandb.ai/inflaton-ai/huggingface/runs/3amepb0m\u001b[0m\n",
+ "{'loss': 2.6325, 'grad_norm': 2.6052567958831787, 'learning_rate': 2.9761904761904763e-06, 'epoch': 0.02}\n",
+ "{'loss': 2.6514, 'grad_norm': 2.433773994445801, 'learning_rate': 5.9523809523809525e-06, 'epoch': 0.04}\n",
+ "{'loss': 2.474, 'grad_norm': 2.1471617221832275, 'learning_rate': 8.92857142857143e-06, 'epoch': 0.05}\n",
+ "{'loss': 2.3031, 'grad_norm': 4.300695419311523, 'learning_rate': 1.1904761904761905e-05, 'epoch': 0.07}\n",
+ "{'loss': 2.4774, 'grad_norm': 1.8105831146240234, 'learning_rate': 1.4880952380952381e-05, 'epoch': 0.09}\n",
+ "{'loss': 2.2519, 'grad_norm': 2.077115297317505, 'learning_rate': 1.785714285714286e-05, 'epoch': 0.11}\n",
+ "{'loss': 2.4309, 'grad_norm': 1.9538270235061646, 'learning_rate': 2.0833333333333336e-05, 'epoch': 0.12}\n",
+ "{'loss': 2.22, 'grad_norm': 2.1473119258880615, 'learning_rate': 2.380952380952381e-05, 'epoch': 0.14}\n",
+ "{'loss': 2.3228, 'grad_norm': 2.819317579269409, 'learning_rate': 2.6785714285714288e-05, 'epoch': 0.16}\n",
+ "{'loss': 2.238, 'grad_norm': 1.9084508419036865, 'learning_rate': 2.9761904761904762e-05, 'epoch': 0.18}\n",
+ "{'loss': 2.2707, 'grad_norm': 2.1343274116516113, 'learning_rate': 3.273809523809524e-05, 'epoch': 0.2}\n",
+ "{'loss': 2.286, 'grad_norm': 2.273739814758301, 'learning_rate': 3.571428571428572e-05, 'epoch': 0.21}\n",
+ "{'loss': 2.1805, 'grad_norm': 2.505805253982544, 'learning_rate': 3.8690476190476195e-05, 'epoch': 0.23}\n",
+ "{'loss': 2.2527, 'grad_norm': 2.4992618560791016, 'learning_rate': 4.166666666666667e-05, 'epoch': 0.25}\n",
+ "{'loss': 2.1387, 'grad_norm': 1.9521129131317139, 'learning_rate': 4.464285714285715e-05, 'epoch': 0.27}\n",
+ "{'loss': 2.1733, 'grad_norm': 1.7223074436187744, 'learning_rate': 4.761904761904762e-05, 'epoch': 0.29}\n",
+ "{'loss': 2.2774, 'grad_norm': 1.8748223781585693, 'learning_rate': 5.05952380952381e-05, 'epoch': 0.3}\n",
+ "{'loss': 2.0726, 'grad_norm': 2.039461135864258, 'learning_rate': 5.3571428571428575e-05, 'epoch': 0.32}\n",
+ "{'loss': 2.1471, 'grad_norm': 2.512571096420288, 'learning_rate': 5.6547619047619046e-05, 'epoch': 0.34}\n",
+ "{'loss': 2.3088, 'grad_norm': 2.0730302333831787, 'learning_rate': 5.9523809523809524e-05, 'epoch': 0.36}\n",
+ "{'loss': 2.2315, 'grad_norm': 1.9101688861846924, 'learning_rate': 6.25e-05, 'epoch': 0.37}\n",
+ "{'loss': 2.1767, 'grad_norm': 2.6846179962158203, 'learning_rate': 6.547619047619048e-05, 'epoch': 0.39}\n",
+ "{'loss': 2.1396, 'grad_norm': 2.3576760292053223, 'learning_rate': 6.845238095238096e-05, 'epoch': 0.41}\n",
+ "{'loss': 2.3496, 'grad_norm': 2.5166685581207275, 'learning_rate': 7.142857142857143e-05, 'epoch': 0.43}\n",
+ "{'loss': 2.1899, 'grad_norm': 2.326274871826172, 'learning_rate': 7.440476190476191e-05, 'epoch': 0.45}\n",
+ "{'loss': 2.1658, 'grad_norm': 2.342203140258789, 'learning_rate': 7.738095238095239e-05, 'epoch': 0.46}\n",
+ "{'loss': 2.1561, 'grad_norm': 2.895669937133789, 'learning_rate': 8.035714285714287e-05, 'epoch': 0.48}\n",
+ "{'loss': 2.264, 'grad_norm': 3.2078170776367188, 'learning_rate': 8.333333333333334e-05, 'epoch': 0.5}\n",
+ "{'loss': 2.088, 'grad_norm': 2.282803773880005, 'learning_rate': 8.630952380952382e-05, 'epoch': 0.52}\n",
+ "{'loss': 2.1821, 'grad_norm': 2.5930910110473633, 'learning_rate': 8.92857142857143e-05, 'epoch': 0.54}\n",
+ "{'loss': 2.2382, 'grad_norm': 2.7073450088500977, 'learning_rate': 9.226190476190478e-05, 'epoch': 0.55}\n",
+ "{'loss': 2.0117, 'grad_norm': 3.457638740539551, 'learning_rate': 9.523809523809524e-05, 'epoch': 0.57}\n",
+ "{'loss': 2.0526, 'grad_norm': 3.453278064727783, 'learning_rate': 9.821428571428572e-05, 'epoch': 0.59}\n",
+ "{'loss': 2.1403, 'grad_norm': 2.7960667610168457, 'learning_rate': 9.999956828659095e-05, 'epoch': 0.61}\n",
+ "{'loss': 2.161, 'grad_norm': 3.307030439376831, 'learning_rate': 9.999471159635539e-05, 'epoch': 0.62}\n",
+ "{'loss': 2.0478, 'grad_norm': 2.788396120071411, 'learning_rate': 9.998445910004082e-05, 'epoch': 0.64}\n",
+ "{'loss': 2.3267, 'grad_norm': 4.489534378051758, 'learning_rate': 9.996881190417393e-05, 'epoch': 0.66}\n",
+ "{'loss': 2.2085, 'grad_norm': 2.93642520904541, 'learning_rate': 9.994777169751806e-05, 'epoch': 0.68}\n",
+ "{'loss': 1.8982, 'grad_norm': 2.470207929611206, 'learning_rate': 9.992134075089084e-05, 'epoch': 0.7}\n",
+ "{'loss': 2.1388, 'grad_norm': 2.992520809173584, 'learning_rate': 9.988952191691925e-05, 'epoch': 0.71}\n",
+ "{'loss': 2.1675, 'grad_norm': 2.986842155456543, 'learning_rate': 9.985231862973168e-05, 'epoch': 0.73}\n",
+ "{'loss': 2.1914, 'grad_norm': 2.8504011631011963, 'learning_rate': 9.980973490458728e-05, 'epoch': 0.75}\n",
+ "{'loss': 2.1588, 'grad_norm': 3.4979565143585205, 'learning_rate': 9.976177533744261e-05, 'epoch': 0.77}\n",
+ "{'loss': 2.0952, 'grad_norm': 3.6922664642333984, 'learning_rate': 9.97084451044556e-05, 'epoch': 0.79}\n",
+ "{'loss': 2.0288, 'grad_norm': 2.895118236541748, 'learning_rate': 9.964974996142698e-05, 'epoch': 0.8}\n",
+ "{'loss': 2.1275, 'grad_norm': 3.1226203441619873, 'learning_rate': 9.958569624317893e-05, 'epoch': 0.82}\n",
+ "{'loss': 2.1303, 'grad_norm': 4.210818767547607, 'learning_rate': 9.951629086287151e-05, 'epoch': 0.84}\n",
+ "{'loss': 2.1294, 'grad_norm': 2.9749433994293213, 'learning_rate': 9.944154131125642e-05, 'epoch': 0.86}\n",
+ "{'loss': 2.1612, 'grad_norm': 2.9232656955718994, 'learning_rate': 9.936145565586871e-05, 'epoch': 0.87}\n",
+ "{'loss': 2.3294, 'grad_norm': 2.8355772495269775, 'learning_rate': 9.927604254015585e-05, 'epoch': 0.89}\n",
+ "{'loss': 2.274, 'grad_norm': 3.1120338439941406, 'learning_rate': 9.918531118254507e-05, 'epoch': 0.91}\n",
+ "{'loss': 2.1442, 'grad_norm': 4.310208797454834, 'learning_rate': 9.90892713754483e-05, 'epoch': 0.93}\n",
+ "{'loss': 2.1595, 'grad_norm': 3.8621461391448975, 'learning_rate': 9.898793348420536e-05, 'epoch': 0.95}\n",
+ "{'loss': 2.1399, 'grad_norm': 2.8605706691741943, 'learning_rate': 9.888130844596524e-05, 'epoch': 0.96}\n",
+ "{'loss': 2.1673, 'grad_norm': 3.161895275115967, 'learning_rate': 9.876940776850569e-05, 'epoch': 0.98}\n",
+ "{'loss': 2.1621, 'grad_norm': 3.304511785507202, 'learning_rate': 9.865224352899119e-05, 'epoch': 1.0}\n",
+ " 17%|██████▎ | 560/3360 [15:54<1:17:19, 1.66s/it][INFO|trainer.py:3788] 2024-07-05 06:32:21,677 >> \n",
+ "***** Running Evaluation *****\n",
+ "[INFO|trainer.py:3790] 2024-07-05 06:32:21,677 >> Num examples = 46\n",
+ "[INFO|trainer.py:3793] 2024-07-05 06:32:21,677 >> Batch size = 1\n",
+ "\n",
+ " 0%| | 0/46 [00:00, ?it/s]\u001b[A\n",
+ " 7%|██▊ | 3/46 [00:00<00:01, 24.61it/s]\u001b[A\n",
+ " 13%|█████▋ | 6/46 [00:00<00:02, 19.42it/s]\u001b[A\n",
+ " 20%|████████▌ | 9/46 [00:00<00:02, 18.32it/s]\u001b[A\n",
+ " 24%|██████████▎ | 11/46 [00:00<00:02, 16.73it/s]\u001b[A\n",
+ " 28%|████████████▏ | 13/46 [00:00<00:01, 16.68it/s]\u001b[A\n",
+ " 33%|██████████████ | 15/46 [00:00<00:01, 16.68it/s]\u001b[A\n",
+ " 37%|███████████████▉ | 17/46 [00:00<00:01, 16.66it/s]\u001b[A\n",
+ " 41%|█████████████████▊ | 19/46 [00:01<00:01, 16.98it/s]\u001b[A\n",
+ " 46%|███████████████████▋ | 21/46 [00:01<00:01, 17.04it/s]\u001b[A\n",
+ " 50%|█████████████████████▌ | 23/46 [00:01<00:01, 17.26it/s]\u001b[A\n",
+ " 54%|███████████████████████▎ | 25/46 [00:01<00:01, 17.35it/s]\u001b[A\n",
+ " 59%|█████████████████████████▏ | 27/46 [00:01<00:01, 16.65it/s]\u001b[A\n",
+ " 63%|███████████████████████████ | 29/46 [00:01<00:01, 16.79it/s]\u001b[A\n",
+ " 67%|████████████████████████████▉ | 31/46 [00:01<00:00, 16.54it/s]\u001b[A\n",
+ " 72%|██████████████████████████████▊ | 33/46 [00:01<00:00, 15.94it/s]\u001b[A\n",
+ " 76%|████████████████████████████████▋ | 35/46 [00:02<00:00, 16.17it/s]\u001b[A\n",
+ " 80%|██████████████████████████████████▌ | 37/46 [00:02<00:00, 16.44it/s]\u001b[A\n",
+ " 85%|████████████████████████████████████▍ | 39/46 [00:02<00:00, 16.57it/s]\u001b[A\n",
+ " 89%|██████████████████████████████████████▎ | 41/46 [00:02<00:00, 16.44it/s]\u001b[A\n",
+ " 93%|████████████████████████████████████████▏ | 43/46 [00:02<00:00, 16.64it/s]\u001b[A\n",
+ " \u001b[A\n",
+ "\u001b[A{'eval_loss': 2.109107255935669, 'eval_runtime': 2.798, 'eval_samples_per_second': 16.44, 'eval_steps_per_second': 16.44, 'epoch': 1.0}\n",
+ " 17%|██████▎ | 560/3360 [15:57<1:17:19, 1.66s/it]\n",
+ "100%|███████████████████████████████████████████| 46/46 [00:02<00:00, 16.83it/s]\u001b[A\n",
+ " \u001b[A[INFO|trainer.py:3478] 2024-07-05 06:32:24,477 >> Saving model checkpoint to saves/qwen2-0.5b/lora/sft/checkpoint-560\n",
+ "[INFO|configuration_utils.py:733] 2024-07-05 06:32:25,696 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--unsloth--qwen2-0.5b-instruct-bnb-4bit/snapshots/c3b24ce4827d69f5c3bde9aba00047774069ab72/config.json\n",
+ "[INFO|configuration_utils.py:800] 2024-07-05 06:32:25,697 >> Model config Qwen2Config {\n",
+ " \"_name_or_path\": \"Qwen/Qwen2-0.5B-Instruct\",\n",
+ " \"architectures\": [\n",
+ " \"Qwen2ForCausalLM\"\n",
+ " ],\n",
+ " \"attention_dropout\": 0.0,\n",
+ " \"bos_token_id\": 151643,\n",
+ " \"eos_token_id\": 151645,\n",
+ " \"hidden_act\": \"silu\",\n",
+ " \"hidden_size\": 896,\n",
+ " \"initializer_range\": 0.02,\n",
+ " \"intermediate_size\": 4864,\n",
+ " \"max_position_embeddings\": 32768,\n",
+ " \"max_window_layers\": 24,\n",
+ " \"model_type\": \"qwen2\",\n",
+ " \"num_attention_heads\": 14,\n",
+ " \"num_hidden_layers\": 24,\n",
+ " \"num_key_value_heads\": 2,\n",
+ " \"quantization_config\": {\n",
+ " \"_load_in_4bit\": true,\n",
+ " \"_load_in_8bit\": false,\n",
+ " \"bnb_4bit_compute_dtype\": \"bfloat16\",\n",
+ " \"bnb_4bit_quant_storage\": \"uint8\",\n",
+ " \"bnb_4bit_quant_type\": \"nf4\",\n",
+ " \"bnb_4bit_use_double_quant\": true,\n",
+ " \"llm_int8_enable_fp32_cpu_offload\": false,\n",
+ " \"llm_int8_has_fp16_weight\": false,\n",
+ " \"llm_int8_skip_modules\": null,\n",
+ " \"llm_int8_threshold\": 6.0,\n",
+ " \"load_in_4bit\": true,\n",
+ " \"load_in_8bit\": false,\n",
+ " \"quant_method\": \"bitsandbytes\"\n",
+ " },\n",
+ " \"rms_norm_eps\": 1e-06,\n",
+ " \"rope_theta\": 1000000.0,\n",
+ " \"sliding_window\": 32768,\n",
+ " \"tie_word_embeddings\": true,\n",
+ " \"torch_dtype\": \"bfloat16\",\n",
+ " \"transformers_version\": \"4.42.3\",\n",
+ " \"use_cache\": true,\n",
+ " \"use_sliding_window\": false,\n",
+ " \"vocab_size\": 151936\n",
+ "}\n",
+ "\n",
+ "{'loss': 1.6151, 'grad_norm': 3.5378007888793945, 'learning_rate': 9.852982837266955e-05, 'epoch': 1.02}\n",
+ "{'loss': 1.3799, 'grad_norm': 3.1997132301330566, 'learning_rate': 9.840217551150706e-05, 'epoch': 1.04}\n",
+ "{'loss': 1.6132, 'grad_norm': 2.48860502243042, 'learning_rate': 9.826929872276255e-05, 'epoch': 1.05}\n",
+ "{'loss': 1.4984, 'grad_norm': 3.7188329696655273, 'learning_rate': 9.81312123475006e-05, 'epoch': 1.07}\n",
+ "{'loss': 1.4967, 'grad_norm': 3.4040935039520264, 'learning_rate': 9.798793128904356e-05, 'epoch': 1.09}\n",
+ "{'loss': 1.5688, 'grad_norm': 3.603771924972534, 'learning_rate': 9.78394710113631e-05, 'epoch': 1.11}\n",
+ "{'loss': 1.4902, 'grad_norm': 3.248730421066284, 'learning_rate': 9.768584753741134e-05, 'epoch': 1.12}\n",
+ "{'loss': 1.4788, 'grad_norm': 4.081541538238525, 'learning_rate': 9.752707744739145e-05, 'epoch': 1.14}\n",
+ "{'loss': 1.5933, 'grad_norm': 3.348815441131592, 'learning_rate': 9.736317787696816e-05, 'epoch': 1.16}\n",
+ "{'loss': 1.4597, 'grad_norm': 5.059058666229248, 'learning_rate': 9.719416651541839e-05, 'epoch': 1.18}\n",
+ "{'loss': 1.5088, 'grad_norm': 2.929900646209717, 'learning_rate': 9.702006160372209e-05, 'epoch': 1.2}\n",
+ "{'loss': 1.5122, 'grad_norm': 3.9229655265808105, 'learning_rate': 9.684088193259355e-05, 'epoch': 1.21}\n",
+ "{'loss': 1.4982, 'grad_norm': 4.456009864807129, 'learning_rate': 9.665664684045333e-05, 'epoch': 1.23}\n",
+ "{'loss': 1.5631, 'grad_norm': 6.255136966705322, 'learning_rate': 9.646737621134112e-05, 'epoch': 1.25}\n",
+ "{'loss': 1.5067, 'grad_norm': 4.147162914276123, 'learning_rate': 9.627309047276974e-05, 'epoch': 1.27}\n",
+ "{'loss': 1.6788, 'grad_norm': 4.083860874176025, 'learning_rate': 9.607381059352038e-05, 'epoch': 1.29}\n",
+ "{'loss': 1.6006, 'grad_norm': 3.7379791736602783, 'learning_rate': 9.586955808137958e-05, 'epoch': 1.3}\n",
+ "{'loss': 1.6328, 'grad_norm': 3.6500179767608643, 'learning_rate': 9.566035498081784e-05, 'epoch': 1.32}\n",
+ "{'loss': 1.6155, 'grad_norm': 3.455841302871704, 'learning_rate': 9.544622387061055e-05, 'epoch': 1.34}\n",
+ "{'loss': 1.3868, 'grad_norm': 3.636683702468872, 'learning_rate': 9.522718786140097e-05, 'epoch': 1.36}\n",
+ "{'loss': 1.5776, 'grad_norm': 4.494875431060791, 'learning_rate': 9.500327059320606e-05, 'epoch': 1.37}\n",
+ "{'loss': 1.4877, 'grad_norm': 4.710891246795654, 'learning_rate': 9.477449623286505e-05, 'epoch': 1.39}\n",
+ "{'loss': 1.401, 'grad_norm': 3.5016818046569824, 'learning_rate': 9.454088947143116e-05, 'epoch': 1.41}\n",
+ "{'loss': 1.628, 'grad_norm': 4.40405797958374, 'learning_rate': 9.430247552150673e-05, 'epoch': 1.43}\n",
+ "{'loss': 1.4999, 'grad_norm': 3.74572491645813, 'learning_rate': 9.405928011452211e-05, 'epoch': 1.45}\n",
+ "{'loss': 1.5602, 'grad_norm': 4.144255638122559, 'learning_rate': 9.381132949795861e-05, 'epoch': 1.46}\n",
+ "{'loss': 1.6872, 'grad_norm': 4.109062671661377, 'learning_rate': 9.35586504325155e-05, 'epoch': 1.48}\n",
+ "{'loss': 1.5494, 'grad_norm': 7.194815635681152, 'learning_rate': 9.330127018922194e-05, 'epoch': 1.5}\n",
+ "{'loss': 1.4354, 'grad_norm': 3.779526948928833, 'learning_rate': 9.303921654649362e-05, 'epoch': 1.52}\n",
+ "{'loss': 1.593, 'grad_norm': 3.863893508911133, 'learning_rate': 9.277251778713474e-05, 'epoch': 1.54}\n",
+ "{'loss': 1.5795, 'grad_norm': 3.684547185897827, 'learning_rate': 9.250120269528546e-05, 'epoch': 1.55}\n",
+ "{'loss': 1.5245, 'grad_norm': 3.9775428771972656, 'learning_rate': 9.22253005533154e-05, 'epoch': 1.57}\n",
+ "{'loss': 1.631, 'grad_norm': 4.817204475402832, 'learning_rate': 9.194484113866313e-05, 'epoch': 1.59}\n",
+ "{'loss': 1.658, 'grad_norm': 3.928107738494873, 'learning_rate': 9.165985472062246e-05, 'epoch': 1.61}\n",
+ "{'loss': 1.464, 'grad_norm': 4.099756240844727, 'learning_rate': 9.137037205707552e-05, 'epoch': 1.62}\n",
+ "{'loss': 1.5206, 'grad_norm': 3.9024410247802734, 'learning_rate': 9.107642439117321e-05, 'epoch': 1.64}\n",
+ "{'loss': 1.6011, 'grad_norm': 3.7552289962768555, 'learning_rate': 9.077804344796302e-05, 'epoch': 1.66}\n",
+ "{'loss': 1.4891, 'grad_norm': 3.713045835494995, 'learning_rate': 9.04752614309652e-05, 'epoch': 1.68}\n",
+ "{'loss': 1.5139, 'grad_norm': 3.589451313018799, 'learning_rate': 9.01681110186971e-05, 'epoch': 1.7}\n",
+ "{'loss': 1.5901, 'grad_norm': 3.9955010414123535, 'learning_rate': 8.985662536114613e-05, 'epoch': 1.71}\n",
+ "{'loss': 1.5646, 'grad_norm': 3.6160426139831543, 'learning_rate': 8.954083807619208e-05, 'epoch': 1.73}\n",
+ "{'loss': 1.6884, 'grad_norm': 4.0372796058654785, 'learning_rate': 8.922078324597879e-05, 'epoch': 1.75}\n",
+ "{'loss': 1.6813, 'grad_norm': 4.466279983520508, 'learning_rate': 8.889649541323574e-05, 'epoch': 1.77}\n",
+ "{'loss': 1.5947, 'grad_norm': 5.11010217666626, 'learning_rate': 8.856800957755e-05, 'epoch': 1.78}\n",
+ "{'loss': 1.6637, 'grad_norm': 5.363622188568115, 'learning_rate': 8.823536119158864e-05, 'epoch': 1.8}\n",
+ "{'loss': 1.5541, 'grad_norm': 4.0909223556518555, 'learning_rate': 8.789858615727265e-05, 'epoch': 1.82}\n",
+ "{'loss': 1.523, 'grad_norm': 3.796602249145508, 'learning_rate': 8.755772082190194e-05, 'epoch': 1.84}\n",
+ "{'loss': 1.6437, 'grad_norm': 4.511483669281006, 'learning_rate': 8.721280197423258e-05, 'epoch': 1.86}\n",
+ "{'loss': 1.4852, 'grad_norm': 4.5722246170043945, 'learning_rate': 8.68638668405062e-05, 'epoch': 1.87}\n",
+ "{'loss': 1.5986, 'grad_norm': 4.731987953186035, 'learning_rate': 8.651095308043232e-05, 'epoch': 1.89}\n",
+ "{'loss': 1.7502, 'grad_norm': 6.07273530960083, 'learning_rate': 8.61540987831238e-05, 'epoch': 1.91}\n",
+ "{'loss': 1.6979, 'grad_norm': 5.418001651763916, 'learning_rate': 8.579334246298593e-05, 'epoch': 1.93}\n",
+ "{'loss': 1.5625, 'grad_norm': 4.6554341316223145, 'learning_rate': 8.542872305555978e-05, 'epoch': 1.95}\n",
+ "{'loss': 1.4509, 'grad_norm': 3.8252899646759033, 'learning_rate': 8.50602799133199e-05, 'epoch': 1.96}\n",
+ "{'loss': 1.5915, 'grad_norm': 4.251583099365234, 'learning_rate': 8.468805280142709e-05, 'epoch': 1.98}\n",
+ "{'loss': 1.6074, 'grad_norm': 4.587167739868164, 'learning_rate': 8.43120818934367e-05, 'epoch': 2.0}\n",
+ " 33%|████████████▎ | 1120/3360 [31:19<1:00:24, 1.62s/it][INFO|trainer.py:3788] 2024-07-05 06:47:46,319 >> \n",
+ "***** Running Evaluation *****\n",
+ "[INFO|trainer.py:3790] 2024-07-05 06:47:46,319 >> Num examples = 46\n",
+ "[INFO|trainer.py:3793] 2024-07-05 06:47:46,319 >> Batch size = 1\n",
+ "\n",
+ " 0%| | 0/46 [00:00, ?it/s]\u001b[A\n",
+ " 7%|██▊ | 3/46 [00:00<00:01, 21.50it/s]\u001b[A\n",
+ " 13%|█████▋ | 6/46 [00:00<00:02, 18.37it/s]\u001b[A\n",
+ " 17%|███████▋ | 8/46 [00:00<00:02, 17.59it/s]\u001b[A\n",
+ " 22%|█████████▎ | 10/46 [00:00<00:02, 17.41it/s]\u001b[A\n",
+ " 26%|███████████▏ | 12/46 [00:00<00:01, 17.36it/s]\u001b[A\n",
+ " 30%|█████████████ | 14/46 [00:00<00:01, 17.38it/s]\u001b[A\n",
+ " 35%|██████████████▉ | 16/46 [00:00<00:01, 17.26it/s]\u001b[A\n",
+ " 39%|████████████████▊ | 18/46 [00:01<00:01, 16.55it/s]\u001b[A\n",
+ " 43%|██████████████████▋ | 20/46 [00:01<00:01, 16.44it/s]\u001b[A\n",
+ " 48%|████████████████████▌ | 22/46 [00:01<00:01, 16.77it/s]\u001b[A\n",
+ " 52%|██████████████████████▍ | 24/46 [00:01<00:01, 17.01it/s]\u001b[A\n",
+ " 57%|████████████████████████▎ | 26/46 [00:01<00:01, 16.91it/s]\u001b[A\n",
+ " 61%|██████████████████████████▏ | 28/46 [00:01<00:01, 17.18it/s]\u001b[A\n",
+ " 65%|████████████████████████████ | 30/46 [00:01<00:00, 16.93it/s]\u001b[A\n",
+ " 70%|█████████████████████████████▉ | 32/46 [00:01<00:00, 17.12it/s]\u001b[A\n",
+ " 74%|███████████████████████████████▊ | 34/46 [00:01<00:00, 17.01it/s]\u001b[A\n",
+ " 78%|█████████████████████████████████▋ | 36/46 [00:02<00:00, 17.13it/s]\u001b[A\n",
+ " 83%|███████████████████████████████████▌ | 38/46 [00:02<00:00, 16.83it/s]\u001b[A\n",
+ " 87%|█████████████████████████████████████▍ | 40/46 [00:02<00:00, 16.12it/s]\u001b[A\n",
+ " 91%|███████████████████████████████████████▎ | 42/46 [00:02<00:00, 15.90it/s]\u001b[A\n",
+ " 96%|█████████████████████████████████████████▏ | 44/46 [00:02<00:00, 15.18it/s]\u001b[A\n",
+ " \u001b[A\n",
+ "\u001b[A{'eval_loss': 2.179692029953003, 'eval_runtime': 2.8332, 'eval_samples_per_second': 16.236, 'eval_steps_per_second': 16.236, 'epoch': 2.0}\n",
+ " 33%|████████████▎ | 1120/3360 [31:22<1:00:24, 1.62s/it]\n",
+ "100%|███████████████████████████████████████████| 46/46 [00:02<00:00, 15.18it/s]\u001b[A\n",
+ " \u001b[A[INFO|trainer.py:3478] 2024-07-05 06:47:49,154 >> Saving model checkpoint to saves/qwen2-0.5b/lora/sft/checkpoint-1120\n",
+ "[INFO|configuration_utils.py:733] 2024-07-05 06:47:50,281 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--unsloth--qwen2-0.5b-instruct-bnb-4bit/snapshots/c3b24ce4827d69f5c3bde9aba00047774069ab72/config.json\n",
+ "[INFO|configuration_utils.py:800] 2024-07-05 06:47:50,282 >> Model config Qwen2Config {\n",
+ " \"_name_or_path\": \"Qwen/Qwen2-0.5B-Instruct\",\n",
+ " \"architectures\": [\n",
+ " \"Qwen2ForCausalLM\"\n",
+ " ],\n",
+ " \"attention_dropout\": 0.0,\n",
+ " \"bos_token_id\": 151643,\n",
+ " \"eos_token_id\": 151645,\n",
+ " \"hidden_act\": \"silu\",\n",
+ " \"hidden_size\": 896,\n",
+ " \"initializer_range\": 0.02,\n",
+ " \"intermediate_size\": 4864,\n",
+ " \"max_position_embeddings\": 32768,\n",
+ " \"max_window_layers\": 24,\n",
+ " \"model_type\": \"qwen2\",\n",
+ " \"num_attention_heads\": 14,\n",
+ " \"num_hidden_layers\": 24,\n",
+ " \"num_key_value_heads\": 2,\n",
+ " \"quantization_config\": {\n",
+ " \"_load_in_4bit\": true,\n",
+ " \"_load_in_8bit\": false,\n",
+ " \"bnb_4bit_compute_dtype\": \"bfloat16\",\n",
+ " \"bnb_4bit_quant_storage\": \"uint8\",\n",
+ " \"bnb_4bit_quant_type\": \"nf4\",\n",
+ " \"bnb_4bit_use_double_quant\": true,\n",
+ " \"llm_int8_enable_fp32_cpu_offload\": false,\n",
+ " \"llm_int8_has_fp16_weight\": false,\n",
+ " \"llm_int8_skip_modules\": null,\n",
+ " \"llm_int8_threshold\": 6.0,\n",
+ " \"load_in_4bit\": true,\n",
+ " \"load_in_8bit\": false,\n",
+ " \"quant_method\": \"bitsandbytes\"\n",
+ " },\n",
+ " \"rms_norm_eps\": 1e-06,\n",
+ " \"rope_theta\": 1000000.0,\n",
+ " \"sliding_window\": 32768,\n",
+ " \"tie_word_embeddings\": true,\n",
+ " \"torch_dtype\": \"bfloat16\",\n",
+ " \"transformers_version\": \"4.42.3\",\n",
+ " \"use_cache\": true,\n",
+ " \"use_sliding_window\": false,\n",
+ " \"vocab_size\": 151936\n",
+ "}\n",
+ "\n",
+ "{'loss': 0.9111, 'grad_norm': 5.008914470672607, 'learning_rate': 8.393240776696274e-05, 'epoch': 2.02}\n",
+ "{'loss': 0.863, 'grad_norm': 6.299067974090576, 'learning_rate': 8.354907139929851e-05, 'epoch': 2.03}\n",
+ "{'loss': 0.8515, 'grad_norm': 4.728297233581543, 'learning_rate': 8.316211416299397e-05, 'epoch': 2.05}\n",
+ "{'loss': 0.8316, 'grad_norm': 4.379367351531982, 'learning_rate': 8.27715778213905e-05, 'epoch': 2.07}\n",
+ "{'loss': 0.6608, 'grad_norm': 4.311402320861816, 'learning_rate': 8.237750452411353e-05, 'epoch': 2.09}\n",
+ "{'loss': 0.8713, 'grad_norm': 3.783642530441284, 'learning_rate': 8.197993680252334e-05, 'epoch': 2.11}\n",
+ "{'loss': 1.0158, 'grad_norm': 4.141658782958984, 'learning_rate': 8.157891756512488e-05, 'epoch': 2.12}\n",
+ "{'loss': 0.9155, 'grad_norm': 4.355412483215332, 'learning_rate': 8.117449009293668e-05, 'epoch': 2.14}\n",
+ "{'loss': 0.8407, 'grad_norm': 5.703305721282959, 'learning_rate': 8.076669803481965e-05, 'epoch': 2.16}\n",
+ "{'loss': 0.8494, 'grad_norm': 5.374706745147705, 'learning_rate': 8.035558540276618e-05, 'epoch': 2.18}\n",
+ "{'loss': 0.8743, 'grad_norm': 4.037242889404297, 'learning_rate': 7.994119656715002e-05, 'epoch': 2.2}\n",
+ "{'loss': 0.9841, 'grad_norm': 4.615417957305908, 'learning_rate': 7.952357625193749e-05, 'epoch': 2.21}\n",
+ "{'loss': 0.9296, 'grad_norm': 4.376211643218994, 'learning_rate': 7.91027695298606e-05, 'epoch': 2.23}\n",
+ "{'loss': 0.9142, 'grad_norm': 4.084548473358154, 'learning_rate': 7.86788218175523e-05, 'epoch': 2.25}\n",
+ "{'loss': 0.8517, 'grad_norm': 4.527939796447754, 'learning_rate': 7.8251778870645e-05, 'epoch': 2.27}\n",
+ "{'loss': 0.9113, 'grad_norm': 5.170512676239014, 'learning_rate': 7.782168677883206e-05, 'epoch': 2.28}\n",
+ "{'loss': 0.9332, 'grad_norm': 4.342284202575684, 'learning_rate': 7.738859196089358e-05, 'epoch': 2.3}\n",
+ "{'loss': 0.9759, 'grad_norm': 4.931323051452637, 'learning_rate': 7.695254115968648e-05, 'epoch': 2.32}\n",
+ "{'loss': 1.0079, 'grad_norm': 3.684819459915161, 'learning_rate': 7.651358143709972e-05, 'epoch': 2.34}\n",
+ "{'loss': 0.9958, 'grad_norm': 5.162328720092773, 'learning_rate': 7.60717601689749e-05, 'epoch': 2.36}\n",
+ "{'loss': 0.9528, 'grad_norm': 4.386671543121338, 'learning_rate': 7.562712503999327e-05, 'epoch': 2.37}\n",
+ "{'loss': 1.1468, 'grad_norm': 5.785244464874268, 'learning_rate': 7.517972403852905e-05, 'epoch': 2.39}\n",
+ "{'loss': 0.9291, 'grad_norm': 4.308371543884277, 'learning_rate': 7.472960545147038e-05, 'epoch': 2.41}\n",
+ "{'loss': 0.8408, 'grad_norm': 5.942112922668457, 'learning_rate': 7.427681785900761e-05, 'epoch': 2.43}\n",
+ "{'loss': 0.9693, 'grad_norm': 4.682136535644531, 'learning_rate': 7.382141012939034e-05, 'epoch': 2.45}\n",
+ "{'loss': 0.8726, 'grad_norm': 4.883449077606201, 'learning_rate': 7.33634314136531e-05, 'epoch': 2.46}\n",
+ "{'loss': 0.9426, 'grad_norm': 4.833103656768799, 'learning_rate': 7.290293114031061e-05, 'epoch': 2.48}\n",
+ "{'loss': 1.0333, 'grad_norm': 4.8503289222717285, 'learning_rate': 7.243995901002312e-05, 'epoch': 2.5}\n",
+ "{'loss': 0.9984, 'grad_norm': 4.3091230392456055, 'learning_rate': 7.197456499023225e-05, 'epoch': 2.52}\n",
+ "{'loss': 1.0019, 'grad_norm': 4.726260662078857, 'learning_rate': 7.150679930976825e-05, 'epoch': 2.53}\n",
+ "{'loss': 0.9594, 'grad_norm': 3.850511312484741, 'learning_rate': 7.103671245342887e-05, 'epoch': 2.55}\n",
+ "{'loss': 0.8701, 'grad_norm': 5.5012030601501465, 'learning_rate': 7.056435515653059e-05, 'epoch': 2.57}\n",
+ "{'loss': 1.0956, 'grad_norm': 5.610720157623291, 'learning_rate': 7.008977839943299e-05, 'epoch': 2.59}\n",
+ "{'loss': 0.9175, 'grad_norm': 3.8002779483795166, 'learning_rate': 6.961303340203653e-05, 'epoch': 2.61}\n",
+ "{'loss': 1.0243, 'grad_norm': 5.210932731628418, 'learning_rate': 6.91341716182545e-05, 'epoch': 2.62}\n",
+ "{'loss': 0.9902, 'grad_norm': 3.9311327934265137, 'learning_rate': 6.86532447304597e-05, 'epoch': 2.64}\n",
+ "{'loss': 0.9589, 'grad_norm': 4.984393119812012, 'learning_rate': 6.817030464390656e-05, 'epoch': 2.66}\n",
+ "{'loss': 0.9985, 'grad_norm': 4.881758689880371, 'learning_rate': 6.768540348112907e-05, 'epoch': 2.68}\n",
+ "{'loss': 0.8961, 'grad_norm': 6.465915203094482, 'learning_rate': 6.719859357631535e-05, 'epoch': 2.7}\n",
+ "{'loss': 0.8434, 'grad_norm': 5.6094183921813965, 'learning_rate': 6.670992746965938e-05, 'epoch': 2.71}\n",
+ "{'loss': 1.0485, 'grad_norm': 5.219779968261719, 'learning_rate': 6.621945790169036e-05, 'epoch': 2.73}\n",
+ "{'loss': 1.0165, 'grad_norm': 5.263071060180664, 'learning_rate': 6.572723780758069e-05, 'epoch': 2.75}\n",
+ "{'loss': 0.9104, 'grad_norm': 4.919801235198975, 'learning_rate': 6.523332031143272e-05, 'epoch': 2.77}\n",
+ "{'loss': 0.9633, 'grad_norm': 4.69899320602417, 'learning_rate': 6.473775872054521e-05, 'epoch': 2.78}\n",
+ "{'loss': 0.8483, 'grad_norm': 4.0923285484313965, 'learning_rate': 6.424060651966007e-05, 'epoch': 2.8}\n",
+ "{'loss': 0.8888, 'grad_norm': 5.461803436279297, 'learning_rate': 6.374191736518974e-05, 'epoch': 2.82}\n",
+ "{'loss': 0.9598, 'grad_norm': 4.758564472198486, 'learning_rate': 6.324174507942637e-05, 'epoch': 2.84}\n",
+ "{'loss': 0.9436, 'grad_norm': 6.395792007446289, 'learning_rate': 6.274014364473274e-05, 'epoch': 2.86}\n",
+ "{'loss': 1.1634, 'grad_norm': 6.077510356903076, 'learning_rate': 6.22371671977162e-05, 'epoch': 2.87}\n",
+ "{'loss': 1.0049, 'grad_norm': 5.1858720779418945, 'learning_rate': 6.173287002338577e-05, 'epoch': 2.89}\n",
+ "{'loss': 0.9795, 'grad_norm': 6.103806972503662, 'learning_rate': 6.122730654929334e-05, 'epoch': 2.91}\n",
+ "{'loss': 0.9422, 'grad_norm': 5.469768524169922, 'learning_rate': 6.072053133965938e-05, 'epoch': 2.93}\n",
+ "{'loss': 1.0349, 'grad_norm': 4.436359405517578, 'learning_rate': 6.021259908948402e-05, 'epoch': 2.95}\n",
+ "{'loss': 1.1161, 'grad_norm': 5.872861862182617, 'learning_rate': 5.970356461864391e-05, 'epoch': 2.96}\n",
+ "{'loss': 0.9069, 'grad_norm': 5.360676288604736, 'learning_rate': 5.919348286597569e-05, 'epoch': 2.98}\n",
+ "{'loss': 1.0593, 'grad_norm': 4.815310001373291, 'learning_rate': 5.868240888334653e-05, 'epoch': 3.0}\n",
+ " 50%|███████████████████▌ | 1680/3360 [46:46<45:31, 1.63s/it][INFO|trainer.py:3788] 2024-07-05 07:03:13,485 >> \n",
+ "***** Running Evaluation *****\n",
+ "[INFO|trainer.py:3790] 2024-07-05 07:03:13,485 >> Num examples = 46\n",
+ "[INFO|trainer.py:3793] 2024-07-05 07:03:13,485 >> Batch size = 1\n",
+ "\n",
+ " 0%| | 0/46 [00:00, ?it/s]\u001b[A\n",
+ " 7%|██▊ | 3/46 [00:00<00:02, 20.21it/s]\u001b[A\n",
+ " 13%|█████▋ | 6/46 [00:00<00:02, 17.41it/s]\u001b[A\n",
+ " 17%|███████▋ | 8/46 [00:00<00:02, 17.26it/s]\u001b[A\n",
+ " 22%|█████████▎ | 10/46 [00:00<00:02, 16.97it/s]\u001b[A\n",
+ " 26%|███████████▏ | 12/46 [00:00<00:02, 16.11it/s]\u001b[A\n",
+ " 30%|█████████████ | 14/46 [00:00<00:01, 16.37it/s]\u001b[A\n",
+ " 35%|██████████████▉ | 16/46 [00:00<00:01, 16.50it/s]\u001b[A\n",
+ " 39%|████████████████▊ | 18/46 [00:01<00:01, 15.76it/s]\u001b[A\n",
+ " 43%|██████████████████▋ | 20/46 [00:01<00:01, 15.94it/s]\u001b[A\n",
+ " 48%|████████████████████▌ | 22/46 [00:01<00:01, 16.48it/s]\u001b[A\n",
+ " 52%|██████████████████████▍ | 24/46 [00:01<00:01, 16.40it/s]\u001b[A\n",
+ " 57%|████████████████████████▎ | 26/46 [00:01<00:01, 16.08it/s]\u001b[A\n",
+ " 61%|██████████████████████████▏ | 28/46 [00:01<00:01, 16.55it/s]\u001b[A\n",
+ " 65%|████████████████████████████ | 30/46 [00:01<00:01, 15.12it/s]\u001b[A\n",
+ " 70%|█████████████████████████████▉ | 32/46 [00:02<00:00, 14.44it/s]\u001b[A\n",
+ " 74%|███████████████████████████████▊ | 34/46 [00:02<00:00, 14.78it/s]\u001b[A\n",
+ " 78%|█████████████████████████████████▋ | 36/46 [00:02<00:00, 15.38it/s]\u001b[A\n",
+ " 83%|███████████████████████████████████▌ | 38/46 [00:02<00:00, 15.94it/s]\u001b[A\n",
+ " 87%|█████████████████████████████████████▍ | 40/46 [00:02<00:00, 16.33it/s]\u001b[A\n",
+ " 91%|███████████████████████████████████████▎ | 42/46 [00:02<00:00, 16.56it/s]\u001b[A\n",
+ " 96%|█████████████████████████████████████████▏ | 44/46 [00:02<00:00, 16.62it/s]\u001b[A\n",
+ " \u001b[A\n",
+ "\u001b[A{'eval_loss': 2.4282326698303223, 'eval_runtime': 2.8929, 'eval_samples_per_second': 15.901, 'eval_steps_per_second': 15.901, 'epoch': 3.0}\n",
+ " 50%|███████████████████▌ | 1680/3360 [46:49<45:31, 1.63s/it]\n",
+ "100%|███████████████████████████████████████████| 46/46 [00:02<00:00, 16.86it/s]\u001b[A\n",
+ " \u001b[A[INFO|trainer.py:3478] 2024-07-05 07:03:16,380 >> Saving model checkpoint to saves/qwen2-0.5b/lora/sft/checkpoint-1680\n",
+ "[INFO|configuration_utils.py:733] 2024-07-05 07:03:17,790 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--unsloth--qwen2-0.5b-instruct-bnb-4bit/snapshots/c3b24ce4827d69f5c3bde9aba00047774069ab72/config.json\n",
+ "[INFO|configuration_utils.py:800] 2024-07-05 07:03:17,790 >> Model config Qwen2Config {\n",
+ " \"_name_or_path\": \"Qwen/Qwen2-0.5B-Instruct\",\n",
+ " \"architectures\": [\n",
+ " \"Qwen2ForCausalLM\"\n",
+ " ],\n",
+ " \"attention_dropout\": 0.0,\n",
+ " \"bos_token_id\": 151643,\n",
+ " \"eos_token_id\": 151645,\n",
+ " \"hidden_act\": \"silu\",\n",
+ " \"hidden_size\": 896,\n",
+ " \"initializer_range\": 0.02,\n",
+ " \"intermediate_size\": 4864,\n",
+ " \"max_position_embeddings\": 32768,\n",
+ " \"max_window_layers\": 24,\n",
+ " \"model_type\": \"qwen2\",\n",
+ " \"num_attention_heads\": 14,\n",
+ " \"num_hidden_layers\": 24,\n",
+ " \"num_key_value_heads\": 2,\n",
+ " \"quantization_config\": {\n",
+ " \"_load_in_4bit\": true,\n",
+ " \"_load_in_8bit\": false,\n",
+ " \"bnb_4bit_compute_dtype\": \"bfloat16\",\n",
+ " \"bnb_4bit_quant_storage\": \"uint8\",\n",
+ " \"bnb_4bit_quant_type\": \"nf4\",\n",
+ " \"bnb_4bit_use_double_quant\": true,\n",
+ " \"llm_int8_enable_fp32_cpu_offload\": false,\n",
+ " \"llm_int8_has_fp16_weight\": false,\n",
+ " \"llm_int8_skip_modules\": null,\n",
+ " \"llm_int8_threshold\": 6.0,\n",
+ " \"load_in_4bit\": true,\n",
+ " \"load_in_8bit\": false,\n",
+ " \"quant_method\": \"bitsandbytes\"\n",
+ " },\n",
+ " \"rms_norm_eps\": 1e-06,\n",
+ " \"rope_theta\": 1000000.0,\n",
+ " \"sliding_window\": 32768,\n",
+ " \"tie_word_embeddings\": true,\n",
+ " \"torch_dtype\": \"bfloat16\",\n",
+ " \"transformers_version\": \"4.42.3\",\n",
+ " \"use_cache\": true,\n",
+ " \"use_sliding_window\": false,\n",
+ " \"vocab_size\": 151936\n",
+ "}\n",
+ "\n",
+ "{'loss': 0.6987, 'grad_norm': 4.28726863861084, 'learning_rate': 5.8170397829712485e-05, 'epoch': 3.02}\n",
+ "{'loss': 0.3462, 'grad_norm': 5.342904567718506, 'learning_rate': 5.765750496516547e-05, 'epoch': 3.03}\n",
+ "{'loss': 0.4899, 'grad_norm': 3.8532354831695557, 'learning_rate': 5.714378564496901e-05, 'epoch': 3.05}\n",
+ "{'loss': 0.4609, 'grad_norm': 4.3072590827941895, 'learning_rate': 5.6629295313583974e-05, 'epoch': 3.07}\n",
+ "{'loss': 0.4106, 'grad_norm': 4.2518463134765625, 'learning_rate': 5.611408949868457e-05, 'epoch': 3.09}\n",
+ "{'loss': 0.5169, 'grad_norm': 4.579401016235352, 'learning_rate': 5.559822380516539e-05, 'epoch': 3.11}\n",
+ "{'loss': 0.4794, 'grad_norm': 3.6858370304107666, 'learning_rate': 5.5081753909140096e-05, 'epoch': 3.12}\n",
+ "{'loss': 0.5473, 'grad_norm': 8.67149543762207, 'learning_rate': 5.456473555193242e-05, 'epoch': 3.14}\n",
+ "{'loss': 0.4638, 'grad_norm': 6.095928192138672, 'learning_rate': 5.404722453406017e-05, 'epoch': 3.16}\n",
+ "{'loss': 0.4697, 'grad_norm': 6.712044715881348, 'learning_rate': 5.3529276709212816e-05, 'epoch': 3.18}\n",
+ "{'loss': 0.4869, 'grad_norm': 4.1765336990356445, 'learning_rate': 5.30109479782233e-05, 'epoch': 3.2}\n",
+ "{'loss': 0.4821, 'grad_norm': 4.068556308746338, 'learning_rate': 5.249229428303486e-05, 'epoch': 3.21}\n",
+ "{'loss': 0.6011, 'grad_norm': 3.6553525924682617, 'learning_rate': 5.197337160066331e-05, 'epoch': 3.23}\n",
+ "{'loss': 0.4558, 'grad_norm': 4.888422012329102, 'learning_rate': 5.145423593715557e-05, 'epoch': 3.25}\n",
+ "{'loss': 0.5203, 'grad_norm': 4.138525009155273, 'learning_rate': 5.0934943321545115e-05, 'epoch': 3.27}\n",
+ "{'loss': 0.3826, 'grad_norm': 4.2213358879089355, 'learning_rate': 5.041554979980486e-05, 'epoch': 3.28}\n",
+ "{'loss': 0.5895, 'grad_norm': 4.9374260902404785, 'learning_rate': 4.9896111428798254e-05, 'epoch': 3.3}\n",
+ "{'loss': 0.5609, 'grad_norm': 4.482494831085205, 'learning_rate': 4.9376684270229254e-05, 'epoch': 3.32}\n",
+ "{'loss': 0.5478, 'grad_norm': 3.9575753211975098, 'learning_rate': 4.8857324384591653e-05, 'epoch': 3.34}\n",
+ "{'loss': 0.4865, 'grad_norm': 5.01925802230835, 'learning_rate': 4.8338087825118675e-05, 'epoch': 3.36}\n",
+ "{'loss': 0.5365, 'grad_norm': 4.109598636627197, 'learning_rate': 4.781903063173321e-05, 'epoch': 3.37}\n",
+ "{'loss': 0.4814, 'grad_norm': 3.7702512741088867, 'learning_rate': 4.730020882499964e-05, 'epoch': 3.39}\n",
+ "{'loss': 0.5355, 'grad_norm': 6.243114948272705, 'learning_rate': 4.678167840007767e-05, 'epoch': 3.41}\n",
+ "{'loss': 0.5361, 'grad_norm': 4.488025188446045, 'learning_rate': 4.626349532067879e-05, 'epoch': 3.43}\n",
+ "{'loss': 0.5952, 'grad_norm': 4.389721870422363, 'learning_rate': 4.574571551302647e-05, 'epoch': 3.44}\n",
+ "{'loss': 0.6049, 'grad_norm': 4.847557067871094, 'learning_rate': 4.522839485981994e-05, 'epoch': 3.46}\n",
+ "{'loss': 0.5697, 'grad_norm': 3.9925057888031006, 'learning_rate': 4.471158919420312e-05, 'epoch': 3.48}\n",
+ "{'loss': 0.5018, 'grad_norm': 5.327306747436523, 'learning_rate': 4.4195354293738484e-05, 'epoch': 3.5}\n",
+ "{'loss': 0.4745, 'grad_norm': 5.380455493927002, 'learning_rate': 4.367974587438733e-05, 'epoch': 3.52}\n",
+ "{'loss': 0.5421, 'grad_norm': 3.978426694869995, 'learning_rate': 4.316481958449634e-05, 'epoch': 3.53}\n",
+ "{'loss': 0.5091, 'grad_norm': 8.685088157653809, 'learning_rate': 4.2650630998791615e-05, 'epoch': 3.55}\n",
+ "{'loss': 0.6102, 'grad_norm': 4.471510887145996, 'learning_rate': 4.213723561238074e-05, 'epoch': 3.57}\n",
+ "{'loss': 0.4623, 'grad_norm': 4.236584663391113, 'learning_rate': 4.162468883476319e-05, 'epoch': 3.59}\n",
+ "{'loss': 0.5203, 'grad_norm': 5.698358535766602, 'learning_rate': 4.111304598385018e-05, 'epoch': 3.61}\n",
+ "{'loss': 0.5314, 'grad_norm': 5.975699424743652, 'learning_rate': 4.060236227999441e-05, 'epoch': 3.62}\n",
+ "{'loss': 0.484, 'grad_norm': 5.300996780395508, 'learning_rate': 4.0092692840030134e-05, 'epoch': 3.64}\n",
+ "{'loss': 0.4564, 'grad_norm': 7.857934474945068, 'learning_rate': 3.9584092671324606e-05, 'epoch': 3.66}\n",
+ "{'loss': 0.5715, 'grad_norm': 3.796581268310547, 'learning_rate': 3.907661666584131e-05, 'epoch': 3.68}\n",
+ "{'loss': 0.539, 'grad_norm': 4.170958995819092, 'learning_rate': 3.857031959421553e-05, 'epoch': 3.69}\n",
+ "{'loss': 0.5249, 'grad_norm': 6.283390045166016, 'learning_rate': 3.806525609984312e-05, 'epoch': 3.71}\n",
+ "{'loss': 0.4406, 'grad_norm': 6.235040664672852, 'learning_rate': 3.7561480692983006e-05, 'epoch': 3.73}\n",
+ "{'loss': 0.553, 'grad_norm': 3.715141534805298, 'learning_rate': 3.705904774487396e-05, 'epoch': 3.75}\n",
+ "{'loss': 0.5154, 'grad_norm': 6.352488040924072, 'learning_rate': 3.655801148186655e-05, 'epoch': 3.77}\n",
+ "{'loss': 0.4681, 'grad_norm': 4.480152130126953, 'learning_rate': 3.6058425979570485e-05, 'epoch': 3.78}\n",
+ "{'loss': 0.4915, 'grad_norm': 5.1917219161987305, 'learning_rate': 3.556034515701852e-05, 'epoch': 3.8}\n",
+ "{'loss': 0.5371, 'grad_norm': 4.501936912536621, 'learning_rate': 3.506382277084696e-05, 'epoch': 3.82}\n",
+ "{'loss': 0.5273, 'grad_norm': 3.53322434425354, 'learning_rate': 3.4568912409493945e-05, 'epoch': 3.84}\n",
+ "{'loss': 0.4405, 'grad_norm': 4.688470840454102, 'learning_rate': 3.4075667487415785e-05, 'epoch': 3.86}\n",
+ "{'loss': 0.5048, 'grad_norm': 6.739779949188232, 'learning_rate': 3.358414123932195e-05, 'epoch': 3.87}\n",
+ "{'loss': 0.5572, 'grad_norm': 4.120084762573242, 'learning_rate': 3.3094386714429724e-05, 'epoch': 3.89}\n",
+ "{'loss': 0.5498, 'grad_norm': 7.938605785369873, 'learning_rate': 3.2606456770738636e-05, 'epoch': 3.91}\n",
+ "{'loss': 0.3955, 'grad_norm': 4.132835865020752, 'learning_rate': 3.212040406932569e-05, 'epoch': 3.93}\n",
+ "{'loss': 0.4149, 'grad_norm': 3.777303457260132, 'learning_rate': 3.163628106866172e-05, 'epoch': 3.94}\n",
+ "{'loss': 0.6278, 'grad_norm': 5.201406955718994, 'learning_rate': 3.115414001894974e-05, 'epoch': 3.96}\n",
+ "{'loss': 0.5186, 'grad_norm': 7.258588790893555, 'learning_rate': 3.067403295648566e-05, 'epoch': 3.98}\n",
+ "{'loss': 0.4754, 'grad_norm': 3.8839337825775146, 'learning_rate': 3.019601169804216e-05, 'epoch': 4.0}\n",
+ " 67%|████████████████████████▋ | 2240/3360 [1:01:49<29:29, 1.58s/it][INFO|trainer.py:3788] 2024-07-05 07:18:16,440 >> \n",
+ "***** Running Evaluation *****\n",
+ "[INFO|trainer.py:3790] 2024-07-05 07:18:16,441 >> Num examples = 46\n",
+ "[INFO|trainer.py:3793] 2024-07-05 07:18:16,441 >> Batch size = 1\n",
+ "\n",
+ " 0%| | 0/46 [00:00, ?it/s]\u001b[A\n",
+ " 7%|██▊ | 3/46 [00:00<00:01, 22.55it/s]\u001b[A\n",
+ " 13%|█████▋ | 6/46 [00:00<00:02, 19.36it/s]\u001b[A\n",
+ " 17%|███████▋ | 8/46 [00:00<00:02, 18.55it/s]\u001b[A\n",
+ " 22%|█████████▎ | 10/46 [00:00<00:02, 17.05it/s]\u001b[A\n",
+ " 26%|███████████▏ | 12/46 [00:00<00:02, 16.37it/s]\u001b[A\n",
+ " 30%|█████████████ | 14/46 [00:00<00:01, 16.48it/s]\u001b[A\n",
+ " 35%|██████████████▉ | 16/46 [00:00<00:01, 16.70it/s]\u001b[A\n",
+ " 39%|████████████████▊ | 18/46 [00:01<00:01, 16.96it/s]\u001b[A\n",
+ " 43%|██████████████████▋ | 20/46 [00:01<00:01, 17.35it/s]\u001b[A\n",
+ " 48%|████████████████████▌ | 22/46 [00:01<00:01, 17.29it/s]\u001b[A\n",
+ " 52%|██████████████████████▍ | 24/46 [00:01<00:01, 17.27it/s]\u001b[A\n",
+ " 57%|████████████████████████▎ | 26/46 [00:01<00:01, 17.07it/s]\u001b[A\n",
+ " 61%|██████████████████████████▏ | 28/46 [00:01<00:01, 17.13it/s]\u001b[A\n",
+ " 65%|████████████████████████████ | 30/46 [00:01<00:00, 16.29it/s]\u001b[A\n",
+ " 70%|█████████████████████████████▉ | 32/46 [00:01<00:00, 16.28it/s]\u001b[A\n",
+ " 74%|███████████████████████████████▊ | 34/46 [00:01<00:00, 16.60it/s]\u001b[A\n",
+ " 78%|█████████████████████████████████▋ | 36/46 [00:02<00:00, 16.25it/s]\u001b[A\n",
+ " 83%|███████████████████████████████████▌ | 38/46 [00:02<00:00, 16.33it/s]\u001b[A\n",
+ " 87%|█████████████████████████████████████▍ | 40/46 [00:02<00:00, 16.07it/s]\u001b[A\n",
+ " 91%|███████████████████████████████████████▎ | 42/46 [00:02<00:00, 16.22it/s]\u001b[A\n",
+ " 96%|█████████████████████████████████████████▏ | 44/46 [00:02<00:00, 15.96it/s]\u001b[A\n",
+ " \u001b[A\n",
+ "\u001b[A{'eval_loss': 2.7581844329833984, 'eval_runtime': 2.8365, 'eval_samples_per_second': 16.217, 'eval_steps_per_second': 16.217, 'epoch': 4.0}\n",
+ " 67%|████████████████████████▋ | 2240/3360 [1:01:52<29:29, 1.58s/it]\n",
+ "100%|███████████████████████████████████████████| 46/46 [00:02<00:00, 15.12it/s]\u001b[A\n",
+ " \u001b[A[INFO|trainer.py:3478] 2024-07-05 07:18:19,279 >> Saving model checkpoint to saves/qwen2-0.5b/lora/sft/checkpoint-2240\n",
+ "[INFO|configuration_utils.py:733] 2024-07-05 07:18:20,481 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--unsloth--qwen2-0.5b-instruct-bnb-4bit/snapshots/c3b24ce4827d69f5c3bde9aba00047774069ab72/config.json\n",
+ "[INFO|configuration_utils.py:800] 2024-07-05 07:18:20,481 >> Model config Qwen2Config {\n",
+ " \"_name_or_path\": \"Qwen/Qwen2-0.5B-Instruct\",\n",
+ " \"architectures\": [\n",
+ " \"Qwen2ForCausalLM\"\n",
+ " ],\n",
+ " \"attention_dropout\": 0.0,\n",
+ " \"bos_token_id\": 151643,\n",
+ " \"eos_token_id\": 151645,\n",
+ " \"hidden_act\": \"silu\",\n",
+ " \"hidden_size\": 896,\n",
+ " \"initializer_range\": 0.02,\n",
+ " \"intermediate_size\": 4864,\n",
+ " \"max_position_embeddings\": 32768,\n",
+ " \"max_window_layers\": 24,\n",
+ " \"model_type\": \"qwen2\",\n",
+ " \"num_attention_heads\": 14,\n",
+ " \"num_hidden_layers\": 24,\n",
+ " \"num_key_value_heads\": 2,\n",
+ " \"quantization_config\": {\n",
+ " \"_load_in_4bit\": true,\n",
+ " \"_load_in_8bit\": false,\n",
+ " \"bnb_4bit_compute_dtype\": \"bfloat16\",\n",
+ " \"bnb_4bit_quant_storage\": \"uint8\",\n",
+ " \"bnb_4bit_quant_type\": \"nf4\",\n",
+ " \"bnb_4bit_use_double_quant\": true,\n",
+ " \"llm_int8_enable_fp32_cpu_offload\": false,\n",
+ " \"llm_int8_has_fp16_weight\": false,\n",
+ " \"llm_int8_skip_modules\": null,\n",
+ " \"llm_int8_threshold\": 6.0,\n",
+ " \"load_in_4bit\": true,\n",
+ " \"load_in_8bit\": false,\n",
+ " \"quant_method\": \"bitsandbytes\"\n",
+ " },\n",
+ " \"rms_norm_eps\": 1e-06,\n",
+ " \"rope_theta\": 1000000.0,\n",
+ " \"sliding_window\": 32768,\n",
+ " \"tie_word_embeddings\": true,\n",
+ " \"torch_dtype\": \"bfloat16\",\n",
+ " \"transformers_version\": \"4.42.3\",\n",
+ " \"use_cache\": true,\n",
+ " \"use_sliding_window\": false,\n",
+ " \"vocab_size\": 151936\n",
+ "}\n",
+ "\n",
+ "{'loss': 0.3004, 'grad_norm': 3.442147731781006, 'learning_rate': 2.9720127835276256e-05, 'epoch': 4.02}\n",
+ "{'loss': 0.2198, 'grad_norm': 3.406682252883911, 'learning_rate': 2.9246432729161055e-05, 'epoch': 4.03}\n",
+ "{'loss': 0.2312, 'grad_norm': 3.0993845462799072, 'learning_rate': 2.8774977504442647e-05, 'epoch': 4.05}\n",
+ "{'loss': 0.2038, 'grad_norm': 4.1203694343566895, 'learning_rate': 2.8305813044122097e-05, 'epoch': 4.07}\n",
+ "{'loss': 0.1819, 'grad_norm': 2.116147756576538, 'learning_rate': 2.7838989983964065e-05, 'epoch': 4.09}\n",
+ "{'loss': 0.2672, 'grad_norm': 3.2091379165649414, 'learning_rate': 2.737455870703155e-05, 'epoch': 4.11}\n",
+ "{'loss': 0.1779, 'grad_norm': 2.386085033416748, 'learning_rate': 2.6912569338248315e-05, 'epoch': 4.12}\n",
+ "{'loss': 0.2997, 'grad_norm': 28.640592575073242, 'learning_rate': 2.645307173898901e-05, 'epoch': 4.14}\n",
+ "{'loss': 0.227, 'grad_norm': 2.2596945762634277, 'learning_rate': 2.5996115501697694e-05, 'epoch': 4.16}\n",
+ "{'loss': 0.2184, 'grad_norm': 4.521151065826416, 'learning_rate': 2.5541749944535554e-05, 'epoch': 4.18}\n",
+ "{'loss': 0.2038, 'grad_norm': 5.861654281616211, 'learning_rate': 2.5090024106057962e-05, 'epoch': 4.19}\n",
+ "{'loss': 0.2423, 'grad_norm': 4.1528639793396, 'learning_rate': 2.464098673992205e-05, 'epoch': 4.21}\n",
+ "{'loss': 0.2367, 'grad_norm': 2.1180801391601562, 'learning_rate': 2.4194686309624663e-05, 'epoch': 4.23}\n",
+ "{'loss': 0.2627, 'grad_norm': 4.030113697052002, 'learning_rate': 2.3751170983272e-05, 'epoch': 4.25}\n",
+ "{'loss': 0.2329, 'grad_norm': 4.907358646392822, 'learning_rate': 2.3310488628380757e-05, 'epoch': 4.27}\n",
+ "{'loss': 0.2542, 'grad_norm': 4.652915000915527, 'learning_rate': 2.2872686806712035e-05, 'epoch': 4.28}\n",
+ "{'loss': 0.2495, 'grad_norm': 5.10890531539917, 'learning_rate': 2.243781276913811e-05, 'epoch': 4.3}\n",
+ "{'loss': 0.1662, 'grad_norm': 3.823878288269043, 'learning_rate': 2.200591345054267e-05, 'epoch': 4.32}\n",
+ "{'loss': 0.2812, 'grad_norm': 3.004128932952881, 'learning_rate': 2.157703546475539e-05, 'epoch': 4.34}\n",
+ "{'loss': 0.2202, 'grad_norm': 4.443856716156006, 'learning_rate': 2.115122509952085e-05, 'epoch': 4.36}\n",
+ "{'loss': 0.2244, 'grad_norm': 2.996962070465088, 'learning_rate': 2.0728528311502976e-05, 'epoch': 4.37}\n",
+ "{'loss': 0.2474, 'grad_norm': 4.116214752197266, 'learning_rate': 2.0308990721324927e-05, 'epoch': 4.39}\n",
+ "{'loss': 0.1881, 'grad_norm': 4.773007392883301, 'learning_rate': 1.989265760864542e-05, 'epoch': 4.41}\n",
+ "{'loss': 0.2721, 'grad_norm': 3.045060873031616, 'learning_rate': 1.947957390727185e-05, 'epoch': 4.43}\n",
+ "{'loss': 0.2474, 'grad_norm': 5.480595111846924, 'learning_rate': 1.906978420031059e-05, 'epoch': 4.44}\n",
+ "{'loss': 0.1786, 'grad_norm': 2.452791929244995, 'learning_rate': 1.8663332715355396e-05, 'epoch': 4.46}\n",
+ "{'loss': 0.2655, 'grad_norm': 1.6951186656951904, 'learning_rate': 1.8260263319713844e-05, 'epoch': 4.48}\n",
+ "{'loss': 0.2307, 'grad_norm': 4.780274868011475, 'learning_rate': 1.7860619515673033e-05, 'epoch': 4.5}\n",
+ "{'loss': 0.2661, 'grad_norm': 4.14153528213501, 'learning_rate': 1.746444443580433e-05, 'epoch': 4.52}\n",
+ "{'loss': 0.2482, 'grad_norm': 4.6406989097595215, 'learning_rate': 1.7071780838308288e-05, 'epoch': 4.53}\n",
+ "{'loss': 0.2268, 'grad_norm': 3.8813576698303223, 'learning_rate': 1.6682671102399805e-05, 'epoch': 4.55}\n",
+ "{'loss': 0.2058, 'grad_norm': 3.428504467010498, 'learning_rate': 1.629715722373423e-05, 'epoch': 4.57}\n",
+ "{'loss': 0.2407, 'grad_norm': 4.3143415451049805, 'learning_rate': 1.5915280809874932e-05, 'epoch': 4.59}\n",
+ "{'loss': 0.2153, 'grad_norm': 4.420351505279541, 'learning_rate': 1.553708307580265e-05, 'epoch': 4.61}\n",
+ "{'loss': 0.2423, 'grad_norm': 3.758807897567749, 'learning_rate': 1.5162604839467265e-05, 'epoch': 4.62}\n",
+ "{'loss': 0.2376, 'grad_norm': 3.164726734161377, 'learning_rate': 1.4791886517382413e-05, 'epoch': 4.64}\n",
+ "{'loss': 0.223, 'grad_norm': 2.6924712657928467, 'learning_rate': 1.4424968120263504e-05, 'epoch': 4.66}\n",
+ "{'loss': 0.224, 'grad_norm': 3.1326253414154053, 'learning_rate': 1.4061889248709343e-05, 'epoch': 4.68}\n",
+ "{'loss': 0.2097, 'grad_norm': 3.1166789531707764, 'learning_rate': 1.370268908892825e-05, 'epoch': 4.69}\n",
+ "{'loss': 0.2607, 'grad_norm': 4.3387651443481445, 'learning_rate': 1.3347406408508695e-05, 'epoch': 4.71}\n",
+ "{'loss': 0.2217, 'grad_norm': 2.9194934368133545, 'learning_rate': 1.2996079552235263e-05, 'epoch': 4.73}\n",
+ "{'loss': 0.175, 'grad_norm': 2.6297366619110107, 'learning_rate': 1.264874643795021e-05, 'epoch': 4.75}\n",
+ "{'loss': 0.2148, 'grad_norm': 3.174553632736206, 'learning_rate': 1.230544455246101e-05, 'epoch': 4.77}\n",
+ "{'loss': 0.246, 'grad_norm': 3.611652374267578, 'learning_rate': 1.1966210947494583e-05, 'epoch': 4.78}\n",
+ "{'loss': 0.2477, 'grad_norm': 3.13002610206604, 'learning_rate': 1.1631082235698316e-05, 'epoch': 4.8}\n",
+ "{'loss': 0.2034, 'grad_norm': 3.1411221027374268, 'learning_rate': 1.130009458668863e-05, 'epoch': 4.82}\n",
+ "{'loss': 0.1899, 'grad_norm': 3.8253543376922607, 'learning_rate': 1.097328372314721e-05, 'epoch': 4.84}\n",
+ "{'loss': 0.2432, 'grad_norm': 4.582285404205322, 'learning_rate': 1.0650684916965559e-05, 'epoch': 4.85}\n",
+ "{'loss': 0.2412, 'grad_norm': 3.9309003353118896, 'learning_rate': 1.0332332985438248e-05, 'epoch': 4.87}\n",
+ "{'loss': 0.2543, 'grad_norm': 4.181048393249512, 'learning_rate': 1.0018262287505086e-05, 'epoch': 4.89}\n",
+ "{'loss': 0.2759, 'grad_norm': 1.8343684673309326, 'learning_rate': 9.708506720042932e-06, 'epoch': 4.91}\n",
+ "{'loss': 0.189, 'grad_norm': 2.335709571838379, 'learning_rate': 9.403099714207175e-06, 'epoch': 4.93}\n",
+ "{'loss': 0.2495, 'grad_norm': 4.065298080444336, 'learning_rate': 9.102074231823727e-06, 'epoch': 4.94}\n",
+ "{'loss': 0.2557, 'grad_norm': 2.8378493785858154, 'learning_rate': 8.805462761831418e-06, 'epoch': 4.96}\n",
+ "{'loss': 0.2784, 'grad_norm': 3.395693063735962, 'learning_rate': 8.513297316775625e-06, 'epoch': 4.98}\n",
+ "{'loss': 0.2621, 'grad_norm': 4.082712173461914, 'learning_rate': 8.225609429353187e-06, 'epoch': 5.0}\n",
+ " 83%|██████████████████████████████▊ | 2800/3360 [1:16:55<15:13, 1.63s/it][INFO|trainer.py:3788] 2024-07-05 07:33:22,309 >> \n",
+ "***** Running Evaluation *****\n",
+ "[INFO|trainer.py:3790] 2024-07-05 07:33:22,309 >> Num examples = 46\n",
+ "[INFO|trainer.py:3793] 2024-07-05 07:33:22,310 >> Batch size = 1\n",
+ "\n",
+ " 0%| | 0/46 [00:00, ?it/s]\u001b[A\n",
+ " 7%|██▊ | 3/46 [00:00<00:01, 25.87it/s]\u001b[A\n",
+ " 13%|█████▋ | 6/46 [00:00<00:02, 18.98it/s]\u001b[A\n",
+ " 20%|████████▌ | 9/46 [00:00<00:02, 17.81it/s]\u001b[A\n",
+ " 24%|██████████▎ | 11/46 [00:00<00:01, 17.53it/s]\u001b[A\n",
+ " 28%|████████████▏ | 13/46 [00:00<00:01, 17.51it/s]\u001b[A\n",
+ " 33%|██████████████ | 15/46 [00:00<00:01, 17.04it/s]\u001b[A\n",
+ " 37%|███████████████▉ | 17/46 [00:00<00:01, 17.21it/s]\u001b[A\n",
+ " 41%|█████████████████▊ | 19/46 [00:01<00:01, 16.95it/s]\u001b[A\n",
+ " 46%|███████████████████▋ | 21/46 [00:01<00:01, 16.41it/s]\u001b[A\n",
+ " 50%|█████████████████████▌ | 23/46 [00:01<00:01, 16.44it/s]\u001b[A\n",
+ " 54%|███████████████████████▎ | 25/46 [00:01<00:01, 16.30it/s]\u001b[A\n",
+ " 59%|█████████████████████████▏ | 27/46 [00:01<00:01, 15.42it/s]\u001b[A\n",
+ " 63%|███████████████████████████ | 29/46 [00:01<00:01, 16.03it/s]\u001b[A\n",
+ " 67%|████████████████████████████▉ | 31/46 [00:01<00:00, 15.59it/s]\u001b[A\n",
+ " 72%|██████████████████████████████▊ | 33/46 [00:01<00:00, 15.23it/s]\u001b[A\n",
+ " 76%|████████████████████████████████▋ | 35/46 [00:02<00:00, 13.78it/s]\u001b[A\n",
+ " 80%|██████████████████████████████████▌ | 37/46 [00:02<00:00, 14.60it/s]\u001b[A\n",
+ " 85%|████████████████████████████████████▍ | 39/46 [00:02<00:00, 14.85it/s]\u001b[A\n",
+ " 89%|██████████████████████████████████████▎ | 41/46 [00:02<00:00, 14.92it/s]\u001b[A\n",
+ " 93%|████████████████████████████████████████▏ | 43/46 [00:02<00:00, 15.67it/s]\u001b[A\n",
+ " \u001b[A\n",
+ "\u001b[A{'eval_loss': 3.1747782230377197, 'eval_runtime': 2.9312, 'eval_samples_per_second': 15.693, 'eval_steps_per_second': 15.693, 'epoch': 5.0}\n",
+ " 83%|██████████████████████████████▊ | 2800/3360 [1:16:58<15:13, 1.63s/it]\n",
+ "100%|███████████████████████████████████████████| 46/46 [00:02<00:00, 15.43it/s]\u001b[A\n",
+ " \u001b[A[INFO|trainer.py:3478] 2024-07-05 07:33:25,242 >> Saving model checkpoint to saves/qwen2-0.5b/lora/sft/checkpoint-2800\n",
+ "[INFO|configuration_utils.py:733] 2024-07-05 07:33:26,470 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--unsloth--qwen2-0.5b-instruct-bnb-4bit/snapshots/c3b24ce4827d69f5c3bde9aba00047774069ab72/config.json\n",
+ "[INFO|configuration_utils.py:800] 2024-07-05 07:33:26,471 >> Model config Qwen2Config {\n",
+ " \"_name_or_path\": \"Qwen/Qwen2-0.5B-Instruct\",\n",
+ " \"architectures\": [\n",
+ " \"Qwen2ForCausalLM\"\n",
+ " ],\n",
+ " \"attention_dropout\": 0.0,\n",
+ " \"bos_token_id\": 151643,\n",
+ " \"eos_token_id\": 151645,\n",
+ " \"hidden_act\": \"silu\",\n",
+ " \"hidden_size\": 896,\n",
+ " \"initializer_range\": 0.02,\n",
+ " \"intermediate_size\": 4864,\n",
+ " \"max_position_embeddings\": 32768,\n",
+ " \"max_window_layers\": 24,\n",
+ " \"model_type\": \"qwen2\",\n",
+ " \"num_attention_heads\": 14,\n",
+ " \"num_hidden_layers\": 24,\n",
+ " \"num_key_value_heads\": 2,\n",
+ " \"quantization_config\": {\n",
+ " \"_load_in_4bit\": true,\n",
+ " \"_load_in_8bit\": false,\n",
+ " \"bnb_4bit_compute_dtype\": \"bfloat16\",\n",
+ " \"bnb_4bit_quant_storage\": \"uint8\",\n",
+ " \"bnb_4bit_quant_type\": \"nf4\",\n",
+ " \"bnb_4bit_use_double_quant\": true,\n",
+ " \"llm_int8_enable_fp32_cpu_offload\": false,\n",
+ " \"llm_int8_has_fp16_weight\": false,\n",
+ " \"llm_int8_skip_modules\": null,\n",
+ " \"llm_int8_threshold\": 6.0,\n",
+ " \"load_in_4bit\": true,\n",
+ " \"load_in_8bit\": false,\n",
+ " \"quant_method\": \"bitsandbytes\"\n",
+ " },\n",
+ " \"rms_norm_eps\": 1e-06,\n",
+ " \"rope_theta\": 1000000.0,\n",
+ " \"sliding_window\": 32768,\n",
+ " \"tie_word_embeddings\": true,\n",
+ " \"torch_dtype\": \"bfloat16\",\n",
+ " \"transformers_version\": \"4.42.3\",\n",
+ " \"use_cache\": true,\n",
+ " \"use_sliding_window\": false,\n",
+ " \"vocab_size\": 151936\n",
+ "}\n",
+ "\n",
+ "{'loss': 0.1274, 'grad_norm': 1.5695966482162476, 'learning_rate': 7.942430149009161e-06, 'epoch': 5.02}\n",
+ "{'loss': 0.1052, 'grad_norm': 0.5931769609451294, 'learning_rate': 7.663790038585793e-06, 'epoch': 5.03}\n",
+ "{'loss': 0.1188, 'grad_norm': 4.6362762451171875, 'learning_rate': 7.389719171023857e-06, 'epoch': 5.05}\n",
+ "{'loss': 0.1032, 'grad_norm': 2.54799485206604, 'learning_rate': 7.1202471261170245e-06, 'epoch': 5.07}\n",
+ "{'loss': 0.0781, 'grad_norm': 2.0741422176361084, 'learning_rate': 6.855402987319348e-06, 'epoch': 5.09}\n",
+ "{'loss': 0.1428, 'grad_norm': 2.47188401222229, 'learning_rate': 6.595215338606397e-06, 'epoch': 5.1}\n",
+ "{'loss': 0.0839, 'grad_norm': 1.993886947631836, 'learning_rate': 6.339712261390213e-06, 'epoch': 5.12}\n",
+ "{'loss': 0.1086, 'grad_norm': 1.442935824394226, 'learning_rate': 6.088921331488568e-06, 'epoch': 5.14}\n",
+ "{'loss': 0.0851, 'grad_norm': 2.1466658115386963, 'learning_rate': 5.8428696161488215e-06, 'epoch': 5.16}\n",
+ "{'loss': 0.0964, 'grad_norm': 1.960119366645813, 'learning_rate': 5.601583671126531e-06, 'epoch': 5.18}\n",
+ "{'loss': 0.1149, 'grad_norm': 1.3245364427566528, 'learning_rate': 5.365089537819434e-06, 'epoch': 5.19}\n",
+ "{'loss': 0.1074, 'grad_norm': 0.817304253578186, 'learning_rate': 5.133412740456806e-06, 'epoch': 5.21}\n",
+ "{'loss': 0.0966, 'grad_norm': 1.4587805271148682, 'learning_rate': 4.906578283344759e-06, 'epoch': 5.23}\n",
+ "{'loss': 0.1326, 'grad_norm': 5.115628719329834, 'learning_rate': 4.684610648167503e-06, 'epoch': 5.25}\n",
+ "{'loss': 0.1112, 'grad_norm': 2.1370065212249756, 'learning_rate': 4.467533791345191e-06, 'epoch': 5.27}\n",
+ "{'loss': 0.0918, 'grad_norm': 1.5177031755447388, 'learning_rate': 4.255371141448272e-06, 'epoch': 5.28}\n",
+ "{'loss': 0.0911, 'grad_norm': 3.415386199951172, 'learning_rate': 4.048145596668967e-06, 'epoch': 5.3}\n",
+ "{'loss': 0.1295, 'grad_norm': 9.106415748596191, 'learning_rate': 3.84587952234991e-06, 'epoch': 5.32}\n",
+ "{'loss': 0.0753, 'grad_norm': 1.1960046291351318, 'learning_rate': 3.6485947485702832e-06, 'epoch': 5.34}\n",
+ "{'loss': 0.1281, 'grad_norm': 3.4662070274353027, 'learning_rate': 3.4563125677897932e-06, 'epoch': 5.35}\n",
+ "{'loss': 0.1045, 'grad_norm': 1.4903005361557007, 'learning_rate': 3.269053732550581e-06, 'epoch': 5.37}\n",
+ "{'loss': 0.094, 'grad_norm': 2.3145623207092285, 'learning_rate': 3.086838453237506e-06, 'epoch': 5.39}\n",
+ "{'loss': 0.056, 'grad_norm': 1.6177632808685303, 'learning_rate': 2.9096863958968268e-06, 'epoch': 5.41}\n",
+ "{'loss': 0.093, 'grad_norm': 1.7712160348892212, 'learning_rate': 2.737616680113758e-06, 'epoch': 5.43}\n",
+ "{'loss': 0.0846, 'grad_norm': 2.1207849979400635, 'learning_rate': 2.570647876948895e-06, 'epoch': 5.44}\n",
+ "{'loss': 0.1257, 'grad_norm': 1.7891684770584106, 'learning_rate': 2.408798006933882e-06, 'epoch': 5.46}\n",
+ "{'loss': 0.1472, 'grad_norm': 1.305862545967102, 'learning_rate': 2.252084538126542e-06, 'epoch': 5.48}\n",
+ "{'loss': 0.0784, 'grad_norm': 2.511289596557617, 'learning_rate': 2.100524384225555e-06, 'epoch': 5.5}\n",
+ "{'loss': 0.1159, 'grad_norm': 2.205674886703491, 'learning_rate': 1.9541339027450256e-06, 'epoch': 5.52}\n",
+ "{'loss': 0.1057, 'grad_norm': 2.3121867179870605, 'learning_rate': 1.8129288932490274e-06, 'epoch': 5.53}\n",
+ "{'loss': 0.1044, 'grad_norm': 0.5653843283653259, 'learning_rate': 1.6769245956464396e-06, 'epoch': 5.55}\n",
+ "{'loss': 0.1248, 'grad_norm': 2.8058314323425293, 'learning_rate': 1.5461356885461075e-06, 'epoch': 5.57}\n",
+ "{'loss': 0.1108, 'grad_norm': 1.7656151056289673, 'learning_rate': 1.4205762876726092e-06, 'epoch': 5.59}\n",
+ "{'loss': 0.103, 'grad_norm': 1.4396343231201172, 'learning_rate': 1.3002599443428243e-06, 'epoch': 5.6}\n",
+ "{'loss': 0.1239, 'grad_norm': 2.5784292221069336, 'learning_rate': 1.1851996440033319e-06, 'epoch': 5.62}\n",
+ "{'loss': 0.0961, 'grad_norm': 0.813414990901947, 'learning_rate': 1.0754078048289374e-06, 'epoch': 5.64}\n",
+ "{'loss': 0.1176, 'grad_norm': 2.7768945693969727, 'learning_rate': 9.708962763824048e-07, 'epoch': 5.66}\n",
+ "{'loss': 0.0784, 'grad_norm': 1.4548313617706299, 'learning_rate': 8.716763383355864e-07, 'epoch': 5.68}\n",
+ "{'loss': 0.0995, 'grad_norm': 1.4250032901763916, 'learning_rate': 7.777586992519959e-07, 'epoch': 5.69}\n",
+ "{'loss': 0.1014, 'grad_norm': 3.0032870769500732, 'learning_rate': 6.891534954310885e-07, 'epoch': 5.71}\n",
+ "{'loss': 0.0993, 'grad_norm': 3.392124891281128, 'learning_rate': 6.058702898142643e-07, 'epoch': 5.73}\n",
+ "{'loss': 0.0962, 'grad_norm': 2.9156267642974854, 'learning_rate': 5.279180709527765e-07, 'epoch': 5.75}\n",
+ "{'loss': 0.1407, 'grad_norm': 2.50022292137146, 'learning_rate': 4.553052520375911e-07, 'epoch': 5.77}\n",
+ "{'loss': 0.0832, 'grad_norm': 1.8522708415985107, 'learning_rate': 3.8803966999139684e-07, 'epoch': 5.78}\n",
+ "{'loss': 0.1036, 'grad_norm': 2.559648275375366, 'learning_rate': 3.261285846227868e-07, 'epoch': 5.8}\n",
+ "{'loss': 0.0851, 'grad_norm': 6.124639987945557, 'learning_rate': 2.6957867784270787e-07, 'epoch': 5.82}\n",
+ "{'loss': 0.0861, 'grad_norm': 1.8628261089324951, 'learning_rate': 2.1839605294330933e-07, 'epoch': 5.84}\n",
+ "{'loss': 0.1153, 'grad_norm': 2.0182836055755615, 'learning_rate': 1.725862339392259e-07, 'epoch': 5.85}\n",
+ "{'loss': 0.0913, 'grad_norm': 2.184485912322998, 'learning_rate': 1.3215416497138754e-07, 'epoch': 5.87}\n",
+ "{'loss': 0.132, 'grad_norm': 2.652066707611084, 'learning_rate': 9.710420977340762e-08, 'epoch': 5.89}\n",
+ "{'loss': 0.0822, 'grad_norm': 2.054509401321411, 'learning_rate': 6.744015120061509e-08, 'epoch': 5.91}\n",
+ "{'loss': 0.1632, 'grad_norm': 2.1160929203033447, 'learning_rate': 4.316519082179227e-08, 'epoch': 5.93}\n",
+ "{'loss': 0.0715, 'grad_norm': 3.3849403858184814, 'learning_rate': 2.4281948573617874e-08, 'epoch': 5.94}\n",
+ "{'loss': 0.1134, 'grad_norm': 3.3306052684783936, 'learning_rate': 1.0792462477909882e-08, 'epoch': 5.96}\n",
+ "{'loss': 0.1273, 'grad_norm': 2.356410026550293, 'learning_rate': 2.6981884216847884e-09, 'epoch': 5.98}\n",
+ "{'loss': 0.1189, 'grad_norm': 2.4627721309661865, 'learning_rate': 0.0, 'epoch': 6.0}\n",
+ "100%|█████████████████████████████████████| 3360/3360 [1:31:57<00:00, 1.60s/it][INFO|trainer.py:3788] 2024-07-05 07:48:24,113 >> \n",
+ "***** Running Evaluation *****\n",
+ "[INFO|trainer.py:3790] 2024-07-05 07:48:24,113 >> Num examples = 46\n",
+ "[INFO|trainer.py:3793] 2024-07-05 07:48:24,113 >> Batch size = 1\n",
+ "\n",
+ " 0%| | 0/46 [00:00, ?it/s]\u001b[A\n",
+ " 7%|██▊ | 3/46 [00:00<00:01, 23.35it/s]\u001b[A\n",
+ " 13%|█████▋ | 6/46 [00:00<00:02, 18.72it/s]\u001b[A\n",
+ " 17%|███████▋ | 8/46 [00:00<00:02, 18.62it/s]\u001b[A\n",
+ " 22%|█████████▎ | 10/46 [00:00<00:01, 18.07it/s]\u001b[A\n",
+ " 26%|███████████▏ | 12/46 [00:00<00:01, 17.18it/s]\u001b[A\n",
+ " 30%|█████████████ | 14/46 [00:00<00:01, 17.36it/s]\u001b[A\n",
+ " 35%|██████████████▉ | 16/46 [00:00<00:01, 17.30it/s]\u001b[A\n",
+ " 39%|████████████████▊ | 18/46 [00:01<00:01, 17.39it/s]\u001b[A\n",
+ " 43%|██████████████████▋ | 20/46 [00:01<00:01, 17.07it/s]\u001b[A\n",
+ " 48%|████████████████████▌ | 22/46 [00:01<00:01, 17.15it/s]\u001b[A\n",
+ " 52%|██████████████████████▍ | 24/46 [00:01<00:01, 16.56it/s]\u001b[A\n",
+ " 57%|████████████████████████▎ | 26/46 [00:01<00:01, 16.08it/s]\u001b[A\n",
+ " 61%|██████████████████████████▏ | 28/46 [00:01<00:01, 16.33it/s]\u001b[A\n",
+ " 65%|████████████████████████████ | 30/46 [00:01<00:01, 15.84it/s]\u001b[A\n",
+ " 70%|█████████████████████████████▉ | 32/46 [00:01<00:00, 16.53it/s]\u001b[A\n",
+ " 74%|███████████████████████████████▊ | 34/46 [00:02<00:00, 16.00it/s]\u001b[A\n",
+ " 78%|█████████████████████████████████▋ | 36/46 [00:02<00:00, 15.36it/s]\u001b[A\n",
+ " 83%|███████████████████████████████████▌ | 38/46 [00:02<00:00, 15.20it/s]\u001b[A\n",
+ " 87%|█████████████████████████████████████▍ | 40/46 [00:02<00:00, 15.28it/s]\u001b[A\n",
+ " 91%|███████████████████████████████████████▎ | 42/46 [00:02<00:00, 15.21it/s]\u001b[A\n",
+ " 96%|█████████████████████████████████████████▏ | 44/46 [00:02<00:00, 15.97it/s]\u001b[A\n",
+ " \u001b[A\n",
+ "\u001b[A{'eval_loss': 3.542919397354126, 'eval_runtime': 2.8444, 'eval_samples_per_second': 16.172, 'eval_steps_per_second': 16.172, 'epoch': 6.0}\n",
+ "100%|█████████████████████████████████████| 3360/3360 [1:31:59<00:00, 1.60s/it]\n",
+ "100%|███████████████████████████████████████████| 46/46 [00:02<00:00, 16.55it/s]\u001b[A\n",
+ " \u001b[A[INFO|trainer.py:3478] 2024-07-05 07:48:26,960 >> Saving model checkpoint to saves/qwen2-0.5b/lora/sft/checkpoint-3360\n",
+ "[INFO|configuration_utils.py:733] 2024-07-05 07:48:28,128 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--unsloth--qwen2-0.5b-instruct-bnb-4bit/snapshots/c3b24ce4827d69f5c3bde9aba00047774069ab72/config.json\n",
+ "[INFO|configuration_utils.py:800] 2024-07-05 07:48:28,128 >> Model config Qwen2Config {\n",
+ " \"_name_or_path\": \"Qwen/Qwen2-0.5B-Instruct\",\n",
+ " \"architectures\": [\n",
+ " \"Qwen2ForCausalLM\"\n",
+ " ],\n",
+ " \"attention_dropout\": 0.0,\n",
+ " \"bos_token_id\": 151643,\n",
+ " \"eos_token_id\": 151645,\n",
+ " \"hidden_act\": \"silu\",\n",
+ " \"hidden_size\": 896,\n",
+ " \"initializer_range\": 0.02,\n",
+ " \"intermediate_size\": 4864,\n",
+ " \"max_position_embeddings\": 32768,\n",
+ " \"max_window_layers\": 24,\n",
+ " \"model_type\": \"qwen2\",\n",
+ " \"num_attention_heads\": 14,\n",
+ " \"num_hidden_layers\": 24,\n",
+ " \"num_key_value_heads\": 2,\n",
+ " \"quantization_config\": {\n",
+ " \"_load_in_4bit\": true,\n",
+ " \"_load_in_8bit\": false,\n",
+ " \"bnb_4bit_compute_dtype\": \"bfloat16\",\n",
+ " \"bnb_4bit_quant_storage\": \"uint8\",\n",
+ " \"bnb_4bit_quant_type\": \"nf4\",\n",
+ " \"bnb_4bit_use_double_quant\": true,\n",
+ " \"llm_int8_enable_fp32_cpu_offload\": false,\n",
+ " \"llm_int8_has_fp16_weight\": false,\n",
+ " \"llm_int8_skip_modules\": null,\n",
+ " \"llm_int8_threshold\": 6.0,\n",
+ " \"load_in_4bit\": true,\n",
+ " \"load_in_8bit\": false,\n",
+ " \"quant_method\": \"bitsandbytes\"\n",
+ " },\n",
+ " \"rms_norm_eps\": 1e-06,\n",
+ " \"rope_theta\": 1000000.0,\n",
+ " \"sliding_window\": 32768,\n",
+ " \"tie_word_embeddings\": true,\n",
+ " \"torch_dtype\": \"bfloat16\",\n",
+ " \"transformers_version\": \"4.42.3\",\n",
+ " \"use_cache\": true,\n",
+ " \"use_sliding_window\": false,\n",
+ " \"vocab_size\": 151936\n",
+ "}\n",
+ "\n",
+ "[INFO|:482] 2024-07-05 07:48:28,348 >> \n",
+ "\n",
+ "Training completed. Do not forget to share your model on huggingface.co/models =)\n",
+ "\n",
+ "\n",
+ "{'train_runtime': 5527.5332, 'train_samples_per_second': 4.865, 'train_steps_per_second': 0.608, 'train_loss': 0.927943646074051, 'epoch': 6.0}\n",
+ "100%|█████████████████████████████████████| 3360/3360 [1:32:01<00:00, 1.64s/it]\n",
+ "[INFO|trainer.py:3478] 2024-07-05 07:48:28,351 >> Saving model checkpoint to saves/qwen2-0.5b/lora/sft\n",
+ "[INFO|configuration_utils.py:733] 2024-07-05 07:48:29,375 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--unsloth--qwen2-0.5b-instruct-bnb-4bit/snapshots/c3b24ce4827d69f5c3bde9aba00047774069ab72/config.json\n",
+ "[INFO|configuration_utils.py:800] 2024-07-05 07:48:29,376 >> Model config Qwen2Config {\n",
+ " \"_name_or_path\": \"Qwen/Qwen2-0.5B-Instruct\",\n",
+ " \"architectures\": [\n",
+ " \"Qwen2ForCausalLM\"\n",
+ " ],\n",
+ " \"attention_dropout\": 0.0,\n",
+ " \"bos_token_id\": 151643,\n",
+ " \"eos_token_id\": 151645,\n",
+ " \"hidden_act\": \"silu\",\n",
+ " \"hidden_size\": 896,\n",
+ " \"initializer_range\": 0.02,\n",
+ " \"intermediate_size\": 4864,\n",
+ " \"max_position_embeddings\": 32768,\n",
+ " \"max_window_layers\": 24,\n",
+ " \"model_type\": \"qwen2\",\n",
+ " \"num_attention_heads\": 14,\n",
+ " \"num_hidden_layers\": 24,\n",
+ " \"num_key_value_heads\": 2,\n",
+ " \"quantization_config\": {\n",
+ " \"_load_in_4bit\": true,\n",
+ " \"_load_in_8bit\": false,\n",
+ " \"bnb_4bit_compute_dtype\": \"bfloat16\",\n",
+ " \"bnb_4bit_quant_storage\": \"uint8\",\n",
+ " \"bnb_4bit_quant_type\": \"nf4\",\n",
+ " \"bnb_4bit_use_double_quant\": true,\n",
+ " \"llm_int8_enable_fp32_cpu_offload\": false,\n",
+ " \"llm_int8_has_fp16_weight\": false,\n",
+ " \"llm_int8_skip_modules\": null,\n",
+ " \"llm_int8_threshold\": 6.0,\n",
+ " \"load_in_4bit\": true,\n",
+ " \"load_in_8bit\": false,\n",
+ " \"quant_method\": \"bitsandbytes\"\n",
+ " },\n",
+ " \"rms_norm_eps\": 1e-06,\n",
+ " \"rope_theta\": 1000000.0,\n",
+ " \"sliding_window\": 32768,\n",
+ " \"tie_word_embeddings\": true,\n",
+ " \"torch_dtype\": \"bfloat16\",\n",
+ " \"transformers_version\": \"4.42.3\",\n",
+ " \"use_cache\": true,\n",
+ " \"use_sliding_window\": false,\n",
+ " \"vocab_size\": 151936\n",
+ "}\n",
+ "\n",
+ "***** train metrics *****\n",
+ " epoch = 5.9973\n",
+ " total_flos = 6320365GF\n",
+ " train_loss = 0.9279\n",
+ " train_runtime = 1:32:07.53\n",
+ " train_samples_per_second = 4.865\n",
+ " train_steps_per_second = 0.608\n",
+ "Figure saved at: saves/qwen2-0.5b/lora/sft/training_loss.png\n",
+ "Figure saved at: saves/qwen2-0.5b/lora/sft/training_eval_loss.png\n",
+ "[INFO|trainer.py:3788] 2024-07-05 07:48:29,751 >> \n",
+ "***** Running Evaluation *****\n",
+ "[INFO|trainer.py:3790] 2024-07-05 07:48:29,752 >> Num examples = 46\n",
+ "[INFO|trainer.py:3793] 2024-07-05 07:48:29,752 >> Batch size = 1\n",
+ "100%|███████████████████████████████████████████| 46/46 [00:03<00:00, 15.10it/s]\n",
+ "***** eval metrics *****\n",
+ " epoch = 5.9973\n",
+ " eval_loss = 3.5429\n",
+ " eval_runtime = 0:00:03.16\n",
+ " eval_samples_per_second = 14.532\n",
+ " eval_steps_per_second = 14.532\n",
+ "[INFO|modelcard.py:449] 2024-07-05 07:48:32,920 >> Dropping the following result as it does not have all the necessary fields:\n",
+ "{'task': {'name': 'Causal Language Modeling', 'type': 'text-generation'}}\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: / 0.561 MB of 0.561 MB uploaded\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: Run history:\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: eval/loss ▁▁▃▄▆██\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: eval/runtime ▁▂▃▂▄▂█\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: eval/samples_per_second █▇▆▇▅▇▁\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: eval/steps_per_second █▇▆▇▅▇▁\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: train/epoch ▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇███\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: train/global_step ▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇███\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: train/grad_norm ▂▁▁▂▁▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂█▂▂▂▂▁▁▁▂▁▁▁▁\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: train/learning_rate ▂▄▅▇██████▇▇▇▇▇▆▆▆▆▅▅▅▄▄▄▃▃▃▃▂▂▂▂▁▁▁▁▁▁▁\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: train/loss ████▇▇▇▅▆▆▆▆▆▆▄▄▄▄▄▄▂▂▂▂▂▂▂▂▂▂▁▂▁▁▁▁▁▁▁▁\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: \n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: Run summary:\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: eval/loss 3.54292\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: eval/runtime 3.1655\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: eval/samples_per_second 14.532\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: eval/steps_per_second 14.532\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: total_flos 6786441021493248.0\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: train/epoch 5.99732\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: train/global_step 3360\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: train/grad_norm 2.46277\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: train/learning_rate 0.0\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: train/loss 0.1189\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: train_loss 0.92794\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: train_runtime 5527.5332\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: train_samples_per_second 4.865\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: train_steps_per_second 0.608\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: \n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: 🚀 View run \u001b[33mqwen2_0.5b_lora_sft\u001b[0m at: \u001b[34m\u001b[4mhttps://wandb.ai/inflaton-ai/huggingface/runs/3amepb0m\u001b[0m\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: ⭐️ View project at: \u001b[34m\u001b[4mhttps://wandb.ai/inflaton-ai/huggingface\u001b[0m\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: Synced 6 W&B file(s), 0 media file(s), 0 artifact file(s) and 0 other file(s)\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: Find logs at: \u001b[35m\u001b[1m./wandb/run-20240705_061623-3amepb0m/logs\u001b[0m\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m The new W&B backend becomes opt-out in version 0.18.0; try it out with `wandb.require(\"core\")`! See https://wandb.me/wandb-core for more information.\n",
+ "CPU times: user 1min 4s, sys: 21.9 s, total: 1min 26s\n",
+ "Wall time: 1h 33min 9s\n"
+ ]
+ }
+ ],
+ "source": [
+ "%%time\n",
+ "\n",
+ "!./scripts/tune-lf.sh config/qwen2_0.5b_lora_sft_unsloth.yaml"
+ ]
+ }
+ ],
+ "metadata": {
+ "accelerator": "GPU",
+ "application/vnd.databricks.v1+notebook": {
+ "dashboards": [],
+ "environmentMetadata": null,
+ "language": "python",
+ "notebookMetadata": {
+ "pythonIndentUnit": 4
+ },
+ "notebookName": "07_MAC_+_Qwen2-7B-Instructi_Unsloth_train",
+ "widgets": {}
+ },
+ "colab": {
+ "gpuType": "T4",
+ "provenance": []
+ },
+ "kernelspec": {
+ "display_name": "Python 3",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.11.9"
+ },
+ "widgets": {
+ "application/vnd.jupyter.widget-state+json": {
+ "036fc5746f43416db18c19ad8fd36677": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "06e806c82c7b4cbea31c5358dd9c3434": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "087b76a8b7514269b1f0ab29b062e444": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_a069d2ab23824f29aa320ac256e2cfe9",
+ "placeholder": "",
+ "style": "IPY_MODEL_06e806c82c7b4cbea31c5358dd9c3434",
+ "value": "Map (num_proc=2): 100%"
+ }
+ },
+ "09b76013aa9e45efb6deb23a7a0d0925": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_dea41c5260884aa6879b5e1d1697b14f",
+ "placeholder": "",
+ "style": "IPY_MODEL_89965917796a4f81b899fdc7685f33df",
+ "value": "config.json: 100%"
+ }
+ },
+ "0a92c56bfa134ef583220d7ef0b13e17": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "0c34be936c8145d3ab41282f30a70713": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "0f8b6bfe16894500838793f2491d403f": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "177c78fce95d4b4ab33057c5a048d693": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "1f44c9ce1adf470cbb19784493ed209f": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_0c34be936c8145d3ab41282f30a70713",
+ "placeholder": "",
+ "style": "IPY_MODEL_0a92c56bfa134ef583220d7ef0b13e17",
+ "value": "model.safetensors: 100%"
+ }
+ },
+ "201b59ccd9f845e197029b57e424aefc": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "2157f01726d748f8a9ae4a00664430da": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "21db8a77b00d4a4e82fdfa608657531f": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "26e4202cca81496a90d15a0dd4ca9cf1": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_ba90fdb8822d47dab7ba203bee297f37",
+ "IPY_MODEL_61560ff6a36b44f4a9dfdae5c52791d4",
+ "IPY_MODEL_95fbe66647904c06a20f640630d6dc0e"
+ ],
+ "layout": "IPY_MODEL_57182a263d324a3dbf1471c74290a0d5"
+ }
+ },
+ "27155728b6b84cb199c91c940095d0a8": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_6b91feeed5464877991ac2c207aebe7c",
+ "IPY_MODEL_cca8113c54c0495daedce1327bf9c68b",
+ "IPY_MODEL_2e63a29e2f7247bba5beede9a568c99f"
+ ],
+ "layout": "IPY_MODEL_5c9d781c28944f3eb86e2a6d44efdf18"
+ }
+ },
+ "271ddaa553a042d09b6db7b450643d8f": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "2a58d04b428c46f4b3dbadd3bc6cd529": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "2d18ddf6482c4d97829ac0e5a7b9868f": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_9f679ad3ec7f4fe8ad0510ffb57bc2ab",
+ "IPY_MODEL_f2df530d22c74977b249dd9fb5f4829b",
+ "IPY_MODEL_89b2ef0dbfea47ab8e6f8d659e3351d1"
+ ],
+ "layout": "IPY_MODEL_3056b148aa9f4e6e8aa3b61d26886255"
+ }
+ },
+ "2e5087c76f98437cb5dc729230358cba": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "2e63a29e2f7247bba5beede9a568c99f": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_b993eaec6b224440bf80c0958c6fb536",
+ "placeholder": "",
+ "style": "IPY_MODEL_de868e26e7154f62aa86223a539ad421",
+ "value": " 464/464 [00:00<00:00, 27.1kB/s]"
+ }
+ },
+ "2f6c70dd266c4816bfad3fd3d192929a": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "30307300bc4e4baf96560e30969a82b6": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_e36a3f9eff0e4cf68834d66b0213ae96",
+ "placeholder": "",
+ "style": "IPY_MODEL_a0037bdccf254159becde630bee3d1db",
+ "value": "generation_config.json: 100%"
+ }
+ },
+ "3056b148aa9f4e6e8aa3b61d26886255": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "30cdc32298134cb0be4d41615b9e5774": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "3572201bd4d74a58b7a665f9bdfdcdba": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "35b0e8c26d6640e9bd0ed7b242a423d8": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_2e5087c76f98437cb5dc729230358cba",
+ "max": 51760,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_036fc5746f43416db18c19ad8fd36677",
+ "value": 51760
+ }
+ },
+ "36166c7bcb854b34aca1f41a5d6ea50b": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "370692d819df41828b48c4ad446f977b": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "39b29a75374b45c0a22506010be2b84e": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_30cdc32298134cb0be4d41615b9e5774",
+ "max": 1179,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_47928317548c454bba6358ab132e8dee",
+ "value": 1179
+ }
+ },
+ "3cf2dd993b5e4d3daecf61e4bab5a404": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_087b76a8b7514269b1f0ab29b062e444",
+ "IPY_MODEL_35b0e8c26d6640e9bd0ed7b242a423d8",
+ "IPY_MODEL_54ad89e05fd74576b9b8b5b5a10eaf8d"
+ ],
+ "layout": "IPY_MODEL_a41dc44766444a998bec2d777f249d23"
+ }
+ },
+ "43dec2ede91341f5af60eb522e18e984": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "4463edd481c1467f914c7dcd6c6e6ffc": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "47928317548c454bba6358ab132e8dee": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "49277aeeac16434a865a4d12308b1abc": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "4ae7e449e4ea4c729b5f34607c18ebae": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "4b2061b8a73c43ffb0c2f83daf0d0183": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "4c4c88d4c701450692fa0f6b0c5764b0": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "4c666f4ace3943f8b80ecd20e7503236": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "4ccedf0d93094e63b57a0f8a434fba06": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_4463edd481c1467f914c7dcd6c6e6ffc",
+ "max": 44307561,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_6d3b9a05db0b4dadb638c686faa0c40a",
+ "value": 44307561
+ }
+ },
+ "4dcf6ff672d24983a1877a8431709aa9": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_5807d5fb827d490fb3bc698f801ffff5",
+ "placeholder": "",
+ "style": "IPY_MODEL_c4f2b06a82fd4987b8b659524a7b503b",
+ "value": "Generating train split: 100%"
+ }
+ },
+ "4ea63adfce694725bdba878aef709dd3": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "5234566b1bfc4655b8d582ea5b46ed9f": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "54ad89e05fd74576b9b8b5b5a10eaf8d": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_fdb1941405ed4e4aa06019933892deb3",
+ "placeholder": "",
+ "style": "IPY_MODEL_668d5377ca56426a99753867e6e24862",
+ "value": " 51760/51760 [01:02<00:00, 1131.51 examples/s]"
+ }
+ },
+ "56aee4853b7740e6a977254f5d1fa66d": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "57182a263d324a3dbf1471c74290a0d5": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "5807d5fb827d490fb3bc698f801ffff5": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "5c9d781c28944f3eb86e2a6d44efdf18": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "5f40db8173dd4d76b6ef5ed6d9ec8b6e": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "61560ff6a36b44f4a9dfdae5c52791d4": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_db19fc8d37db4e45a5790a876836d8c4",
+ "max": 11610,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_36166c7bcb854b34aca1f41a5d6ea50b",
+ "value": 11610
+ }
+ },
+ "6578fd7acdb54c4c93528ea431fd0144": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_370692d819df41828b48c4ad446f977b",
+ "placeholder": "",
+ "style": "IPY_MODEL_a0bf9160eb2647409b3200270914b90f",
+ "value": " 50.6k/50.6k [00:00<00:00, 2.71MB/s]"
+ }
+ },
+ "668d5377ca56426a99753867e6e24862": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "697f027529b54ee9956bae78a11e0611": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "69ac12aec0714318bf2c83d4f4e745f5": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "6b2012c3f88547af8884a9ea90e3164b": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_938f45f1b3e24118b815d96ae34ba86a",
+ "placeholder": "",
+ "style": "IPY_MODEL_9367047a800747f79c6b225d92397846",
+ "value": " 44.3M/44.3M [00:01<00:00, 31.0MB/s]"
+ }
+ },
+ "6b91feeed5464877991ac2c207aebe7c": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_4b2061b8a73c43ffb0c2f83daf0d0183",
+ "placeholder": "",
+ "style": "IPY_MODEL_69ac12aec0714318bf2c83d4f4e745f5",
+ "value": "special_tokens_map.json: 100%"
+ }
+ },
+ "6d3b9a05db0b4dadb638c686faa0c40a": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "6dbbedeca9314e66ae50e44ffa31a414": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "6e34619b45934040b6092e6fb01ea7fe": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "71ce208e20d6483abb9ed923510c86d7": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_d69dc491b3ab44d7852b21873ed7bb7f",
+ "placeholder": "",
+ "style": "IPY_MODEL_f401d53bf28e44eb906bce6c05412662",
+ "value": " 51760/51760 [00:01<00:00, 45512.81 examples/s]"
+ }
+ },
+ "7358cdad832342c983e31efb8754ab78": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "73e352a3404f4c7dad0737f57d29e92f": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_988a0e8c1f89446086858da0a891a79c",
+ "IPY_MODEL_4ccedf0d93094e63b57a0f8a434fba06",
+ "IPY_MODEL_6b2012c3f88547af8884a9ea90e3164b"
+ ],
+ "layout": "IPY_MODEL_7e29cb8dd4df4d5b94407cd8fd3f2011"
+ }
+ },
+ "74501720ac7e4dbb911a4a99b3633bc6": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "78e5400bff924a92a4cc61c4ff18b182": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_b9b313fd861948f5aba25b24b1518d30",
+ "placeholder": "",
+ "style": "IPY_MODEL_4c666f4ace3943f8b80ecd20e7503236",
+ "value": " 1.18k/1.18k [00:00<00:00, 31.3kB/s]"
+ }
+ },
+ "7975adbc2ec5489ea7fa0167e620d85c": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_6e34619b45934040b6092e6fb01ea7fe",
+ "max": 51760,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_271ddaa553a042d09b6db7b450643d8f",
+ "value": 51760
+ }
+ },
+ "7e29cb8dd4df4d5b94407cd8fd3f2011": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "810ff6c0e17d4fa09a30fef27eacff90": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "89965917796a4f81b899fdc7685f33df": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "89b2ef0dbfea47ab8e6f8d659e3351d1": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_b8908fa0df3743ecb9d12983a739104f",
+ "placeholder": "",
+ "style": "IPY_MODEL_177c78fce95d4b4ab33057c5a048d693",
+ "value": " 9.09M/9.09M [00:00<00:00, 32.6MB/s]"
+ }
+ },
+ "8b3505352a5a42bf910428c40ce40465": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_49277aeeac16434a865a4d12308b1abc",
+ "placeholder": "",
+ "style": "IPY_MODEL_2157f01726d748f8a9ae4a00664430da",
+ "value": " 5.70G/5.70G [01:02<00:00, 30.1MB/s]"
+ }
+ },
+ "8fc142b628fb40568730234de1cafde2": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_4ae7e449e4ea4c729b5f34607c18ebae",
+ "max": 172,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_3572201bd4d74a58b7a665f9bdfdcdba",
+ "value": 172
+ }
+ },
+ "9367047a800747f79c6b225d92397846": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "938f45f1b3e24118b815d96ae34ba86a": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "95fbe66647904c06a20f640630d6dc0e": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_b0a370dc20654b279b9680692e34418e",
+ "placeholder": "",
+ "style": "IPY_MODEL_cfeb365ddf7548d58b2557f22737fcf5",
+ "value": " 11.6k/11.6k [00:00<00:00, 716kB/s]"
+ }
+ },
+ "988a0e8c1f89446086858da0a891a79c": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_ad2be500fc164c0f86f33e914ef8e6a0",
+ "placeholder": "",
+ "style": "IPY_MODEL_5234566b1bfc4655b8d582ea5b46ed9f",
+ "value": "Downloading data: 100%"
+ }
+ },
+ "98c58f23f4d549518832cb2d18f796e8": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_09b76013aa9e45efb6deb23a7a0d0925",
+ "IPY_MODEL_39b29a75374b45c0a22506010be2b84e",
+ "IPY_MODEL_78e5400bff924a92a4cc61c4ff18b182"
+ ],
+ "layout": "IPY_MODEL_2a58d04b428c46f4b3dbadd3bc6cd529"
+ }
+ },
+ "99fdbb0300c14c139d1937c646f0cfe7": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_7358cdad832342c983e31efb8754ab78",
+ "placeholder": "",
+ "style": "IPY_MODEL_e9adf418296e436fb48bb9f78885598b",
+ "value": " 51760/51760 [00:01<00:00, 38665.95 examples/s]"
+ }
+ },
+ "9f679ad3ec7f4fe8ad0510ffb57bc2ab": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_4ea63adfce694725bdba878aef709dd3",
+ "placeholder": "",
+ "style": "IPY_MODEL_74501720ac7e4dbb911a4a99b3633bc6",
+ "value": "tokenizer.json: 100%"
+ }
+ },
+ "a0037bdccf254159becde630bee3d1db": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "a069d2ab23824f29aa320ac256e2cfe9": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "a0bf9160eb2647409b3200270914b90f": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "a41dc44766444a998bec2d777f249d23": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "a8464a4c711e4e00aafdfc919b60d07e": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_fb995c740590427b882572c81d4e848c",
+ "placeholder": "",
+ "style": "IPY_MODEL_201b59ccd9f845e197029b57e424aefc",
+ "value": " 172/172 [00:00<00:00, 12.0kB/s]"
+ }
+ },
+ "a9f0cc51fc3d4d7b874c32dcf1c5bdf2": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "ad2be500fc164c0f86f33e914ef8e6a0": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "b0240cd9a4554b29ae11f8051984a1c6": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_edaf890370314a218f138015faa0b05d",
+ "placeholder": "",
+ "style": "IPY_MODEL_697f027529b54ee9956bae78a11e0611",
+ "value": "Map: 100%"
+ }
+ },
+ "b0a370dc20654b279b9680692e34418e": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "b518dcee69074b87be73957cd810e7ed": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_d891f8d0b1fc462f8008d02bb2a15692",
+ "placeholder": "",
+ "style": "IPY_MODEL_cced8fd7e998472794f3f3e3018956a5",
+ "value": "tokenizer_config.json: 100%"
+ }
+ },
+ "b8908fa0df3743ecb9d12983a739104f": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "b993eaec6b224440bf80c0958c6fb536": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "b9b313fd861948f5aba25b24b1518d30": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "ba90fdb8822d47dab7ba203bee297f37": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_0f8b6bfe16894500838793f2491d403f",
+ "placeholder": "",
+ "style": "IPY_MODEL_bb19f6c747754682a514373a3a0535ba",
+ "value": "Downloading readme: 100%"
+ }
+ },
+ "bb19f6c747754682a514373a3a0535ba": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "bc883d4cf13e4f8b8a4fe5f410cb6efd": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_e9159e03e61f4f56978ece9c3bca49b2",
+ "max": 51760,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_810ff6c0e17d4fa09a30fef27eacff90",
+ "value": 51760
+ }
+ },
+ "c161d94df0f04feba9542237e0856c22": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "c22f71b1f85843209d7e5321506b9cb9": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_1f44c9ce1adf470cbb19784493ed209f",
+ "IPY_MODEL_f1addc4479d849879e743cf9089e6540",
+ "IPY_MODEL_8b3505352a5a42bf910428c40ce40465"
+ ],
+ "layout": "IPY_MODEL_4c4c88d4c701450692fa0f6b0c5764b0"
+ }
+ },
+ "c4f2b06a82fd4987b8b659524a7b503b": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "cca8113c54c0495daedce1327bf9c68b": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_e02f9b7849c64531835eb77b860d1c93",
+ "max": 464,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_56aee4853b7740e6a977254f5d1fa66d",
+ "value": 464
+ }
+ },
+ "cced8fd7e998472794f3f3e3018956a5": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "cf245afeb1c04f29a24d291608c3d157": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_b518dcee69074b87be73957cd810e7ed",
+ "IPY_MODEL_e29104486d594b2992d7285e0ef77371",
+ "IPY_MODEL_6578fd7acdb54c4c93528ea431fd0144"
+ ],
+ "layout": "IPY_MODEL_d35db8148a354c56aaac56dbae22536f"
+ }
+ },
+ "cfe8cae0e22b495bafa221a63d13b283": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "cfeb365ddf7548d58b2557f22737fcf5": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "d1b47d39450d4019ae85c9b2f943eeaf": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_4dcf6ff672d24983a1877a8431709aa9",
+ "IPY_MODEL_7975adbc2ec5489ea7fa0167e620d85c",
+ "IPY_MODEL_71ce208e20d6483abb9ed923510c86d7"
+ ],
+ "layout": "IPY_MODEL_cfe8cae0e22b495bafa221a63d13b283"
+ }
+ },
+ "d35db8148a354c56aaac56dbae22536f": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "d69dc491b3ab44d7852b21873ed7bb7f": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "d891f8d0b1fc462f8008d02bb2a15692": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "d8e5318cead340c4adbeaccc05d39225": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "daf4cd890b35422683d22fd30bc71e83": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_b0240cd9a4554b29ae11f8051984a1c6",
+ "IPY_MODEL_bc883d4cf13e4f8b8a4fe5f410cb6efd",
+ "IPY_MODEL_99fdbb0300c14c139d1937c646f0cfe7"
+ ],
+ "layout": "IPY_MODEL_c161d94df0f04feba9542237e0856c22"
+ }
+ },
+ "db19fc8d37db4e45a5790a876836d8c4": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "de868e26e7154f62aa86223a539ad421": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "dea41c5260884aa6879b5e1d1697b14f": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "e02f9b7849c64531835eb77b860d1c93": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "e29104486d594b2992d7285e0ef77371": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_a9f0cc51fc3d4d7b874c32dcf1c5bdf2",
+ "max": 50641,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_2f6c70dd266c4816bfad3fd3d192929a",
+ "value": 50641
+ }
+ },
+ "e36a3f9eff0e4cf68834d66b0213ae96": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "e9159e03e61f4f56978ece9c3bca49b2": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "e9adf418296e436fb48bb9f78885598b": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "edaf890370314a218f138015faa0b05d": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "f1addc4479d849879e743cf9089e6540": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_43dec2ede91341f5af60eb522e18e984",
+ "max": 5702746405,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_d8e5318cead340c4adbeaccc05d39225",
+ "value": 5702746405
+ }
+ },
+ "f2df530d22c74977b249dd9fb5f4829b": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_21db8a77b00d4a4e82fdfa608657531f",
+ "max": 9085698,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_6dbbedeca9314e66ae50e44ffa31a414",
+ "value": 9085698
+ }
+ },
+ "f401d53bf28e44eb906bce6c05412662": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "fb995c740590427b882572c81d4e848c": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "fce7a61c25ec4390af43d92b7c473a45": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_30307300bc4e4baf96560e30969a82b6",
+ "IPY_MODEL_8fc142b628fb40568730234de1cafde2",
+ "IPY_MODEL_a8464a4c711e4e00aafdfc919b60d07e"
+ ],
+ "layout": "IPY_MODEL_5f40db8173dd4d76b6ef5ed6d9ec8b6e"
+ }
+ },
+ "fdb1941405ed4e4aa06019933892deb3": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ }
+ }
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
+}
diff --git a/notebooks/08_eval-lf-py3.11.ipynb b/notebooks/08_eval-lf-py3.11.ipynb
new file mode 100644
index 0000000000000000000000000000000000000000..7d84f46baefc150de3a9123435a97c562bbf6b74
--- /dev/null
+++ b/notebooks/08_eval-lf-py3.11.ipynb
@@ -0,0 +1,6437 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {
+ "application/vnd.databricks.v1+cell": {
+ "cellMetadata": {},
+ "inputWidgets": {},
+ "nuid": "0ea8b46b-839b-445b-8043-ccdf4e920ace",
+ "showTitle": false,
+ "title": ""
+ }
+ },
+ "outputs": [],
+ "source": [
+ "%load_ext autoreload\n",
+ "%autoreload 2"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {
+ "application/vnd.databricks.v1+cell": {
+ "cellMetadata": {},
+ "inputWidgets": {},
+ "nuid": "6d394937-6c99-4a7c-9d32-7600a280032f",
+ "showTitle": false,
+ "title": ""
+ }
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "workding dir: /home/inflaton/code/projects/courses/llm-finetuning\n"
+ ]
+ }
+ ],
+ "source": [
+ "import os\n",
+ "import sys\n",
+ "from pathlib import Path\n",
+ "\n",
+ "workding_dir = str(Path.cwd().parent)\n",
+ "os.chdir(workding_dir)\n",
+ "sys.path.append(workding_dir)\n",
+ "print(\"workding dir:\", workding_dir)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {
+ "application/vnd.databricks.v1+cell": {
+ "cellMetadata": {},
+ "inputWidgets": {},
+ "nuid": "9f67ec60-2f24-411c-84eb-0dd664b44775",
+ "showTitle": false,
+ "title": ""
+ }
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "loading env vars from: /home/inflaton/code/projects/courses/llm-finetuning/.env\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "True"
+ ]
+ },
+ "execution_count": 6,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "from dotenv import find_dotenv, load_dotenv\n",
+ "\n",
+ "found_dotenv = find_dotenv(\".env\")\n",
+ "\n",
+ "if len(found_dotenv) == 0:\n",
+ " found_dotenv = find_dotenv(\".env.example\")\n",
+ "print(f\"loading env vars from: {found_dotenv}\")\n",
+ "load_dotenv(found_dotenv, override=True)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {
+ "application/vnd.databricks.v1+cell": {
+ "cellMetadata": {},
+ "inputWidgets": {},
+ "nuid": "f1597656-8042-4878-9d3b-9ebfb8dd86dc",
+ "showTitle": false,
+ "title": ""
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "('unsloth/Qwen2-0.5B-Instruct',\n",
+ " True,\n",
+ " None,\n",
+ " None,\n",
+ " 2048,\n",
+ " 10,\n",
+ " None,\n",
+ " 'datasets/mac/mac.tsv',\n",
+ " 'results/mac-results_lf.csv')"
+ ]
+ },
+ "execution_count": 4,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "import os\n",
+ "\n",
+ "model_name = os.getenv(\"MODEL_NAME\")\n",
+ "token = os.getenv(\"HF_TOKEN\") or None\n",
+ "load_in_4bit = os.getenv(\"LOAD_IN_4BIT\") == \"true\"\n",
+ "local_model = os.getenv(\"LOCAL_MODEL\")\n",
+ "hub_model = os.getenv(\"HUB_MODEL\")\n",
+ "num_train_epochs = int(os.getenv(\"NUM_TRAIN_EPOCHS\") or 0)\n",
+ "data_path = os.getenv(\"DATA_PATH\")\n",
+ "results_path = os.getenv(\"RESULTS_PATH\")\n",
+ "\n",
+ "max_seq_length = 2048 # Choose any! We auto support RoPE Scaling internally!\n",
+ "dtype = (\n",
+ " None # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+\n",
+ ")\n",
+ "\n",
+ "model_name, load_in_4bit, local_model, hub_model, max_seq_length, num_train_epochs, dtype, data_path, results_path"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Sun Jun 30 13:21:10 2024 \n",
+ "+---------------------------------------------------------------------------------------+\n",
+ "| NVIDIA-SMI 545.23.07 Driver Version: 546.12 CUDA Version: 12.3 |\n",
+ "|-----------------------------------------+----------------------+----------------------+\n",
+ "| GPU Name Persistence-M | Bus-Id Disp.A | Volatile Uncorr. ECC |\n",
+ "| Fan Temp Perf Pwr:Usage/Cap | Memory-Usage | GPU-Util Compute M. |\n",
+ "| | | MIG M. |\n",
+ "|=========================================+======================+======================|\n",
+ "| 0 NVIDIA GeForce RTX 4080 ... On | 00000000:01:00.0 Off | N/A |\n",
+ "| N/A 49C P8 3W / 150W | 194MiB / 12282MiB | 0% Default |\n",
+ "| | | N/A |\n",
+ "+-----------------------------------------+----------------------+----------------------+\n",
+ " \n",
+ "+---------------------------------------------------------------------------------------+\n",
+ "| Processes: |\n",
+ "| GPU GI CI PID Type Process name GPU Memory |\n",
+ "| ID ID Usage |\n",
+ "|=======================================================================================|\n",
+ "| No running processes found |\n",
+ "+---------------------------------------------------------------------------------------+\n"
+ ]
+ }
+ ],
+ "source": [
+ "!nvidia-smi"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Python 3.11.9\n",
+ "\u001b[33mWARNING: Package(s) not found: flash-attn\u001b[0m\u001b[33m\n",
+ "\u001b[0mCPU times: user 32 ms, sys: 10.6 ms, total: 42.6 ms\n",
+ "Wall time: 1.23 s\n"
+ ]
+ }
+ ],
+ "source": [
+ "%%time\n",
+ "!python --version\n",
+ "!pip show flash-attn"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Epoch 1\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "loading env vars from: /home/inflaton/code/projects/courses/llm-finetuning/.env\n",
+ "Adding /home/inflaton/code/projects/courses/llm-finetuning to sys.path\n",
+ "🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.\n",
+ "[nltk_data] Downloading package wordnet to /home/inflaton/nltk_data...\n",
+ "[nltk_data] Package wordnet is already up-to-date!\n",
+ "[nltk_data] Downloading package punkt to /home/inflaton/nltk_data...\n",
+ "[nltk_data] Package punkt is already up-to-date!\n",
+ "[nltk_data] Downloading package omw-1.4 to /home/inflaton/nltk_data...\n",
+ "[nltk_data] Package omw-1.4 is already up-to-date!\n",
+ "loading /home/inflaton/code/projects/courses/llm-finetuning/llm_toolkit/translation_engine.py\n",
+ "Qwen/Qwen2-0.5B-Instruct llama-factory/saves/qwen2-0.5b/lora/sft/checkpoint-560 True datasets/mac/mac.tsv results/mac-results_lf.csv\n",
+ "(1) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n",
+ "0.0 GB of memory reserved.\n",
+ "loading model: Qwen/Qwen2-0.5B-Instruct\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-06-30 13:21:34,519 >> loading file vocab.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/vocab.json\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-06-30 13:21:34,519 >> loading file merges.txt from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/merges.txt\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-06-30 13:21:34,519 >> loading file tokenizer.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/tokenizer.json\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-06-30 13:21:34,519 >> loading file added_tokens.json from cache at None\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-06-30 13:21:34,519 >> loading file special_tokens_map.json from cache at None\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-06-30 13:21:34,520 >> loading file tokenizer_config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/tokenizer_config.json\n",
+ "[WARNING|logging.py:313] 2024-06-30 13:21:34,863 >> Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n",
+ "06/30/2024 13:21:34 - INFO - llamafactory.data.template - Replace eos token: <|im_end|>\n",
+ "06/30/2024 13:21:34 - INFO - llamafactory.data.template - Add <|im_start|> to stop words.\n",
+ "[INFO|configuration_utils.py:733] 2024-06-30 13:21:35,179 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/config.json\n",
+ "[INFO|configuration_utils.py:800] 2024-06-30 13:21:35,181 >> Model config Qwen2Config {\n",
+ " \"_name_or_path\": \"Qwen/Qwen2-0.5B-Instruct\",\n",
+ " \"architectures\": [\n",
+ " \"Qwen2ForCausalLM\"\n",
+ " ],\n",
+ " \"attention_dropout\": 0.0,\n",
+ " \"bos_token_id\": 151643,\n",
+ " \"eos_token_id\": 151645,\n",
+ " \"hidden_act\": \"silu\",\n",
+ " \"hidden_size\": 896,\n",
+ " \"initializer_range\": 0.02,\n",
+ " \"intermediate_size\": 4864,\n",
+ " \"max_position_embeddings\": 32768,\n",
+ " \"max_window_layers\": 24,\n",
+ " \"model_type\": \"qwen2\",\n",
+ " \"num_attention_heads\": 14,\n",
+ " \"num_hidden_layers\": 24,\n",
+ " \"num_key_value_heads\": 2,\n",
+ " \"rms_norm_eps\": 1e-06,\n",
+ " \"rope_theta\": 1000000.0,\n",
+ " \"sliding_window\": 32768,\n",
+ " \"tie_word_embeddings\": true,\n",
+ " \"torch_dtype\": \"bfloat16\",\n",
+ " \"transformers_version\": \"4.42.3\",\n",
+ " \"use_cache\": true,\n",
+ " \"use_sliding_window\": false,\n",
+ " \"vocab_size\": 151936\n",
+ "}\n",
+ "\n",
+ "06/30/2024 13:21:35 - INFO - llamafactory.model.model_utils.quantization - Quantizing model to 4 bit with bitsandbytes.\n",
+ "06/30/2024 13:21:35 - INFO - llamafactory.model.patcher - Using KV cache for faster generation.\n",
+ "[INFO|modeling_utils.py:3556] 2024-06-30 13:21:35,287 >> loading weights file model.safetensors from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/model.safetensors\n",
+ "[INFO|modeling_utils.py:1531] 2024-06-30 13:21:37,852 >> Instantiating Qwen2ForCausalLM model under default dtype torch.bfloat16.\n",
+ "[INFO|configuration_utils.py:1000] 2024-06-30 13:21:37,860 >> Generate config GenerationConfig {\n",
+ " \"bos_token_id\": 151643,\n",
+ " \"eos_token_id\": 151645\n",
+ "}\n",
+ "\n",
+ "[INFO|modeling_utils.py:4364] 2024-06-30 13:22:34,747 >> All model checkpoint weights were used when initializing Qwen2ForCausalLM.\n",
+ "\n",
+ "[INFO|modeling_utils.py:4372] 2024-06-30 13:22:34,747 >> All the weights of Qwen2ForCausalLM were initialized from the model checkpoint at Qwen/Qwen2-0.5B-Instruct.\n",
+ "If your task is similar to the task the model of the checkpoint was trained on, you can already use Qwen2ForCausalLM for predictions without further training.\n",
+ "[INFO|configuration_utils.py:955] 2024-06-30 13:22:35,055 >> loading configuration file generation_config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/generation_config.json\n",
+ "[INFO|configuration_utils.py:1000] 2024-06-30 13:22:35,055 >> Generate config GenerationConfig {\n",
+ " \"bos_token_id\": 151643,\n",
+ " \"do_sample\": true,\n",
+ " \"eos_token_id\": [\n",
+ " 151645,\n",
+ " 151643\n",
+ " ],\n",
+ " \"pad_token_id\": 151643,\n",
+ " \"repetition_penalty\": 1.1,\n",
+ " \"temperature\": 0.7,\n",
+ " \"top_k\": 20,\n",
+ " \"top_p\": 0.8\n",
+ "}\n",
+ "\n",
+ "06/30/2024 13:22:35 - INFO - llamafactory.model.model_utils.attention - Using torch SDPA for faster training and inference.\n",
+ "06/30/2024 13:22:36 - INFO - llamafactory.model.adapter - Loaded adapter(s): llama-factory/saves/qwen2-0.5b/lora/sft/checkpoint-560\n",
+ "06/30/2024 13:22:36 - INFO - llamafactory.model.loader - all params: 498,431,872\n",
+ "(2) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n",
+ "0.666 GB of memory reserved.\n",
+ "loading train/test data files\n",
+ "DatasetDict({\n",
+ " train: Dataset({\n",
+ " features: ['chinese', 'english', 'text', 'prompt'],\n",
+ " num_rows: 4528\n",
+ " })\n",
+ " test: Dataset({\n",
+ " features: ['chinese', 'english', 'text', 'prompt'],\n",
+ " num_rows: 1133\n",
+ " })\n",
+ "})\n",
+ "Evaluating model: Qwen/Qwen2-0.5B-Instruct\n",
+ " 0%| | 0/1133 [00:00, ?it/s]--------\n",
+ "step 1: Old Trinket raised his gun and squinted his triangular eye. The trigger sounded as if a bullet had been shot; the bullets ricocheted off of the branches like hailstones. The sound was so loud that it could be heard from miles away.<|im_end|>\n",
+ "--------\n",
+ "step 2: Old Trinket raised his gun and squinted his triangular eye. The trigger sounded as if a bullet had been shot; the bullets ricocheted off of the branches like hailstones. The sound was so loud that it could be heard from miles away.\n",
+ "--------\n",
+ "step 3: Old Trinket raised his gun and squinted his triangular eye. The trigger sounded as if a bullet had been shot; the bullets ricocheted off of the branches like hailstones. The sound was so loud that it could be heard from miles away.\n",
+ "100%|█████████████████████████████████████| 1133/1133 [1:43:14<00:00, 5.47s/it]\n",
+ "(3) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n",
+ "0.9 GB of memory reserved.\n",
+ " chinese ... Qwen/Qwen2-0.5B-Instruct_checkpoint-560\n",
+ "0 老耿端起枪,眯缝起一只三角眼,一搂扳机响了枪,冰雹般的金麻雀劈哩啪啦往下落,铁砂子在柳枝间飞... ... Old Trinket raised his gun and squinted his tr...\n",
+ "\n",
+ "[1 rows x 3 columns]\n",
+ "{'accuracy': 0.00088261253309797, 'correct_ids': [272], 'meteor': 0.28906766286950575, 'bleu_scores': {'bleu': 0.05350226890847294, 'precisions': [0.34546985517009093, 0.08439261827222748, 0.02716499544211486, 0.011066742726754135], 'brevity_penalty': 0.9833003245834433, 'length_ratio': 0.9834382245776747, 'translation_length': 29690, 'reference_length': 30190}, 'rouge_scores': {'rouge1': 0.32218455635719456, 'rouge2': 0.09323903991316618, 'rougeL': 0.26091815189986767, 'rougeLsum': 0.2609816275457334}}\n",
+ "Epoch 2\n",
+ "loading env vars from: /home/inflaton/code/projects/courses/llm-finetuning/.env\n",
+ "Adding /home/inflaton/code/projects/courses/llm-finetuning to sys.path\n",
+ "🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.\n",
+ "[nltk_data] Downloading package wordnet to /home/inflaton/nltk_data...\n",
+ "[nltk_data] Package wordnet is already up-to-date!\n",
+ "[nltk_data] Downloading package punkt to /home/inflaton/nltk_data...\n",
+ "[nltk_data] Package punkt is already up-to-date!\n",
+ "[nltk_data] Downloading package omw-1.4 to /home/inflaton/nltk_data...\n",
+ "[nltk_data] Package omw-1.4 is already up-to-date!\n",
+ "loading /home/inflaton/code/projects/courses/llm-finetuning/llm_toolkit/translation_engine.py\n",
+ "Qwen/Qwen2-0.5B-Instruct llama-factory/saves/qwen2-0.5b/lora/sft/checkpoint-1120 True datasets/mac/mac.tsv results/mac-results_lf.csv\n",
+ "(1) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n",
+ "0.0 GB of memory reserved.\n",
+ "loading model: Qwen/Qwen2-0.5B-Instruct\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-06-30 15:06:25,573 >> loading file vocab.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/vocab.json\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-06-30 15:06:25,574 >> loading file merges.txt from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/merges.txt\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-06-30 15:06:25,574 >> loading file tokenizer.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/tokenizer.json\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-06-30 15:06:25,574 >> loading file added_tokens.json from cache at None\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-06-30 15:06:25,574 >> loading file special_tokens_map.json from cache at None\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-06-30 15:06:25,574 >> loading file tokenizer_config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/tokenizer_config.json\n",
+ "[WARNING|logging.py:313] 2024-06-30 15:06:25,971 >> Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n",
+ "06/30/2024 15:06:25 - INFO - llamafactory.data.template - Replace eos token: <|im_end|>\n",
+ "06/30/2024 15:06:25 - INFO - llamafactory.data.template - Add <|im_start|> to stop words.\n",
+ "[INFO|configuration_utils.py:733] 2024-06-30 15:06:26,308 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/config.json\n",
+ "[INFO|configuration_utils.py:800] 2024-06-30 15:06:26,309 >> Model config Qwen2Config {\n",
+ " \"_name_or_path\": \"Qwen/Qwen2-0.5B-Instruct\",\n",
+ " \"architectures\": [\n",
+ " \"Qwen2ForCausalLM\"\n",
+ " ],\n",
+ " \"attention_dropout\": 0.0,\n",
+ " \"bos_token_id\": 151643,\n",
+ " \"eos_token_id\": 151645,\n",
+ " \"hidden_act\": \"silu\",\n",
+ " \"hidden_size\": 896,\n",
+ " \"initializer_range\": 0.02,\n",
+ " \"intermediate_size\": 4864,\n",
+ " \"max_position_embeddings\": 32768,\n",
+ " \"max_window_layers\": 24,\n",
+ " \"model_type\": \"qwen2\",\n",
+ " \"num_attention_heads\": 14,\n",
+ " \"num_hidden_layers\": 24,\n",
+ " \"num_key_value_heads\": 2,\n",
+ " \"rms_norm_eps\": 1e-06,\n",
+ " \"rope_theta\": 1000000.0,\n",
+ " \"sliding_window\": 32768,\n",
+ " \"tie_word_embeddings\": true,\n",
+ " \"torch_dtype\": \"bfloat16\",\n",
+ " \"transformers_version\": \"4.42.3\",\n",
+ " \"use_cache\": true,\n",
+ " \"use_sliding_window\": false,\n",
+ " \"vocab_size\": 151936\n",
+ "}\n",
+ "\n",
+ "06/30/2024 15:06:26 - INFO - llamafactory.model.model_utils.quantization - Quantizing model to 4 bit with bitsandbytes.\n",
+ "06/30/2024 15:06:26 - INFO - llamafactory.model.patcher - Using KV cache for faster generation.\n",
+ "[INFO|modeling_utils.py:3556] 2024-06-30 15:06:26,450 >> loading weights file model.safetensors from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/model.safetensors\n",
+ "[INFO|modeling_utils.py:1531] 2024-06-30 15:06:28,647 >> Instantiating Qwen2ForCausalLM model under default dtype torch.bfloat16.\n",
+ "[INFO|configuration_utils.py:1000] 2024-06-30 15:06:28,655 >> Generate config GenerationConfig {\n",
+ " \"bos_token_id\": 151643,\n",
+ " \"eos_token_id\": 151645\n",
+ "}\n",
+ "\n",
+ "[INFO|modeling_utils.py:4364] 2024-06-30 15:07:24,483 >> All model checkpoint weights were used when initializing Qwen2ForCausalLM.\n",
+ "\n",
+ "[INFO|modeling_utils.py:4372] 2024-06-30 15:07:24,484 >> All the weights of Qwen2ForCausalLM were initialized from the model checkpoint at Qwen/Qwen2-0.5B-Instruct.\n",
+ "If your task is similar to the task the model of the checkpoint was trained on, you can already use Qwen2ForCausalLM for predictions without further training.\n",
+ "[INFO|configuration_utils.py:955] 2024-06-30 15:07:24,816 >> loading configuration file generation_config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/generation_config.json\n",
+ "[INFO|configuration_utils.py:1000] 2024-06-30 15:07:24,816 >> Generate config GenerationConfig {\n",
+ " \"bos_token_id\": 151643,\n",
+ " \"do_sample\": true,\n",
+ " \"eos_token_id\": [\n",
+ " 151645,\n",
+ " 151643\n",
+ " ],\n",
+ " \"pad_token_id\": 151643,\n",
+ " \"repetition_penalty\": 1.1,\n",
+ " \"temperature\": 0.7,\n",
+ " \"top_k\": 20,\n",
+ " \"top_p\": 0.8\n",
+ "}\n",
+ "\n",
+ "06/30/2024 15:07:25 - INFO - llamafactory.model.model_utils.attention - Using torch SDPA for faster training and inference.\n",
+ "06/30/2024 15:07:25 - INFO - llamafactory.model.adapter - Loaded adapter(s): llama-factory/saves/qwen2-0.5b/lora/sft/checkpoint-1120\n",
+ "06/30/2024 15:07:25 - INFO - llamafactory.model.loader - all params: 498,431,872\n",
+ "(2) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n",
+ "0.666 GB of memory reserved.\n",
+ "loading train/test data files\n",
+ "DatasetDict({\n",
+ " train: Dataset({\n",
+ " features: ['chinese', 'english', 'text', 'prompt'],\n",
+ " num_rows: 4528\n",
+ " })\n",
+ " test: Dataset({\n",
+ " features: ['chinese', 'english', 'text', 'prompt'],\n",
+ " num_rows: 1133\n",
+ " })\n",
+ "})\n",
+ "Evaluating model: Qwen/Qwen2-0.5B-Instruct\n",
+ " 0%| | 0/1133 [00:00, ?it/s]--------\n",
+ "step 1: Old Geng raised his gun, his eyes narrowed. The shotgun fired; a deafening boom of gunfire followed, crickets chirping in the air, the sound like the cracking of ice chips on branches.<|im_end|>\n",
+ "--------\n",
+ "step 2: Old Geng raised his gun, his eyes narrowed. The shotgun fired; a deafening boom of gunfire followed, crickets chirping in the air, the sound like the cracking of ice chips on branches.\n",
+ "--------\n",
+ "step 3: Old Geng raised his gun, his eyes narrowed. The shotgun fired; a deafening boom of gunfire followed, crickets chirping in the air, the sound like the cracking of ice chips on branches.\n",
+ "100%|███████████████████████████████████████| 1133/1133 [46:43<00:00, 2.47s/it]\n",
+ "(3) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n",
+ "0.9 GB of memory reserved.\n",
+ " chinese ... Qwen/Qwen2-0.5B-Instruct_checkpoint-1120\n",
+ "0 老耿端起枪,眯缝起一只三角眼,一搂扳机响了枪,冰雹般的金麻雀劈哩啪啦往下落,铁砂子在柳枝间飞... ... Old Geng raised his gun, his eyes narrowed. Th...\n",
+ "\n",
+ "[1 rows x 4 columns]\n",
+ "{'accuracy': 0.00088261253309797, 'correct_ids': [659], 'meteor': 0.3075388134142166, 'bleu_scores': {'bleu': 0.06482340202869877, 'precisions': [0.36907098754416645, 0.10273004537677602, 0.038322655794991264, 0.01656785511248274], 'brevity_penalty': 0.9254426305194808, 'length_ratio': 0.9280887711162636, 'translation_length': 28019, 'reference_length': 30190}, 'rouge_scores': {'rouge1': 0.34374303845779386, 'rouge2': 0.11899790599832506, 'rougeL': 0.2851818971023854, 'rougeLsum': 0.285674896233578}}\n",
+ "Epoch 3\n",
+ "loading env vars from: /home/inflaton/code/projects/courses/llm-finetuning/.env\n",
+ "Adding /home/inflaton/code/projects/courses/llm-finetuning to sys.path\n",
+ "🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.\n",
+ "[nltk_data] Downloading package wordnet to /home/inflaton/nltk_data...\n",
+ "[nltk_data] Package wordnet is already up-to-date!\n",
+ "[nltk_data] Downloading package punkt to /home/inflaton/nltk_data...\n",
+ "[nltk_data] Package punkt is already up-to-date!\n",
+ "[nltk_data] Downloading package omw-1.4 to /home/inflaton/nltk_data...\n",
+ "[nltk_data] Package omw-1.4 is already up-to-date!\n",
+ "loading /home/inflaton/code/projects/courses/llm-finetuning/llm_toolkit/translation_engine.py\n",
+ "Qwen/Qwen2-0.5B-Instruct llama-factory/saves/qwen2-0.5b/lora/sft/checkpoint-1680 True datasets/mac/mac.tsv results/mac-results_lf.csv\n",
+ "(1) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n",
+ "0.0 GB of memory reserved.\n",
+ "loading model: Qwen/Qwen2-0.5B-Instruct\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-06-30 15:54:26,677 >> loading file vocab.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/vocab.json\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-06-30 15:54:26,678 >> loading file merges.txt from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/merges.txt\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-06-30 15:54:26,678 >> loading file tokenizer.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/tokenizer.json\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-06-30 15:54:26,678 >> loading file added_tokens.json from cache at None\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-06-30 15:54:26,678 >> loading file special_tokens_map.json from cache at None\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-06-30 15:54:26,678 >> loading file tokenizer_config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/tokenizer_config.json\n",
+ "[WARNING|logging.py:313] 2024-06-30 15:54:26,803 >> Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n",
+ "06/30/2024 15:54:26 - INFO - llamafactory.data.template - Replace eos token: <|im_end|>\n",
+ "06/30/2024 15:54:26 - INFO - llamafactory.data.template - Add <|im_start|> to stop words.\n",
+ "[INFO|configuration_utils.py:733] 2024-06-30 15:54:27,176 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/config.json\n",
+ "[INFO|configuration_utils.py:800] 2024-06-30 15:54:27,177 >> Model config Qwen2Config {\n",
+ " \"_name_or_path\": \"Qwen/Qwen2-0.5B-Instruct\",\n",
+ " \"architectures\": [\n",
+ " \"Qwen2ForCausalLM\"\n",
+ " ],\n",
+ " \"attention_dropout\": 0.0,\n",
+ " \"bos_token_id\": 151643,\n",
+ " \"eos_token_id\": 151645,\n",
+ " \"hidden_act\": \"silu\",\n",
+ " \"hidden_size\": 896,\n",
+ " \"initializer_range\": 0.02,\n",
+ " \"intermediate_size\": 4864,\n",
+ " \"max_position_embeddings\": 32768,\n",
+ " \"max_window_layers\": 24,\n",
+ " \"model_type\": \"qwen2\",\n",
+ " \"num_attention_heads\": 14,\n",
+ " \"num_hidden_layers\": 24,\n",
+ " \"num_key_value_heads\": 2,\n",
+ " \"rms_norm_eps\": 1e-06,\n",
+ " \"rope_theta\": 1000000.0,\n",
+ " \"sliding_window\": 32768,\n",
+ " \"tie_word_embeddings\": true,\n",
+ " \"torch_dtype\": \"bfloat16\",\n",
+ " \"transformers_version\": \"4.42.3\",\n",
+ " \"use_cache\": true,\n",
+ " \"use_sliding_window\": false,\n",
+ " \"vocab_size\": 151936\n",
+ "}\n",
+ "\n",
+ "06/30/2024 15:54:27 - INFO - llamafactory.model.model_utils.quantization - Quantizing model to 4 bit with bitsandbytes.\n",
+ "06/30/2024 15:54:27 - INFO - llamafactory.model.patcher - Using KV cache for faster generation.\n",
+ "[INFO|modeling_utils.py:3556] 2024-06-30 15:54:27,212 >> loading weights file model.safetensors from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/model.safetensors\n",
+ "[INFO|modeling_utils.py:1531] 2024-06-30 15:54:27,943 >> Instantiating Qwen2ForCausalLM model under default dtype torch.bfloat16.\n",
+ "[INFO|configuration_utils.py:1000] 2024-06-30 15:54:27,946 >> Generate config GenerationConfig {\n",
+ " \"bos_token_id\": 151643,\n",
+ " \"eos_token_id\": 151645\n",
+ "}\n",
+ "\n",
+ "[INFO|modeling_utils.py:4364] 2024-06-30 15:54:50,953 >> All model checkpoint weights were used when initializing Qwen2ForCausalLM.\n",
+ "\n",
+ "[INFO|modeling_utils.py:4372] 2024-06-30 15:54:50,954 >> All the weights of Qwen2ForCausalLM were initialized from the model checkpoint at Qwen/Qwen2-0.5B-Instruct.\n",
+ "If your task is similar to the task the model of the checkpoint was trained on, you can already use Qwen2ForCausalLM for predictions without further training.\n",
+ "[INFO|configuration_utils.py:955] 2024-06-30 15:54:51,228 >> loading configuration file generation_config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/generation_config.json\n",
+ "[INFO|configuration_utils.py:1000] 2024-06-30 15:54:51,228 >> Generate config GenerationConfig {\n",
+ " \"bos_token_id\": 151643,\n",
+ " \"do_sample\": true,\n",
+ " \"eos_token_id\": [\n",
+ " 151645,\n",
+ " 151643\n",
+ " ],\n",
+ " \"pad_token_id\": 151643,\n",
+ " \"repetition_penalty\": 1.1,\n",
+ " \"temperature\": 0.7,\n",
+ " \"top_k\": 20,\n",
+ " \"top_p\": 0.8\n",
+ "}\n",
+ "\n",
+ "06/30/2024 15:54:51 - INFO - llamafactory.model.model_utils.attention - Using torch SDPA for faster training and inference.\n",
+ "06/30/2024 15:54:51 - INFO - llamafactory.model.adapter - Loaded adapter(s): llama-factory/saves/qwen2-0.5b/lora/sft/checkpoint-1680\n",
+ "06/30/2024 15:54:51 - INFO - llamafactory.model.loader - all params: 498,431,872\n",
+ "(2) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n",
+ "0.666 GB of memory reserved.\n",
+ "loading train/test data files\n",
+ "DatasetDict({\n",
+ " train: Dataset({\n",
+ " features: ['chinese', 'english', 'text', 'prompt'],\n",
+ " num_rows: 4528\n",
+ " })\n",
+ " test: Dataset({\n",
+ " features: ['chinese', 'english', 'text', 'prompt'],\n",
+ " num_rows: 1133\n",
+ " })\n",
+ "})\n",
+ "Evaluating model: Qwen/Qwen2-0.5B-Instruct\n",
+ " 0%| | 0/1133 [00:00, ?it/s]--------\n",
+ "step 1: Old Geng held his gun up, half-closed, and cocked it as if he was aiming for a bullet. The barrel cracked, and bullets flew down like ice nuggets; the leaves on the riverbank splashed like cannonballs.<|im_end|>\n",
+ "--------\n",
+ "step 2: Old Geng held his gun up, half-closed, and cocked it as if he was aiming for a bullet. The barrel cracked, and bullets flew down like ice nuggets; the leaves on the riverbank splashed like cannonballs.\n",
+ "--------\n",
+ "step 3: Old Geng held his gun up, half-closed, and cocked it as if he was aiming for a bullet. The barrel cracked, and bullets flew down like ice nuggets; the leaves on the riverbank splashed like cannonballs.\n",
+ "100%|███████████████████████████████████████| 1133/1133 [44:44<00:00, 2.37s/it]\n",
+ "(3) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n",
+ "4.24 GB of memory reserved.\n",
+ " chinese ... Qwen/Qwen2-0.5B-Instruct_checkpoint-1680\n",
+ "0 老耿端起枪,眯缝起一只三角眼,一搂扳机响了枪,冰雹般的金麻雀劈哩啪啦往下落,铁砂子在柳枝间飞... ... Old Geng held his gun up, half-closed, and coc...\n",
+ "\n",
+ "[1 rows x 5 columns]\n",
+ "{'accuracy': 0.00353045013239188, 'correct_ids': [147, 194, 202, 364], 'meteor': 0.3232125016634757, 'bleu_scores': {'bleu': 0.06687635711488571, 'precisions': [0.33171058236475387, 0.0956102480068068, 0.03666427030913012, 0.017202185050724392], 'brevity_penalty': 1.0, 'length_ratio': 1.0886386220602848, 'translation_length': 32866, 'reference_length': 30190}, 'rouge_scores': {'rouge1': 0.352664915385991, 'rouge2': 0.1232869942455126, 'rougeL': 0.2909052156293055, 'rougeLsum': 0.2907588163008441}}\n",
+ "Epoch 4\n",
+ "loading env vars from: /home/inflaton/code/projects/courses/llm-finetuning/.env\n",
+ "Adding /home/inflaton/code/projects/courses/llm-finetuning to sys.path\n",
+ "🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.\n",
+ "[nltk_data] Downloading package wordnet to /home/inflaton/nltk_data...\n",
+ "[nltk_data] Package wordnet is already up-to-date!\n",
+ "[nltk_data] Downloading package punkt to /home/inflaton/nltk_data...\n",
+ "[nltk_data] Package punkt is already up-to-date!\n",
+ "[nltk_data] Downloading package omw-1.4 to /home/inflaton/nltk_data...\n",
+ "[nltk_data] Package omw-1.4 is already up-to-date!\n",
+ "loading /home/inflaton/code/projects/courses/llm-finetuning/llm_toolkit/translation_engine.py\n",
+ "Qwen/Qwen2-0.5B-Instruct llama-factory/saves/qwen2-0.5b/lora/sft/checkpoint-2240 True datasets/mac/mac.tsv results/mac-results_lf.csv\n",
+ "(1) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n",
+ "0.0 GB of memory reserved.\n",
+ "loading model: Qwen/Qwen2-0.5B-Instruct\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-06-30 16:39:54,252 >> loading file vocab.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/vocab.json\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-06-30 16:39:54,252 >> loading file merges.txt from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/merges.txt\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-06-30 16:39:54,252 >> loading file tokenizer.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/tokenizer.json\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-06-30 16:39:54,252 >> loading file added_tokens.json from cache at None\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-06-30 16:39:54,252 >> loading file special_tokens_map.json from cache at None\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-06-30 16:39:54,252 >> loading file tokenizer_config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/tokenizer_config.json\n",
+ "[WARNING|logging.py:313] 2024-06-30 16:39:54,394 >> Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n",
+ "06/30/2024 16:39:54 - INFO - llamafactory.data.template - Replace eos token: <|im_end|>\n",
+ "06/30/2024 16:39:54 - INFO - llamafactory.data.template - Add <|im_start|> to stop words.\n",
+ "[INFO|configuration_utils.py:733] 2024-06-30 16:39:54,662 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/config.json\n",
+ "[INFO|configuration_utils.py:800] 2024-06-30 16:39:54,663 >> Model config Qwen2Config {\n",
+ " \"_name_or_path\": \"Qwen/Qwen2-0.5B-Instruct\",\n",
+ " \"architectures\": [\n",
+ " \"Qwen2ForCausalLM\"\n",
+ " ],\n",
+ " \"attention_dropout\": 0.0,\n",
+ " \"bos_token_id\": 151643,\n",
+ " \"eos_token_id\": 151645,\n",
+ " \"hidden_act\": \"silu\",\n",
+ " \"hidden_size\": 896,\n",
+ " \"initializer_range\": 0.02,\n",
+ " \"intermediate_size\": 4864,\n",
+ " \"max_position_embeddings\": 32768,\n",
+ " \"max_window_layers\": 24,\n",
+ " \"model_type\": \"qwen2\",\n",
+ " \"num_attention_heads\": 14,\n",
+ " \"num_hidden_layers\": 24,\n",
+ " \"num_key_value_heads\": 2,\n",
+ " \"rms_norm_eps\": 1e-06,\n",
+ " \"rope_theta\": 1000000.0,\n",
+ " \"sliding_window\": 32768,\n",
+ " \"tie_word_embeddings\": true,\n",
+ " \"torch_dtype\": \"bfloat16\",\n",
+ " \"transformers_version\": \"4.42.3\",\n",
+ " \"use_cache\": true,\n",
+ " \"use_sliding_window\": false,\n",
+ " \"vocab_size\": 151936\n",
+ "}\n",
+ "\n",
+ "06/30/2024 16:39:54 - INFO - llamafactory.model.model_utils.quantization - Quantizing model to 4 bit with bitsandbytes.\n",
+ "06/30/2024 16:39:54 - INFO - llamafactory.model.patcher - Using KV cache for faster generation.\n",
+ "[INFO|modeling_utils.py:3556] 2024-06-30 16:39:54,705 >> loading weights file model.safetensors from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/model.safetensors\n",
+ "[INFO|modeling_utils.py:1531] 2024-06-30 16:39:55,523 >> Instantiating Qwen2ForCausalLM model under default dtype torch.bfloat16.\n",
+ "[INFO|configuration_utils.py:1000] 2024-06-30 16:39:55,526 >> Generate config GenerationConfig {\n",
+ " \"bos_token_id\": 151643,\n",
+ " \"eos_token_id\": 151645\n",
+ "}\n",
+ "\n",
+ "[INFO|modeling_utils.py:4364] 2024-06-30 16:40:17,339 >> All model checkpoint weights were used when initializing Qwen2ForCausalLM.\n",
+ "\n",
+ "[INFO|modeling_utils.py:4372] 2024-06-30 16:40:17,339 >> All the weights of Qwen2ForCausalLM were initialized from the model checkpoint at Qwen/Qwen2-0.5B-Instruct.\n",
+ "If your task is similar to the task the model of the checkpoint was trained on, you can already use Qwen2ForCausalLM for predictions without further training.\n",
+ "[INFO|configuration_utils.py:955] 2024-06-30 16:40:17,617 >> loading configuration file generation_config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/generation_config.json\n",
+ "[INFO|configuration_utils.py:1000] 2024-06-30 16:40:17,617 >> Generate config GenerationConfig {\n",
+ " \"bos_token_id\": 151643,\n",
+ " \"do_sample\": true,\n",
+ " \"eos_token_id\": [\n",
+ " 151645,\n",
+ " 151643\n",
+ " ],\n",
+ " \"pad_token_id\": 151643,\n",
+ " \"repetition_penalty\": 1.1,\n",
+ " \"temperature\": 0.7,\n",
+ " \"top_k\": 20,\n",
+ " \"top_p\": 0.8\n",
+ "}\n",
+ "\n",
+ "06/30/2024 16:40:17 - INFO - llamafactory.model.model_utils.attention - Using torch SDPA for faster training and inference.\n",
+ "06/30/2024 16:40:18 - INFO - llamafactory.model.adapter - Loaded adapter(s): llama-factory/saves/qwen2-0.5b/lora/sft/checkpoint-2240\n",
+ "06/30/2024 16:40:18 - INFO - llamafactory.model.loader - all params: 498,431,872\n",
+ "(2) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n",
+ "0.666 GB of memory reserved.\n",
+ "loading train/test data files\n",
+ "DatasetDict({\n",
+ " train: Dataset({\n",
+ " features: ['chinese', 'english', 'text', 'prompt'],\n",
+ " num_rows: 4528\n",
+ " })\n",
+ " test: Dataset({\n",
+ " features: ['chinese', 'english', 'text', 'prompt'],\n",
+ " num_rows: 1133\n",
+ " })\n",
+ "})\n",
+ "Evaluating model: Qwen/Qwen2-0.5B-Instruct\n",
+ " 0%| | 0/1133 [00:00, ?it/s]--------\n",
+ "step 1: Old Geng raised his gun, his triangular eye half-lit. The trigger sounded as he fired, bullets raining down like a cold stinker from skyward. The metal chattering sounded as if it had broken glass in the branches of the willows.<|im_end|>\n",
+ "--------\n",
+ "step 2: Old Geng raised his gun, his triangular eye half-lit. The trigger sounded as he fired, bullets raining down like a cold stinker from skyward. The metal chattering sounded as if it had broken glass in the branches of the willows.\n",
+ "--------\n",
+ "step 3: Old Geng raised his gun, his triangular eye half-lit. The trigger sounded as he fired, bullets raining down like a cold stinker from skyward. The metal chattering sounded as if it had broken glass in the branches of the willows.\n",
+ "100%|███████████████████████████████████████| 1133/1133 [57:01<00:00, 3.02s/it]\n",
+ "(3) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n",
+ "4.221 GB of memory reserved.\n",
+ " chinese ... Qwen/Qwen2-0.5B-Instruct_checkpoint-2240\n",
+ "0 老耿端起枪,眯缝起一只三角眼,一搂扳机响了枪,冰雹般的金麻雀劈哩啪啦往下落,铁砂子在柳枝间飞... ... Old Geng raised his gun, his triangular eye ha...\n",
+ "\n",
+ "[1 rows x 6 columns]\n",
+ "{'accuracy': 0.00441306266548985, 'correct_ids': [147, 202, 364, 533, 850], 'meteor': 0.3141676906431015, 'bleu_scores': {'bleu': 0.05981782718505817, 'precisions': [0.2922991381706978, 0.08376151792634268, 0.033080163769061886, 0.01580821413223648], 'brevity_penalty': 1.0, 'length_ratio': 1.1914541238820802, 'translation_length': 35970, 'reference_length': 30190}, 'rouge_scores': {'rouge1': 0.3493638878638674, 'rouge2': 0.1255400870123861, 'rougeL': 0.2910327113370838, 'rougeLsum': 0.2905461546619883}}\n",
+ "Epoch 5\n",
+ "loading env vars from: /home/inflaton/code/projects/courses/llm-finetuning/.env\n",
+ "Adding /home/inflaton/code/projects/courses/llm-finetuning to sys.path\n",
+ "🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.\n",
+ "[nltk_data] Downloading package wordnet to /home/inflaton/nltk_data...\n",
+ "[nltk_data] Package wordnet is already up-to-date!\n",
+ "[nltk_data] Downloading package punkt to /home/inflaton/nltk_data...\n",
+ "[nltk_data] Package punkt is already up-to-date!\n",
+ "[nltk_data] Downloading package omw-1.4 to /home/inflaton/nltk_data...\n",
+ "[nltk_data] Package omw-1.4 is already up-to-date!\n",
+ "loading /home/inflaton/code/projects/courses/llm-finetuning/llm_toolkit/translation_engine.py\n",
+ "Qwen/Qwen2-0.5B-Instruct llama-factory/saves/qwen2-0.5b/lora/sft/checkpoint-2800 True datasets/mac/mac.tsv results/mac-results_lf.csv\n",
+ "(1) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n",
+ "0.0 GB of memory reserved.\n",
+ "loading model: Qwen/Qwen2-0.5B-Instruct\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-06-30 17:37:38,874 >> loading file vocab.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/vocab.json\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-06-30 17:37:38,874 >> loading file merges.txt from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/merges.txt\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-06-30 17:37:38,875 >> loading file tokenizer.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/tokenizer.json\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-06-30 17:37:38,875 >> loading file added_tokens.json from cache at None\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-06-30 17:37:38,875 >> loading file special_tokens_map.json from cache at None\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-06-30 17:37:38,875 >> loading file tokenizer_config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/tokenizer_config.json\n",
+ "[WARNING|logging.py:313] 2024-06-30 17:37:39,004 >> Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n",
+ "06/30/2024 17:37:39 - INFO - llamafactory.data.template - Replace eos token: <|im_end|>\n",
+ "06/30/2024 17:37:39 - INFO - llamafactory.data.template - Add <|im_start|> to stop words.\n",
+ "[INFO|configuration_utils.py:733] 2024-06-30 17:37:39,272 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/config.json\n",
+ "[INFO|configuration_utils.py:800] 2024-06-30 17:37:39,272 >> Model config Qwen2Config {\n",
+ " \"_name_or_path\": \"Qwen/Qwen2-0.5B-Instruct\",\n",
+ " \"architectures\": [\n",
+ " \"Qwen2ForCausalLM\"\n",
+ " ],\n",
+ " \"attention_dropout\": 0.0,\n",
+ " \"bos_token_id\": 151643,\n",
+ " \"eos_token_id\": 151645,\n",
+ " \"hidden_act\": \"silu\",\n",
+ " \"hidden_size\": 896,\n",
+ " \"initializer_range\": 0.02,\n",
+ " \"intermediate_size\": 4864,\n",
+ " \"max_position_embeddings\": 32768,\n",
+ " \"max_window_layers\": 24,\n",
+ " \"model_type\": \"qwen2\",\n",
+ " \"num_attention_heads\": 14,\n",
+ " \"num_hidden_layers\": 24,\n",
+ " \"num_key_value_heads\": 2,\n",
+ " \"rms_norm_eps\": 1e-06,\n",
+ " \"rope_theta\": 1000000.0,\n",
+ " \"sliding_window\": 32768,\n",
+ " \"tie_word_embeddings\": true,\n",
+ " \"torch_dtype\": \"bfloat16\",\n",
+ " \"transformers_version\": \"4.42.3\",\n",
+ " \"use_cache\": true,\n",
+ " \"use_sliding_window\": false,\n",
+ " \"vocab_size\": 151936\n",
+ "}\n",
+ "\n",
+ "06/30/2024 17:37:39 - INFO - llamafactory.model.model_utils.quantization - Quantizing model to 4 bit with bitsandbytes.\n",
+ "06/30/2024 17:37:39 - INFO - llamafactory.model.patcher - Using KV cache for faster generation.\n",
+ "[INFO|modeling_utils.py:3556] 2024-06-30 17:37:39,347 >> loading weights file model.safetensors from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/model.safetensors\n",
+ "[INFO|modeling_utils.py:1531] 2024-06-30 17:37:41,000 >> Instantiating Qwen2ForCausalLM model under default dtype torch.bfloat16.\n",
+ "[INFO|configuration_utils.py:1000] 2024-06-30 17:37:41,003 >> Generate config GenerationConfig {\n",
+ " \"bos_token_id\": 151643,\n",
+ " \"eos_token_id\": 151645\n",
+ "}\n",
+ "\n",
+ "[INFO|modeling_utils.py:4364] 2024-06-30 17:38:03,532 >> All model checkpoint weights were used when initializing Qwen2ForCausalLM.\n",
+ "\n",
+ "[INFO|modeling_utils.py:4372] 2024-06-30 17:38:03,532 >> All the weights of Qwen2ForCausalLM were initialized from the model checkpoint at Qwen/Qwen2-0.5B-Instruct.\n",
+ "If your task is similar to the task the model of the checkpoint was trained on, you can already use Qwen2ForCausalLM for predictions without further training.\n",
+ "[INFO|configuration_utils.py:955] 2024-06-30 17:38:03,825 >> loading configuration file generation_config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/generation_config.json\n",
+ "[INFO|configuration_utils.py:1000] 2024-06-30 17:38:03,825 >> Generate config GenerationConfig {\n",
+ " \"bos_token_id\": 151643,\n",
+ " \"do_sample\": true,\n",
+ " \"eos_token_id\": [\n",
+ " 151645,\n",
+ " 151643\n",
+ " ],\n",
+ " \"pad_token_id\": 151643,\n",
+ " \"repetition_penalty\": 1.1,\n",
+ " \"temperature\": 0.7,\n",
+ " \"top_k\": 20,\n",
+ " \"top_p\": 0.8\n",
+ "}\n",
+ "\n",
+ "06/30/2024 17:38:04 - INFO - llamafactory.model.model_utils.attention - Using torch SDPA for faster training and inference.\n",
+ "06/30/2024 17:38:04 - INFO - llamafactory.model.adapter - Loaded adapter(s): llama-factory/saves/qwen2-0.5b/lora/sft/checkpoint-2800\n",
+ "06/30/2024 17:38:04 - INFO - llamafactory.model.loader - all params: 498,431,872\n",
+ "(2) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n",
+ "0.666 GB of memory reserved.\n",
+ "loading train/test data files\n",
+ "DatasetDict({\n",
+ " train: Dataset({\n",
+ " features: ['chinese', 'english', 'text', 'prompt'],\n",
+ " num_rows: 4528\n",
+ " })\n",
+ " test: Dataset({\n",
+ " features: ['chinese', 'english', 'text', 'prompt'],\n",
+ " num_rows: 1133\n",
+ " })\n",
+ "})\n",
+ "Evaluating model: Qwen/Qwen2-0.5B-Instruct\n",
+ " 0%| | 0/1133 [00:00, ?it/s]--------\n",
+ "step 1: Old Geng took out his pistol, squinted over a triangular brow, then fired. A hail of bullets fell like ice-crystals from the sky: crisscrossing branches crackled with sounds like a bucketful of cold metal sparrows flying through the air.<|im_end|>\n",
+ "--------\n",
+ "step 2: Old Geng took out his pistol, squinted over a triangular brow, then fired. A hail of bullets fell like ice-crystals from the sky: crisscrossing branches crackled with sounds like a bucketful of cold metal sparrows flying through the air.\n",
+ "--------\n",
+ "step 3: Old Geng took out his pistol, squinted over a triangular brow, then fired. A hail of bullets fell like ice-crystals from the sky: crisscrossing branches crackled with sounds like a bucketful of cold metal sparrows flying through the air.\n",
+ "100%|███████████████████████████████████████| 1133/1133 [44:54<00:00, 2.38s/it]\n",
+ "(3) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n",
+ "4.201 GB of memory reserved.\n",
+ " chinese ... Qwen/Qwen2-0.5B-Instruct_checkpoint-2800\n",
+ "0 老耿端起枪,眯缝起一只三角眼,一搂扳机响了枪,冰雹般的金麻雀劈哩啪啦往下落,铁砂子在柳枝间飞... ... Old Geng took out his pistol, squinted over a ...\n",
+ "\n",
+ "[1 rows x 7 columns]\n",
+ "{'accuracy': 0.00264783759929391, 'correct_ids': [147, 194, 364], 'meteor': 0.31468732087511564, 'bleu_scores': {'bleu': 0.06531154622295796, 'precisions': [0.31492039110270875, 0.09110635696821516, 0.03624564735675847, 0.017496635262449527], 'brevity_penalty': 1.0, 'length_ratio': 1.121331566743955, 'translation_length': 33853, 'reference_length': 30190}, 'rouge_scores': {'rouge1': 0.3477119790584883, 'rouge2': 0.12383470549112005, 'rougeL': 0.28723768855041154, 'rougeLsum': 0.287515203604385}}\n",
+ "Epoch 6\n",
+ "loading env vars from: /home/inflaton/code/projects/courses/llm-finetuning/.env\n",
+ "Adding /home/inflaton/code/projects/courses/llm-finetuning to sys.path\n",
+ "🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.\n",
+ "[nltk_data] Downloading package wordnet to /home/inflaton/nltk_data...\n",
+ "[nltk_data] Package wordnet is already up-to-date!\n",
+ "[nltk_data] Downloading package punkt to /home/inflaton/nltk_data...\n",
+ "[nltk_data] Package punkt is already up-to-date!\n",
+ "[nltk_data] Downloading package omw-1.4 to /home/inflaton/nltk_data...\n",
+ "[nltk_data] Package omw-1.4 is already up-to-date!\n",
+ "loading /home/inflaton/code/projects/courses/llm-finetuning/llm_toolkit/translation_engine.py\n",
+ "Qwen/Qwen2-0.5B-Instruct llama-factory/saves/qwen2-0.5b/lora/sft/checkpoint-3360 True datasets/mac/mac.tsv results/mac-results_lf.csv\n",
+ "(1) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n",
+ "0.0 GB of memory reserved.\n",
+ "loading model: Qwen/Qwen2-0.5B-Instruct\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-06-30 18:23:20,733 >> loading file vocab.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/vocab.json\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-06-30 18:23:20,733 >> loading file merges.txt from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/merges.txt\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-06-30 18:23:20,733 >> loading file tokenizer.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/tokenizer.json\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-06-30 18:23:20,733 >> loading file added_tokens.json from cache at None\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-06-30 18:23:20,733 >> loading file special_tokens_map.json from cache at None\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-06-30 18:23:20,733 >> loading file tokenizer_config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/tokenizer_config.json\n",
+ "[WARNING|logging.py:313] 2024-06-30 18:23:20,880 >> Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n",
+ "06/30/2024 18:23:20 - INFO - llamafactory.data.template - Replace eos token: <|im_end|>\n",
+ "06/30/2024 18:23:20 - INFO - llamafactory.data.template - Add <|im_start|> to stop words.\n",
+ "[INFO|configuration_utils.py:733] 2024-06-30 18:23:21,195 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/config.json\n",
+ "[INFO|configuration_utils.py:800] 2024-06-30 18:23:21,195 >> Model config Qwen2Config {\n",
+ " \"_name_or_path\": \"Qwen/Qwen2-0.5B-Instruct\",\n",
+ " \"architectures\": [\n",
+ " \"Qwen2ForCausalLM\"\n",
+ " ],\n",
+ " \"attention_dropout\": 0.0,\n",
+ " \"bos_token_id\": 151643,\n",
+ " \"eos_token_id\": 151645,\n",
+ " \"hidden_act\": \"silu\",\n",
+ " \"hidden_size\": 896,\n",
+ " \"initializer_range\": 0.02,\n",
+ " \"intermediate_size\": 4864,\n",
+ " \"max_position_embeddings\": 32768,\n",
+ " \"max_window_layers\": 24,\n",
+ " \"model_type\": \"qwen2\",\n",
+ " \"num_attention_heads\": 14,\n",
+ " \"num_hidden_layers\": 24,\n",
+ " \"num_key_value_heads\": 2,\n",
+ " \"rms_norm_eps\": 1e-06,\n",
+ " \"rope_theta\": 1000000.0,\n",
+ " \"sliding_window\": 32768,\n",
+ " \"tie_word_embeddings\": true,\n",
+ " \"torch_dtype\": \"bfloat16\",\n",
+ " \"transformers_version\": \"4.42.3\",\n",
+ " \"use_cache\": true,\n",
+ " \"use_sliding_window\": false,\n",
+ " \"vocab_size\": 151936\n",
+ "}\n",
+ "\n",
+ "06/30/2024 18:23:21 - INFO - llamafactory.model.model_utils.quantization - Quantizing model to 4 bit with bitsandbytes.\n",
+ "06/30/2024 18:23:21 - INFO - llamafactory.model.patcher - Using KV cache for faster generation.\n",
+ "[INFO|modeling_utils.py:3556] 2024-06-30 18:23:21,271 >> loading weights file model.safetensors from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/model.safetensors\n",
+ "[INFO|modeling_utils.py:1531] 2024-06-30 18:23:23,604 >> Instantiating Qwen2ForCausalLM model under default dtype torch.bfloat16.\n",
+ "[INFO|configuration_utils.py:1000] 2024-06-30 18:23:23,608 >> Generate config GenerationConfig {\n",
+ " \"bos_token_id\": 151643,\n",
+ " \"eos_token_id\": 151645\n",
+ "}\n",
+ "\n",
+ "[INFO|modeling_utils.py:4364] 2024-06-30 18:23:50,830 >> All model checkpoint weights were used when initializing Qwen2ForCausalLM.\n",
+ "\n",
+ "[INFO|modeling_utils.py:4372] 2024-06-30 18:23:50,830 >> All the weights of Qwen2ForCausalLM were initialized from the model checkpoint at Qwen/Qwen2-0.5B-Instruct.\n",
+ "If your task is similar to the task the model of the checkpoint was trained on, you can already use Qwen2ForCausalLM for predictions without further training.\n",
+ "[INFO|configuration_utils.py:955] 2024-06-30 18:23:51,197 >> loading configuration file generation_config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/generation_config.json\n",
+ "[INFO|configuration_utils.py:1000] 2024-06-30 18:23:51,197 >> Generate config GenerationConfig {\n",
+ " \"bos_token_id\": 151643,\n",
+ " \"do_sample\": true,\n",
+ " \"eos_token_id\": [\n",
+ " 151645,\n",
+ " 151643\n",
+ " ],\n",
+ " \"pad_token_id\": 151643,\n",
+ " \"repetition_penalty\": 1.1,\n",
+ " \"temperature\": 0.7,\n",
+ " \"top_k\": 20,\n",
+ " \"top_p\": 0.8\n",
+ "}\n",
+ "\n",
+ "06/30/2024 18:23:51 - INFO - llamafactory.model.model_utils.attention - Using torch SDPA for faster training and inference.\n",
+ "06/30/2024 18:23:51 - INFO - llamafactory.model.adapter - Loaded adapter(s): llama-factory/saves/qwen2-0.5b/lora/sft/checkpoint-3360\n",
+ "06/30/2024 18:23:51 - INFO - llamafactory.model.loader - all params: 498,431,872\n",
+ "(2) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n",
+ "0.666 GB of memory reserved.\n",
+ "loading train/test data files\n",
+ "DatasetDict({\n",
+ " train: Dataset({\n",
+ " features: ['chinese', 'english', 'text', 'prompt'],\n",
+ " num_rows: 4528\n",
+ " })\n",
+ " test: Dataset({\n",
+ " features: ['chinese', 'english', 'text', 'prompt'],\n",
+ " num_rows: 1133\n",
+ " })\n",
+ "})\n",
+ "Evaluating model: Qwen/Qwen2-0.5B-Instruct\n",
+ " 0%| | 0/1133 [00:00, ?it/s]--------\n",
+ "step 1: Old Geng held his rifle up and cocked it over his right eye. Then the shotgun fired, loud as a bolt of golden sparrows that fell like ice-cold quakes down on the field. The chattering bits of iron were heard splashing across the field, cracking and crunching.<|im_end|>\n",
+ "--------\n",
+ "step 2: Old Geng held his rifle up and cocked it over his right eye. Then the shotgun fired, loud as a bolt of golden sparrows that fell like ice-cold quakes down on the field. The chattering bits of iron were heard splashing across the field, cracking and crunching.\n",
+ "--------\n",
+ "step 3: Old Geng held his rifle up and cocked it over his right eye. Then the shotgun fired, loud as a bolt of golden sparrows that fell like ice-cold quakes down on the field. The chattering bits of iron were heard splashing across the field, cracking and crunching.\n",
+ "100%|███████████████████████████████████████| 1133/1133 [45:34<00:00, 2.41s/it]\n",
+ "(3) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n",
+ "4.26 GB of memory reserved.\n",
+ " chinese ... Qwen/Qwen2-0.5B-Instruct_checkpoint-3360\n",
+ "0 老耿端起枪,眯缝起一只三角眼,一搂扳机响了枪,冰雹般的金麻雀劈哩啪啦往下落,铁砂子在柳枝间飞... ... Old Geng held his rifle up and cocked it over ...\n",
+ "\n",
+ "[1 rows x 8 columns]\n",
+ "{'accuracy': 0.00176522506619594, 'correct_ids': [272, 364], 'meteor': 0.3060953047058868, 'bleu_scores': {'bleu': 0.06197290227987762, 'precisions': [0.30625790139064474, 0.08672151109263164, 0.03420510771689357, 0.01623686723973257], 'brevity_penalty': 1.0, 'length_ratio': 1.1528320635972176, 'translation_length': 34804, 'reference_length': 30190}, 'rouge_scores': {'rouge1': 0.33981449502350625, 'rouge2': 0.11735200363049994, 'rougeL': 0.2798705836787463, 'rougeLsum': 0.27962230715315634}}\n",
+ "Epoch 7\n",
+ "loading env vars from: /home/inflaton/code/projects/courses/llm-finetuning/.env\n",
+ "Adding /home/inflaton/code/projects/courses/llm-finetuning to sys.path\n",
+ "🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.\n",
+ "[nltk_data] Downloading package wordnet to /home/inflaton/nltk_data...\n",
+ "[nltk_data] Package wordnet is already up-to-date!\n",
+ "[nltk_data] Downloading package punkt to /home/inflaton/nltk_data...\n",
+ "[nltk_data] Package punkt is already up-to-date!\n",
+ "[nltk_data] Downloading package omw-1.4 to /home/inflaton/nltk_data...\n",
+ "[nltk_data] Package omw-1.4 is already up-to-date!\n",
+ "loading /home/inflaton/code/projects/courses/llm-finetuning/llm_toolkit/translation_engine.py\n",
+ "Qwen/Qwen2-0.5B-Instruct llama-factory/saves/qwen2-0.5b/lora/sft/checkpoint-3920 True datasets/mac/mac.tsv results/mac-results_lf.csv\n",
+ "(1) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n",
+ "0.0 GB of memory reserved.\n",
+ "loading model: Qwen/Qwen2-0.5B-Instruct\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-06-30 19:09:46,741 >> loading file vocab.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/vocab.json\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-06-30 19:09:46,741 >> loading file merges.txt from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/merges.txt\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-06-30 19:09:46,741 >> loading file tokenizer.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/tokenizer.json\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-06-30 19:09:46,741 >> loading file added_tokens.json from cache at None\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-06-30 19:09:46,741 >> loading file special_tokens_map.json from cache at None\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-06-30 19:09:46,741 >> loading file tokenizer_config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/tokenizer_config.json\n",
+ "[WARNING|logging.py:313] 2024-06-30 19:09:46,876 >> Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n",
+ "06/30/2024 19:09:46 - INFO - llamafactory.data.template - Replace eos token: <|im_end|>\n",
+ "06/30/2024 19:09:46 - INFO - llamafactory.data.template - Add <|im_start|> to stop words.\n",
+ "[INFO|configuration_utils.py:733] 2024-06-30 19:09:47,204 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/config.json\n",
+ "[INFO|configuration_utils.py:800] 2024-06-30 19:09:47,204 >> Model config Qwen2Config {\n",
+ " \"_name_or_path\": \"Qwen/Qwen2-0.5B-Instruct\",\n",
+ " \"architectures\": [\n",
+ " \"Qwen2ForCausalLM\"\n",
+ " ],\n",
+ " \"attention_dropout\": 0.0,\n",
+ " \"bos_token_id\": 151643,\n",
+ " \"eos_token_id\": 151645,\n",
+ " \"hidden_act\": \"silu\",\n",
+ " \"hidden_size\": 896,\n",
+ " \"initializer_range\": 0.02,\n",
+ " \"intermediate_size\": 4864,\n",
+ " \"max_position_embeddings\": 32768,\n",
+ " \"max_window_layers\": 24,\n",
+ " \"model_type\": \"qwen2\",\n",
+ " \"num_attention_heads\": 14,\n",
+ " \"num_hidden_layers\": 24,\n",
+ " \"num_key_value_heads\": 2,\n",
+ " \"rms_norm_eps\": 1e-06,\n",
+ " \"rope_theta\": 1000000.0,\n",
+ " \"sliding_window\": 32768,\n",
+ " \"tie_word_embeddings\": true,\n",
+ " \"torch_dtype\": \"bfloat16\",\n",
+ " \"transformers_version\": \"4.42.3\",\n",
+ " \"use_cache\": true,\n",
+ " \"use_sliding_window\": false,\n",
+ " \"vocab_size\": 151936\n",
+ "}\n",
+ "\n",
+ "06/30/2024 19:09:47 - INFO - llamafactory.model.model_utils.quantization - Quantizing model to 4 bit with bitsandbytes.\n",
+ "06/30/2024 19:09:47 - INFO - llamafactory.model.patcher - Using KV cache for faster generation.\n",
+ "[INFO|modeling_utils.py:3556] 2024-06-30 19:09:47,246 >> loading weights file model.safetensors from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/model.safetensors\n",
+ "[INFO|modeling_utils.py:1531] 2024-06-30 19:09:48,444 >> Instantiating Qwen2ForCausalLM model under default dtype torch.bfloat16.\n",
+ "[INFO|configuration_utils.py:1000] 2024-06-30 19:09:48,446 >> Generate config GenerationConfig {\n",
+ " \"bos_token_id\": 151643,\n",
+ " \"eos_token_id\": 151645\n",
+ "}\n",
+ "\n",
+ "[INFO|modeling_utils.py:4364] 2024-06-30 19:10:17,136 >> All model checkpoint weights were used when initializing Qwen2ForCausalLM.\n",
+ "\n",
+ "[INFO|modeling_utils.py:4372] 2024-06-30 19:10:17,136 >> All the weights of Qwen2ForCausalLM were initialized from the model checkpoint at Qwen/Qwen2-0.5B-Instruct.\n",
+ "If your task is similar to the task the model of the checkpoint was trained on, you can already use Qwen2ForCausalLM for predictions without further training.\n",
+ "[INFO|configuration_utils.py:955] 2024-06-30 19:10:17,747 >> loading configuration file generation_config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/generation_config.json\n",
+ "[INFO|configuration_utils.py:1000] 2024-06-30 19:10:17,747 >> Generate config GenerationConfig {\n",
+ " \"bos_token_id\": 151643,\n",
+ " \"do_sample\": true,\n",
+ " \"eos_token_id\": [\n",
+ " 151645,\n",
+ " 151643\n",
+ " ],\n",
+ " \"pad_token_id\": 151643,\n",
+ " \"repetition_penalty\": 1.1,\n",
+ " \"temperature\": 0.7,\n",
+ " \"top_k\": 20,\n",
+ " \"top_p\": 0.8\n",
+ "}\n",
+ "\n",
+ "06/30/2024 19:10:18 - INFO - llamafactory.model.model_utils.attention - Using torch SDPA for faster training and inference.\n",
+ "06/30/2024 19:10:18 - INFO - llamafactory.model.adapter - Loaded adapter(s): llama-factory/saves/qwen2-0.5b/lora/sft/checkpoint-3920\n",
+ "06/30/2024 19:10:18 - INFO - llamafactory.model.loader - all params: 498,431,872\n",
+ "(2) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n",
+ "0.666 GB of memory reserved.\n",
+ "loading train/test data files\n",
+ "DatasetDict({\n",
+ " train: Dataset({\n",
+ " features: ['chinese', 'english', 'text', 'prompt'],\n",
+ " num_rows: 4528\n",
+ " })\n",
+ " test: Dataset({\n",
+ " features: ['chinese', 'english', 'text', 'prompt'],\n",
+ " num_rows: 1133\n",
+ " })\n",
+ "})\n",
+ "Evaluating model: Qwen/Qwen2-0.5B-Instruct\n",
+ " 0%| | 0/1133 [00:00, ?it/s]--------\n",
+ "step 1: Old Geng held his gun to his chest, eyes on a triangle shape, but the trigger sounded when he cocked it and fired: shot after shot of golden sparrows thundered down the slope, and shattering stones clattered as they fell through the air.<|im_end|>\n",
+ "--------\n",
+ "step 2: Old Geng held his gun to his chest, eyes on a triangle shape, but the trigger sounded when he cocked it and fired: shot after shot of golden sparrows thundered down the slope, and shattering stones clattered as they fell through the air.\n",
+ "--------\n",
+ "step 3: Old Geng held his gun to his chest, eyes on a triangle shape, but the trigger sounded when he cocked it and fired: shot after shot of golden sparrows thundered down the slope, and shattering stones clattered as they fell through the air.\n",
+ "100%|███████████████████████████████████████| 1133/1133 [33:14<00:00, 1.76s/it]\n",
+ "(3) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n",
+ "1.057 GB of memory reserved.\n",
+ " chinese ... Qwen/Qwen2-0.5B-Instruct_checkpoint-3920\n",
+ "0 老耿端起枪,眯缝起一只三角眼,一搂扳机响了枪,冰雹般的金麻雀劈哩啪啦往下落,铁砂子在柳枝间飞... ... Old Geng held his gun to his chest, eyes on a ...\n",
+ "\n",
+ "[1 rows x 9 columns]\n",
+ "{'accuracy': 0.00088261253309797, 'correct_ids': [364], 'meteor': 0.29569751947150547, 'bleu_scores': {'bleu': 0.06290335358107121, 'precisions': [0.33640226628895187, 0.09157729444388761, 0.033815921952574386, 0.015028901734104046], 'brevity_penalty': 1.0, 'length_ratio': 1.0289499834382245, 'translation_length': 31064, 'reference_length': 30190}, 'rouge_scores': {'rouge1': 0.328871406524778, 'rouge2': 0.10887785000250436, 'rougeL': 0.2694111761024649, 'rougeLsum': 0.2691332869747859}}\n",
+ "Epoch 8\n",
+ "loading env vars from: /home/inflaton/code/projects/courses/llm-finetuning/.env\n",
+ "Adding /home/inflaton/code/projects/courses/llm-finetuning to sys.path\n",
+ "🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.\n",
+ "[nltk_data] Downloading package wordnet to /home/inflaton/nltk_data...\n",
+ "[nltk_data] Package wordnet is already up-to-date!\n",
+ "[nltk_data] Downloading package punkt to /home/inflaton/nltk_data...\n",
+ "[nltk_data] Package punkt is already up-to-date!\n",
+ "[nltk_data] Downloading package omw-1.4 to /home/inflaton/nltk_data...\n",
+ "[nltk_data] Package omw-1.4 is already up-to-date!\n",
+ "loading /home/inflaton/code/projects/courses/llm-finetuning/llm_toolkit/translation_engine.py\n",
+ "Qwen/Qwen2-0.5B-Instruct llama-factory/saves/qwen2-0.5b/lora/sft/checkpoint-4480 True datasets/mac/mac.tsv results/mac-results_lf.csv\n",
+ "(1) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n",
+ "0.0 GB of memory reserved.\n",
+ "loading model: Qwen/Qwen2-0.5B-Instruct\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-06-30 19:43:50,488 >> loading file vocab.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/vocab.json\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-06-30 19:43:50,488 >> loading file merges.txt from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/merges.txt\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-06-30 19:43:50,488 >> loading file tokenizer.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/tokenizer.json\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-06-30 19:43:50,488 >> loading file added_tokens.json from cache at None\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-06-30 19:43:50,488 >> loading file special_tokens_map.json from cache at None\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-06-30 19:43:50,488 >> loading file tokenizer_config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/tokenizer_config.json\n",
+ "[WARNING|logging.py:313] 2024-06-30 19:43:50,640 >> Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n",
+ "06/30/2024 19:43:50 - INFO - llamafactory.data.template - Replace eos token: <|im_end|>\n",
+ "06/30/2024 19:43:50 - INFO - llamafactory.data.template - Add <|im_start|> to stop words.\n",
+ "[INFO|configuration_utils.py:733] 2024-06-30 19:43:50,918 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/config.json\n",
+ "[INFO|configuration_utils.py:800] 2024-06-30 19:43:50,918 >> Model config Qwen2Config {\n",
+ " \"_name_or_path\": \"Qwen/Qwen2-0.5B-Instruct\",\n",
+ " \"architectures\": [\n",
+ " \"Qwen2ForCausalLM\"\n",
+ " ],\n",
+ " \"attention_dropout\": 0.0,\n",
+ " \"bos_token_id\": 151643,\n",
+ " \"eos_token_id\": 151645,\n",
+ " \"hidden_act\": \"silu\",\n",
+ " \"hidden_size\": 896,\n",
+ " \"initializer_range\": 0.02,\n",
+ " \"intermediate_size\": 4864,\n",
+ " \"max_position_embeddings\": 32768,\n",
+ " \"max_window_layers\": 24,\n",
+ " \"model_type\": \"qwen2\",\n",
+ " \"num_attention_heads\": 14,\n",
+ " \"num_hidden_layers\": 24,\n",
+ " \"num_key_value_heads\": 2,\n",
+ " \"rms_norm_eps\": 1e-06,\n",
+ " \"rope_theta\": 1000000.0,\n",
+ " \"sliding_window\": 32768,\n",
+ " \"tie_word_embeddings\": true,\n",
+ " \"torch_dtype\": \"bfloat16\",\n",
+ " \"transformers_version\": \"4.42.3\",\n",
+ " \"use_cache\": true,\n",
+ " \"use_sliding_window\": false,\n",
+ " \"vocab_size\": 151936\n",
+ "}\n",
+ "\n",
+ "06/30/2024 19:43:50 - INFO - llamafactory.model.model_utils.quantization - Quantizing model to 4 bit with bitsandbytes.\n",
+ "06/30/2024 19:43:50 - INFO - llamafactory.model.patcher - Using KV cache for faster generation.\n",
+ "[INFO|modeling_utils.py:3556] 2024-06-30 19:43:50,973 >> loading weights file model.safetensors from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/model.safetensors\n",
+ "[INFO|modeling_utils.py:1531] 2024-06-30 19:43:51,791 >> Instantiating Qwen2ForCausalLM model under default dtype torch.bfloat16.\n",
+ "[INFO|configuration_utils.py:1000] 2024-06-30 19:43:51,794 >> Generate config GenerationConfig {\n",
+ " \"bos_token_id\": 151643,\n",
+ " \"eos_token_id\": 151645\n",
+ "}\n",
+ "\n",
+ "[INFO|modeling_utils.py:4364] 2024-06-30 19:44:16,853 >> All model checkpoint weights were used when initializing Qwen2ForCausalLM.\n",
+ "\n",
+ "[INFO|modeling_utils.py:4372] 2024-06-30 19:44:16,853 >> All the weights of Qwen2ForCausalLM were initialized from the model checkpoint at Qwen/Qwen2-0.5B-Instruct.\n",
+ "If your task is similar to the task the model of the checkpoint was trained on, you can already use Qwen2ForCausalLM for predictions without further training.\n",
+ "[INFO|configuration_utils.py:955] 2024-06-30 19:44:17,214 >> loading configuration file generation_config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/generation_config.json\n",
+ "[INFO|configuration_utils.py:1000] 2024-06-30 19:44:17,214 >> Generate config GenerationConfig {\n",
+ " \"bos_token_id\": 151643,\n",
+ " \"do_sample\": true,\n",
+ " \"eos_token_id\": [\n",
+ " 151645,\n",
+ " 151643\n",
+ " ],\n",
+ " \"pad_token_id\": 151643,\n",
+ " \"repetition_penalty\": 1.1,\n",
+ " \"temperature\": 0.7,\n",
+ " \"top_k\": 20,\n",
+ " \"top_p\": 0.8\n",
+ "}\n",
+ "\n",
+ "06/30/2024 19:44:17 - INFO - llamafactory.model.model_utils.attention - Using torch SDPA for faster training and inference.\n",
+ "06/30/2024 19:44:17 - INFO - llamafactory.model.adapter - Loaded adapter(s): llama-factory/saves/qwen2-0.5b/lora/sft/checkpoint-4480\n",
+ "06/30/2024 19:44:17 - INFO - llamafactory.model.loader - all params: 498,431,872\n",
+ "(2) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n",
+ "0.666 GB of memory reserved.\n",
+ "loading train/test data files\n",
+ "DatasetDict({\n",
+ " train: Dataset({\n",
+ " features: ['chinese', 'english', 'text', 'prompt'],\n",
+ " num_rows: 4528\n",
+ " })\n",
+ " test: Dataset({\n",
+ " features: ['chinese', 'english', 'text', 'prompt'],\n",
+ " num_rows: 1133\n",
+ " })\n",
+ "})\n",
+ "Evaluating model: Qwen/Qwen2-0.5B-Instruct\n",
+ " 0%| | 0/1133 [00:00, ?it/s]--------\n",
+ "step 1: Old Geng took up his gun and raised a triangular brow – the cocking of the revolver started as soon as he lifted it. Bang! Bang! Bang! – hail was raining down from the heavens in a torrent of gold sparrows that sounded like hail as they whizzed down the path, rustling leaves as they passed by.<|im_end|>\n",
+ "--------\n",
+ "step 2: Old Geng took up his gun and raised a triangular brow – the cocking of the revolver started as soon as he lifted it. Bang! Bang! Bang! – hail was raining down from the heavens in a torrent of gold sparrows that sounded like hail as they whizzed down the path, rustling leaves as they passed by.\n",
+ "--------\n",
+ "step 3: Old Geng took up his gun and raised a triangular brow – the cocking of the revolver started as soon as he lifted it. Bang! Bang! Bang! – hail was raining down from the heavens in a torrent of gold sparrows that sounded like hail as they whizzed down the path, rustling leaves as they passed by.\n",
+ "100%|███████████████████████████████████████| 1133/1133 [34:03<00:00, 1.80s/it]\n",
+ "(3) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n",
+ "0.9 GB of memory reserved.\n",
+ " chinese ... Qwen/Qwen2-0.5B-Instruct_checkpoint-4480\n",
+ "0 老耿端起枪,眯缝起一只三角眼,一搂扳机响了枪,冰雹般的金麻雀劈哩啪啦往下落,铁砂子在柳枝间飞... ... Old Geng took up his gun and raised a triangul...\n",
+ "\n",
+ "[1 rows x 10 columns]\n",
+ "{'accuracy': 0.0, 'correct_ids': [], 'meteor': 0.29297589531864165, 'bleu_scores': {'bleu': 0.066715036654756, 'precisions': [0.33156006043817676, 0.0917577933735923, 0.03666926492018843, 0.017757733774927772], 'brevity_penalty': 1.0, 'length_ratio': 1.0522689632328586, 'translation_length': 31768, 'reference_length': 30190}, 'rouge_scores': {'rouge1': 0.3235260151085271, 'rouge2': 0.10613228641914846, 'rougeL': 0.2654728857129883, 'rougeLsum': 0.26595119389766264}}\n",
+ "Epoch 9\n",
+ "loading env vars from: /home/inflaton/code/projects/courses/llm-finetuning/.env\n",
+ "Adding /home/inflaton/code/projects/courses/llm-finetuning to sys.path\n",
+ "🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.\n",
+ "[nltk_data] Downloading package wordnet to /home/inflaton/nltk_data...\n",
+ "[nltk_data] Package wordnet is already up-to-date!\n",
+ "[nltk_data] Downloading package punkt to /home/inflaton/nltk_data...\n",
+ "[nltk_data] Package punkt is already up-to-date!\n",
+ "[nltk_data] Downloading package omw-1.4 to /home/inflaton/nltk_data...\n",
+ "[nltk_data] Package omw-1.4 is already up-to-date!\n",
+ "loading /home/inflaton/code/projects/courses/llm-finetuning/llm_toolkit/translation_engine.py\n",
+ "Qwen/Qwen2-0.5B-Instruct llama-factory/saves/qwen2-0.5b/lora/sft/checkpoint-5040 True datasets/mac/mac.tsv results/mac-results_lf.csv\n",
+ "(1) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n",
+ "0.0 GB of memory reserved.\n",
+ "loading model: Qwen/Qwen2-0.5B-Instruct\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-06-30 20:18:42,543 >> loading file vocab.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/vocab.json\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-06-30 20:18:42,544 >> loading file merges.txt from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/merges.txt\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-06-30 20:18:42,544 >> loading file tokenizer.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/tokenizer.json\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-06-30 20:18:42,544 >> loading file added_tokens.json from cache at None\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-06-30 20:18:42,544 >> loading file special_tokens_map.json from cache at None\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-06-30 20:18:42,544 >> loading file tokenizer_config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/tokenizer_config.json\n",
+ "[WARNING|logging.py:313] 2024-06-30 20:18:42,670 >> Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n",
+ "06/30/2024 20:18:42 - INFO - llamafactory.data.template - Replace eos token: <|im_end|>\n",
+ "06/30/2024 20:18:42 - INFO - llamafactory.data.template - Add <|im_start|> to stop words.\n",
+ "[INFO|configuration_utils.py:733] 2024-06-30 20:18:42,995 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/config.json\n",
+ "[INFO|configuration_utils.py:800] 2024-06-30 20:18:42,995 >> Model config Qwen2Config {\n",
+ " \"_name_or_path\": \"Qwen/Qwen2-0.5B-Instruct\",\n",
+ " \"architectures\": [\n",
+ " \"Qwen2ForCausalLM\"\n",
+ " ],\n",
+ " \"attention_dropout\": 0.0,\n",
+ " \"bos_token_id\": 151643,\n",
+ " \"eos_token_id\": 151645,\n",
+ " \"hidden_act\": \"silu\",\n",
+ " \"hidden_size\": 896,\n",
+ " \"initializer_range\": 0.02,\n",
+ " \"intermediate_size\": 4864,\n",
+ " \"max_position_embeddings\": 32768,\n",
+ " \"max_window_layers\": 24,\n",
+ " \"model_type\": \"qwen2\",\n",
+ " \"num_attention_heads\": 14,\n",
+ " \"num_hidden_layers\": 24,\n",
+ " \"num_key_value_heads\": 2,\n",
+ " \"rms_norm_eps\": 1e-06,\n",
+ " \"rope_theta\": 1000000.0,\n",
+ " \"sliding_window\": 32768,\n",
+ " \"tie_word_embeddings\": true,\n",
+ " \"torch_dtype\": \"bfloat16\",\n",
+ " \"transformers_version\": \"4.42.3\",\n",
+ " \"use_cache\": true,\n",
+ " \"use_sliding_window\": false,\n",
+ " \"vocab_size\": 151936\n",
+ "}\n",
+ "\n",
+ "06/30/2024 20:18:42 - INFO - llamafactory.model.model_utils.quantization - Quantizing model to 4 bit with bitsandbytes.\n",
+ "06/30/2024 20:18:42 - INFO - llamafactory.model.patcher - Using KV cache for faster generation.\n",
+ "[INFO|modeling_utils.py:3556] 2024-06-30 20:18:43,139 >> loading weights file model.safetensors from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/model.safetensors\n",
+ "[INFO|modeling_utils.py:1531] 2024-06-30 20:18:44,397 >> Instantiating Qwen2ForCausalLM model under default dtype torch.bfloat16.\n",
+ "[INFO|configuration_utils.py:1000] 2024-06-30 20:18:44,400 >> Generate config GenerationConfig {\n",
+ " \"bos_token_id\": 151643,\n",
+ " \"eos_token_id\": 151645\n",
+ "}\n",
+ "\n",
+ "[INFO|modeling_utils.py:4364] 2024-06-30 20:19:10,704 >> All model checkpoint weights were used when initializing Qwen2ForCausalLM.\n",
+ "\n",
+ "[INFO|modeling_utils.py:4372] 2024-06-30 20:19:10,704 >> All the weights of Qwen2ForCausalLM were initialized from the model checkpoint at Qwen/Qwen2-0.5B-Instruct.\n",
+ "If your task is similar to the task the model of the checkpoint was trained on, you can already use Qwen2ForCausalLM for predictions without further training.\n",
+ "[INFO|configuration_utils.py:955] 2024-06-30 20:19:11,065 >> loading configuration file generation_config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/generation_config.json\n",
+ "[INFO|configuration_utils.py:1000] 2024-06-30 20:19:11,065 >> Generate config GenerationConfig {\n",
+ " \"bos_token_id\": 151643,\n",
+ " \"do_sample\": true,\n",
+ " \"eos_token_id\": [\n",
+ " 151645,\n",
+ " 151643\n",
+ " ],\n",
+ " \"pad_token_id\": 151643,\n",
+ " \"repetition_penalty\": 1.1,\n",
+ " \"temperature\": 0.7,\n",
+ " \"top_k\": 20,\n",
+ " \"top_p\": 0.8\n",
+ "}\n",
+ "\n",
+ "06/30/2024 20:19:11 - INFO - llamafactory.model.model_utils.attention - Using torch SDPA for faster training and inference.\n",
+ "06/30/2024 20:19:11 - INFO - llamafactory.model.adapter - Loaded adapter(s): llama-factory/saves/qwen2-0.5b/lora/sft/checkpoint-5040\n",
+ "06/30/2024 20:19:11 - INFO - llamafactory.model.loader - all params: 498,431,872\n",
+ "(2) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n",
+ "0.666 GB of memory reserved.\n",
+ "loading train/test data files\n",
+ "DatasetDict({\n",
+ " train: Dataset({\n",
+ " features: ['chinese', 'english', 'text', 'prompt'],\n",
+ " num_rows: 4528\n",
+ " })\n",
+ " test: Dataset({\n",
+ " features: ['chinese', 'english', 'text', 'prompt'],\n",
+ " num_rows: 1133\n",
+ " })\n",
+ "})\n",
+ "Evaluating model: Qwen/Qwen2-0.5B-Instruct\n",
+ " 0%| | 0/1133 [00:00, ?it/s]--------\n",
+ "step 1: Old Geng took out his pistol, but it was too late. Shot after shot of shotgun went off as he held the trigger, a bang-bang-gong-tingling fall from the din of bullets falling from the air.<|im_end|>\n",
+ "--------\n",
+ "step 2: Old Geng took out his pistol, but it was too late. Shot after shot of shotgun went off as he held the trigger, a bang-bang-gong-tingling fall from the din of bullets falling from the air.\n",
+ "--------\n",
+ "step 3: Old Geng took out his pistol, but it was too late. Shot after shot of shotgun went off as he held the trigger, a bang-bang-gong-tingling fall from the din of bullets falling from the air.\n",
+ "100%|███████████████████████████████████████| 1133/1133 [47:05<00:00, 2.49s/it]\n",
+ "(3) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n",
+ "4.221 GB of memory reserved.\n",
+ " chinese ... Qwen/Qwen2-0.5B-Instruct_checkpoint-5040\n",
+ "0 老耿端起枪,眯缝起一只三角眼,一搂扳机响了枪,冰雹般的金麻雀劈哩啪啦往下落,铁砂子在柳枝间飞... ... Old Geng took out his pistol, but it was too l...\n",
+ "\n",
+ "[1 rows x 11 columns]\n",
+ "{'accuracy': 0.0, 'correct_ids': [], 'meteor': 0.2833319356953958, 'bleu_scores': {'bleu': 0.05430760022077538, 'precisions': [0.28200039135660976, 0.0749133949191686, 0.029243256147051803, 0.01408015809300315], 'brevity_penalty': 1.0, 'length_ratio': 1.184928784365684, 'translation_length': 35773, 'reference_length': 30190}, 'rouge_scores': {'rouge1': 0.3123182638202295, 'rouge2': 0.1006380742528073, 'rougeL': 0.25624416362806557, 'rougeLsum': 0.25609208337653155}}\n",
+ "Epoch 10\n",
+ "loading env vars from: /home/inflaton/code/projects/courses/llm-finetuning/.env\n",
+ "Adding /home/inflaton/code/projects/courses/llm-finetuning to sys.path\n",
+ "🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.\n",
+ "[nltk_data] Downloading package wordnet to /home/inflaton/nltk_data...\n",
+ "[nltk_data] Package wordnet is already up-to-date!\n",
+ "[nltk_data] Downloading package punkt to /home/inflaton/nltk_data...\n",
+ "[nltk_data] Package punkt is already up-to-date!\n",
+ "[nltk_data] Downloading package omw-1.4 to /home/inflaton/nltk_data...\n",
+ "[nltk_data] Package omw-1.4 is already up-to-date!\n",
+ "loading /home/inflaton/code/projects/courses/llm-finetuning/llm_toolkit/translation_engine.py\n",
+ "Qwen/Qwen2-0.5B-Instruct llama-factory/saves/qwen2-0.5b/lora/sft/checkpoint-5600 True datasets/mac/mac.tsv results/mac-results_lf.csv\n",
+ "(1) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n",
+ "0.0 GB of memory reserved.\n",
+ "loading model: Qwen/Qwen2-0.5B-Instruct\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-06-30 21:06:41,264 >> loading file vocab.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/vocab.json\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-06-30 21:06:41,264 >> loading file merges.txt from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/merges.txt\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-06-30 21:06:41,264 >> loading file tokenizer.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/tokenizer.json\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-06-30 21:06:41,264 >> loading file added_tokens.json from cache at None\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-06-30 21:06:41,264 >> loading file special_tokens_map.json from cache at None\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-06-30 21:06:41,264 >> loading file tokenizer_config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/tokenizer_config.json\n",
+ "[WARNING|logging.py:313] 2024-06-30 21:06:41,413 >> Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n",
+ "06/30/2024 21:06:41 - INFO - llamafactory.data.template - Replace eos token: <|im_end|>\n",
+ "06/30/2024 21:06:41 - INFO - llamafactory.data.template - Add <|im_start|> to stop words.\n",
+ "[INFO|configuration_utils.py:733] 2024-06-30 21:06:41,679 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/config.json\n",
+ "[INFO|configuration_utils.py:800] 2024-06-30 21:06:41,680 >> Model config Qwen2Config {\n",
+ " \"_name_or_path\": \"Qwen/Qwen2-0.5B-Instruct\",\n",
+ " \"architectures\": [\n",
+ " \"Qwen2ForCausalLM\"\n",
+ " ],\n",
+ " \"attention_dropout\": 0.0,\n",
+ " \"bos_token_id\": 151643,\n",
+ " \"eos_token_id\": 151645,\n",
+ " \"hidden_act\": \"silu\",\n",
+ " \"hidden_size\": 896,\n",
+ " \"initializer_range\": 0.02,\n",
+ " \"intermediate_size\": 4864,\n",
+ " \"max_position_embeddings\": 32768,\n",
+ " \"max_window_layers\": 24,\n",
+ " \"model_type\": \"qwen2\",\n",
+ " \"num_attention_heads\": 14,\n",
+ " \"num_hidden_layers\": 24,\n",
+ " \"num_key_value_heads\": 2,\n",
+ " \"rms_norm_eps\": 1e-06,\n",
+ " \"rope_theta\": 1000000.0,\n",
+ " \"sliding_window\": 32768,\n",
+ " \"tie_word_embeddings\": true,\n",
+ " \"torch_dtype\": \"bfloat16\",\n",
+ " \"transformers_version\": \"4.42.3\",\n",
+ " \"use_cache\": true,\n",
+ " \"use_sliding_window\": false,\n",
+ " \"vocab_size\": 151936\n",
+ "}\n",
+ "\n",
+ "06/30/2024 21:06:41 - INFO - llamafactory.model.model_utils.quantization - Quantizing model to 4 bit with bitsandbytes.\n",
+ "06/30/2024 21:06:41 - INFO - llamafactory.model.patcher - Using KV cache for faster generation.\n",
+ "[INFO|modeling_utils.py:3556] 2024-06-30 21:06:41,746 >> loading weights file model.safetensors from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/model.safetensors\n",
+ "[INFO|modeling_utils.py:1531] 2024-06-30 21:06:42,649 >> Instantiating Qwen2ForCausalLM model under default dtype torch.bfloat16.\n",
+ "[INFO|configuration_utils.py:1000] 2024-06-30 21:06:42,653 >> Generate config GenerationConfig {\n",
+ " \"bos_token_id\": 151643,\n",
+ " \"eos_token_id\": 151645\n",
+ "}\n",
+ "\n",
+ "[INFO|modeling_utils.py:4364] 2024-06-30 21:07:13,550 >> All model checkpoint weights were used when initializing Qwen2ForCausalLM.\n",
+ "\n",
+ "[INFO|modeling_utils.py:4372] 2024-06-30 21:07:13,550 >> All the weights of Qwen2ForCausalLM were initialized from the model checkpoint at Qwen/Qwen2-0.5B-Instruct.\n",
+ "If your task is similar to the task the model of the checkpoint was trained on, you can already use Qwen2ForCausalLM for predictions without further training.\n",
+ "[INFO|configuration_utils.py:955] 2024-06-30 21:07:13,853 >> loading configuration file generation_config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/generation_config.json\n",
+ "[INFO|configuration_utils.py:1000] 2024-06-30 21:07:13,853 >> Generate config GenerationConfig {\n",
+ " \"bos_token_id\": 151643,\n",
+ " \"do_sample\": true,\n",
+ " \"eos_token_id\": [\n",
+ " 151645,\n",
+ " 151643\n",
+ " ],\n",
+ " \"pad_token_id\": 151643,\n",
+ " \"repetition_penalty\": 1.1,\n",
+ " \"temperature\": 0.7,\n",
+ " \"top_k\": 20,\n",
+ " \"top_p\": 0.8\n",
+ "}\n",
+ "\n",
+ "06/30/2024 21:07:14 - INFO - llamafactory.model.model_utils.attention - Using torch SDPA for faster training and inference.\n",
+ "06/30/2024 21:07:14 - INFO - llamafactory.model.adapter - Loaded adapter(s): llama-factory/saves/qwen2-0.5b/lora/sft/checkpoint-5600\n",
+ "06/30/2024 21:07:14 - INFO - llamafactory.model.loader - all params: 498,431,872\n",
+ "(2) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n",
+ "0.666 GB of memory reserved.\n",
+ "loading train/test data files\n",
+ "DatasetDict({\n",
+ " train: Dataset({\n",
+ " features: ['chinese', 'english', 'text', 'prompt'],\n",
+ " num_rows: 4528\n",
+ " })\n",
+ " test: Dataset({\n",
+ " features: ['chinese', 'english', 'text', 'prompt'],\n",
+ " num_rows: 1133\n",
+ " })\n",
+ "})\n",
+ "Evaluating model: Qwen/Qwen2-0.5B-Instruct\n",
+ " 0%| | 0/1133 [00:00, ?it/s]--------\n",
+ "step 1: Old Geng took up his gun and fired – hammering rain! Yellow streaks flew as he fired a goose-pat of gold – and then there was the sound of gravel between his bullets – crunchy.<|im_end|>\n",
+ "--------\n",
+ "step 2: Old Geng took up his gun and fired – hammering rain! Yellow streaks flew as he fired a goose-pat of gold – and then there was the sound of gravel between his bullets – crunchy.\n",
+ "--------\n",
+ "step 3: Old Geng took up his gun and fired – hammering rain! Yellow streaks flew as he fired a goose-pat of gold – and then there was the sound of gravel between his bullets – crunchy.\n",
+ "100%|███████████████████████████████████████| 1133/1133 [48:43<00:00, 2.58s/it]\n",
+ "(3) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n",
+ "4.221 GB of memory reserved.\n",
+ " chinese ... Qwen/Qwen2-0.5B-Instruct_checkpoint-5600\n",
+ "0 老耿端起枪,眯缝起一只三角眼,一搂扳机响了枪,冰雹般的金麻雀劈哩啪啦往下落,铁砂子在柳枝间飞... ... Old Geng took up his gun and fired – hammering...\n",
+ "\n",
+ "[1 rows x 12 columns]\n",
+ "{'accuracy': 0.0, 'correct_ids': [], 'meteor': 0.28432663251720675, 'bleu_scores': {'bleu': 0.052792420940353475, 'precisions': [0.29167024596970476, 0.07445989937851435, 0.0279223562549752, 0.012809131261889664], 'brevity_penalty': 1.0, 'length_ratio': 1.156773766147731, 'translation_length': 34923, 'reference_length': 30190}, 'rouge_scores': {'rouge1': 0.31243618674019946, 'rouge2': 0.09792736995151512, 'rougeL': 0.25604383226456534, 'rougeLsum': 0.2555907570933199}}\n",
+ "CPU times: user 12min 29s, sys: 4min 19s, total: 16min 48s\n",
+ "Wall time: 8h 34min 52s\n"
+ ]
+ }
+ ],
+ "source": [
+ "%%time\n",
+ "\n",
+ "# Evaluate every epoch checkpoint of the LoRA-fine-tuned Qwen2-0.5B-Instruct model.\n",
+ "# Checkpoints are saved every epoch; one epoch = 560 optimizer steps, so\n",
+ "# epoch i lives at .../checkpoint-{560 * i} (matches the training logs above).\n",
+ "STEPS_PER_EPOCH = 560\n",
+ "\n",
+ "os.environ[\"MODEL_NAME\"] = \"Qwen/Qwen2-0.5B-Instruct\"\n",
+ "for i in range(1, num_train_epochs + 1):\n",
+ "    print(f\"Epoch {i}\")\n",
+ "    adapter_path = f\"llama-factory/saves/qwen2-0.5b/lora/sft/checkpoint-{STEPS_PER_EPOCH * i}\"\n",
+ "    # eval.py reads MODEL_NAME / ADAPTER_NAME_OR_PATH from the environment\n",
+ "    os.environ[\"ADAPTER_NAME_OR_PATH\"] = adapter_path\n",
+ "    !python llm_toolkit/eval.py"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Epoch 1\n",
+ "loading env vars from: /home/inflaton/code/projects/courses/llm-finetuning/.env\n",
+ "Adding /home/inflaton/code/projects/courses/llm-finetuning to sys.path\n",
+ "🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.\n",
+ "[nltk_data] Downloading package wordnet to /home/inflaton/nltk_data...\n",
+ "[nltk_data] Package wordnet is already up-to-date!\n",
+ "[nltk_data] Downloading package punkt to /home/inflaton/nltk_data...\n",
+ "[nltk_data] Package punkt is already up-to-date!\n",
+ "[nltk_data] Downloading package omw-1.4 to /home/inflaton/nltk_data...\n",
+ "[nltk_data] Package omw-1.4 is already up-to-date!\n",
+ "loading /home/inflaton/code/projects/courses/llm-finetuning/llm_toolkit/translation_engine.py\n",
+ "Qwen/Qwen2-1.5B-Instruct llama-factory/saves/qwen2-1.5b/lora/sft/checkpoint-560 True datasets/mac/mac.tsv results/mac-results_lf.csv\n",
+ "(1) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n",
+ "0.0 GB of memory reserved.\n",
+ "loading model: Qwen/Qwen2-1.5B-Instruct\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-06-30 21:56:19,887 >> loading file vocab.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/vocab.json\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-06-30 21:56:19,887 >> loading file merges.txt from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/merges.txt\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-06-30 21:56:19,887 >> loading file tokenizer.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/tokenizer.json\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-06-30 21:56:19,887 >> loading file added_tokens.json from cache at None\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-06-30 21:56:19,887 >> loading file special_tokens_map.json from cache at None\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-06-30 21:56:19,887 >> loading file tokenizer_config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/tokenizer_config.json\n",
+ "[WARNING|logging.py:313] 2024-06-30 21:56:20,070 >> Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n",
+ "06/30/2024 21:56:20 - INFO - llamafactory.data.template - Replace eos token: <|im_end|>\n",
+ "06/30/2024 21:56:20 - INFO - llamafactory.data.template - Add <|im_start|> to stop words.\n",
+ "[INFO|configuration_utils.py:733] 2024-06-30 21:56:20,393 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/config.json\n",
+ "[INFO|configuration_utils.py:800] 2024-06-30 21:56:20,393 >> Model config Qwen2Config {\n",
+ " \"_name_or_path\": \"Qwen/Qwen2-1.5B-Instruct\",\n",
+ " \"architectures\": [\n",
+ " \"Qwen2ForCausalLM\"\n",
+ " ],\n",
+ " \"attention_dropout\": 0.0,\n",
+ " \"bos_token_id\": 151643,\n",
+ " \"eos_token_id\": 151645,\n",
+ " \"hidden_act\": \"silu\",\n",
+ " \"hidden_size\": 1536,\n",
+ " \"initializer_range\": 0.02,\n",
+ " \"intermediate_size\": 8960,\n",
+ " \"max_position_embeddings\": 32768,\n",
+ " \"max_window_layers\": 28,\n",
+ " \"model_type\": \"qwen2\",\n",
+ " \"num_attention_heads\": 12,\n",
+ " \"num_hidden_layers\": 28,\n",
+ " \"num_key_value_heads\": 2,\n",
+ " \"rms_norm_eps\": 1e-06,\n",
+ " \"rope_theta\": 1000000.0,\n",
+ " \"sliding_window\": 32768,\n",
+ " \"tie_word_embeddings\": true,\n",
+ " \"torch_dtype\": \"bfloat16\",\n",
+ " \"transformers_version\": \"4.42.3\",\n",
+ " \"use_cache\": true,\n",
+ " \"use_sliding_window\": false,\n",
+ " \"vocab_size\": 151936\n",
+ "}\n",
+ "\n",
+ "06/30/2024 21:56:20 - INFO - llamafactory.model.model_utils.quantization - Quantizing model to 4 bit with bitsandbytes.\n",
+ "06/30/2024 21:56:20 - INFO - llamafactory.model.patcher - Using KV cache for faster generation.\n",
+ "[INFO|modeling_utils.py:3556] 2024-06-30 21:56:20,457 >> loading weights file model.safetensors from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/model.safetensors\n",
+ "[INFO|modeling_utils.py:1531] 2024-06-30 21:56:22,769 >> Instantiating Qwen2ForCausalLM model under default dtype torch.bfloat16.\n",
+ "[INFO|configuration_utils.py:1000] 2024-06-30 21:56:22,772 >> Generate config GenerationConfig {\n",
+ " \"bos_token_id\": 151643,\n",
+ " \"eos_token_id\": 151645\n",
+ "}\n",
+ "\n",
+ "[INFO|modeling_utils.py:4364] 2024-06-30 21:58:45,740 >> All model checkpoint weights were used when initializing Qwen2ForCausalLM.\n",
+ "\n",
+ "[INFO|modeling_utils.py:4372] 2024-06-30 21:58:45,740 >> All the weights of Qwen2ForCausalLM were initialized from the model checkpoint at Qwen/Qwen2-1.5B-Instruct.\n",
+ "If your task is similar to the task the model of the checkpoint was trained on, you can already use Qwen2ForCausalLM for predictions without further training.\n",
+ "[INFO|configuration_utils.py:955] 2024-06-30 21:58:46,024 >> loading configuration file generation_config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/generation_config.json\n",
+ "[INFO|configuration_utils.py:1000] 2024-06-30 21:58:46,024 >> Generate config GenerationConfig {\n",
+ " \"bos_token_id\": 151643,\n",
+ " \"do_sample\": true,\n",
+ " \"eos_token_id\": [\n",
+ " 151645,\n",
+ " 151643\n",
+ " ],\n",
+ " \"pad_token_id\": 151643,\n",
+ " \"repetition_penalty\": 1.1,\n",
+ " \"temperature\": 0.7,\n",
+ " \"top_k\": 20,\n",
+ " \"top_p\": 0.8\n",
+ "}\n",
+ "\n",
+ "06/30/2024 21:58:46 - INFO - llamafactory.model.model_utils.attention - Using torch SDPA for faster training and inference.\n",
+ "06/30/2024 21:58:46 - INFO - llamafactory.model.adapter - Loaded adapter(s): llama-factory/saves/qwen2-1.5b/lora/sft/checkpoint-560\n",
+ "06/30/2024 21:58:46 - INFO - llamafactory.model.loader - all params: 1,552,946,688\n",
+ "(2) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n",
+ "1.604 GB of memory reserved.\n",
+ "loading train/test data files\n",
+ "Map: 100%|████████████████████████| 4528/4528 [00:00<00:00, 35828.73 examples/s]\n",
+ "Map: 100%|████████████████████████| 1133/1133 [00:00<00:00, 12322.75 examples/s]\n",
+ "DatasetDict({\n",
+ " train: Dataset({\n",
+ " features: ['chinese', 'english', 'text', 'prompt'],\n",
+ " num_rows: 4528\n",
+ " })\n",
+ " test: Dataset({\n",
+ " features: ['chinese', 'english', 'text', 'prompt'],\n",
+ " num_rows: 1133\n",
+ " })\n",
+ "})\n",
+ "Evaluating model: Qwen/Qwen2-1.5B-Instruct\n",
+ " 0%| | 0/1133 [00:00, ?it/s]--------\n",
+ "step 1: Grannie Geng held up his gun with one eye, narrowed it, raised the barrel of the rifle, fired a hail of bullets at the target.<|im_end|>\n",
+ "--------\n",
+ "step 2: Grannie Geng held up his gun with one eye, narrowed it, raised the barrel of the rifle, fired a hail of bullets at the target.\n",
+ "--------\n",
+ "step 3: Grannie Geng held up his gun with one eye, narrowed it, raised the barrel of the rifle, fired a hail of bullets at the target.\n",
+ "100%|███████████████████████████████████████| 1133/1133 [44:42<00:00, 2.37s/it]\n",
+ "(3) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n",
+ "1.857 GB of memory reserved.\n",
+ " chinese ... Qwen/Qwen2-1.5B-Instruct_checkpoint-560\n",
+ "0 老耿端起枪,眯缝起一只三角眼,一搂扳机响了枪,冰雹般的金麻雀劈哩啪啦往下落,铁砂子在柳枝间飞... ... Grannie Geng held up his gun with one eye, nar...\n",
+ "\n",
+ "[1 rows x 13 columns]\n",
+ "{'accuracy': 0.00264783759929391, 'correct_ids': [240, 738, 1026], 'meteor': 0.3555548051770412, 'bleu_scores': {'bleu': 0.08837370077365968, 'precisions': [0.4154119950169069, 0.13452266152362585, 0.055553404823661494, 0.026475589021131892], 'brevity_penalty': 0.9281439603442432, 'length_ratio': 0.9306061609804571, 'translation_length': 28095, 'reference_length': 30190}, 'rouge_scores': {'rouge1': 0.39474926445540526, 'rouge2': 0.14909336721544575, 'rougeL': 0.3340601663307491, 'rougeLsum': 0.33415584663948783}}\n",
+ "Epoch 2\n",
+ "loading env vars from: /home/inflaton/code/projects/courses/llm-finetuning/.env\n",
+ "Adding /home/inflaton/code/projects/courses/llm-finetuning to sys.path\n",
+ "🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.\n",
+ "[nltk_data] Downloading package wordnet to /home/inflaton/nltk_data...\n",
+ "[nltk_data] Package wordnet is already up-to-date!\n",
+ "[nltk_data] Downloading package punkt to /home/inflaton/nltk_data...\n",
+ "[nltk_data] Package punkt is already up-to-date!\n",
+ "[nltk_data] Downloading package omw-1.4 to /home/inflaton/nltk_data...\n",
+ "[nltk_data] Package omw-1.4 is already up-to-date!\n",
+ "loading /home/inflaton/code/projects/courses/llm-finetuning/llm_toolkit/translation_engine.py\n",
+ "Qwen/Qwen2-1.5B-Instruct llama-factory/saves/qwen2-1.5b/lora/sft/checkpoint-1120 True datasets/mac/mac.tsv results/mac-results_lf.csv\n",
+ "(1) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n",
+ "0.0 GB of memory reserved.\n",
+ "loading model: Qwen/Qwen2-1.5B-Instruct\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-06-30 22:43:48,381 >> loading file vocab.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/vocab.json\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-06-30 22:43:48,381 >> loading file merges.txt from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/merges.txt\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-06-30 22:43:48,381 >> loading file tokenizer.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/tokenizer.json\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-06-30 22:43:48,381 >> loading file added_tokens.json from cache at None\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-06-30 22:43:48,381 >> loading file special_tokens_map.json from cache at None\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-06-30 22:43:48,381 >> loading file tokenizer_config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/tokenizer_config.json\n",
+ "[WARNING|logging.py:313] 2024-06-30 22:43:48,549 >> Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n",
+ "06/30/2024 22:43:48 - INFO - llamafactory.data.template - Replace eos token: <|im_end|>\n",
+ "06/30/2024 22:43:48 - INFO - llamafactory.data.template - Add <|im_start|> to stop words.\n",
+ "[INFO|configuration_utils.py:733] 2024-06-30 22:43:48,826 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/config.json\n",
+ "[INFO|configuration_utils.py:800] 2024-06-30 22:43:48,826 >> Model config Qwen2Config {\n",
+ " \"_name_or_path\": \"Qwen/Qwen2-1.5B-Instruct\",\n",
+ " \"architectures\": [\n",
+ " \"Qwen2ForCausalLM\"\n",
+ " ],\n",
+ " \"attention_dropout\": 0.0,\n",
+ " \"bos_token_id\": 151643,\n",
+ " \"eos_token_id\": 151645,\n",
+ " \"hidden_act\": \"silu\",\n",
+ " \"hidden_size\": 1536,\n",
+ " \"initializer_range\": 0.02,\n",
+ " \"intermediate_size\": 8960,\n",
+ " \"max_position_embeddings\": 32768,\n",
+ " \"max_window_layers\": 28,\n",
+ " \"model_type\": \"qwen2\",\n",
+ " \"num_attention_heads\": 12,\n",
+ " \"num_hidden_layers\": 28,\n",
+ " \"num_key_value_heads\": 2,\n",
+ " \"rms_norm_eps\": 1e-06,\n",
+ " \"rope_theta\": 1000000.0,\n",
+ " \"sliding_window\": 32768,\n",
+ " \"tie_word_embeddings\": true,\n",
+ " \"torch_dtype\": \"bfloat16\",\n",
+ " \"transformers_version\": \"4.42.3\",\n",
+ " \"use_cache\": true,\n",
+ " \"use_sliding_window\": false,\n",
+ " \"vocab_size\": 151936\n",
+ "}\n",
+ "\n",
+ "06/30/2024 22:43:48 - INFO - llamafactory.model.model_utils.quantization - Quantizing model to 4 bit with bitsandbytes.\n",
+ "06/30/2024 22:43:48 - INFO - llamafactory.model.patcher - Using KV cache for faster generation.\n",
+ "[INFO|modeling_utils.py:3556] 2024-06-30 22:43:48,853 >> loading weights file model.safetensors from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/model.safetensors\n",
+ "[INFO|modeling_utils.py:1531] 2024-06-30 22:43:49,950 >> Instantiating Qwen2ForCausalLM model under default dtype torch.bfloat16.\n",
+ "[INFO|configuration_utils.py:1000] 2024-06-30 22:43:49,954 >> Generate config GenerationConfig {\n",
+ " \"bos_token_id\": 151643,\n",
+ " \"eos_token_id\": 151645\n",
+ "}\n",
+ "\n",
+ "[INFO|modeling_utils.py:4364] 2024-06-30 22:46:48,562 >> All model checkpoint weights were used when initializing Qwen2ForCausalLM.\n",
+ "\n",
+ "[INFO|modeling_utils.py:4372] 2024-06-30 22:46:48,562 >> All the weights of Qwen2ForCausalLM were initialized from the model checkpoint at Qwen/Qwen2-1.5B-Instruct.\n",
+ "If your task is similar to the task the model of the checkpoint was trained on, you can already use Qwen2ForCausalLM for predictions without further training.\n",
+ "[INFO|configuration_utils.py:955] 2024-06-30 22:46:48,846 >> loading configuration file generation_config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/generation_config.json\n",
+ "[INFO|configuration_utils.py:1000] 2024-06-30 22:46:48,846 >> Generate config GenerationConfig {\n",
+ " \"bos_token_id\": 151643,\n",
+ " \"do_sample\": true,\n",
+ " \"eos_token_id\": [\n",
+ " 151645,\n",
+ " 151643\n",
+ " ],\n",
+ " \"pad_token_id\": 151643,\n",
+ " \"repetition_penalty\": 1.1,\n",
+ " \"temperature\": 0.7,\n",
+ " \"top_k\": 20,\n",
+ " \"top_p\": 0.8\n",
+ "}\n",
+ "\n",
+ "06/30/2024 22:46:51 - INFO - llamafactory.model.model_utils.attention - Using torch SDPA for faster training and inference.\n",
+ "06/30/2024 22:46:54 - INFO - llamafactory.model.adapter - Loaded adapter(s): llama-factory/saves/qwen2-1.5b/lora/sft/checkpoint-1120\n",
+ "06/30/2024 22:46:55 - INFO - llamafactory.model.loader - all params: 1,552,946,688\n",
+ "(2) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n",
+ "1.604 GB of memory reserved.\n",
+ "loading train/test data files\n",
+ "DatasetDict({\n",
+ " train: Dataset({\n",
+ " features: ['chinese', 'english', 'text', 'prompt'],\n",
+ " num_rows: 4528\n",
+ " })\n",
+ " test: Dataset({\n",
+ " features: ['chinese', 'english', 'text', 'prompt'],\n",
+ " num_rows: 1133\n",
+ " })\n",
+ "})\n",
+ "Evaluating model: Qwen/Qwen2-1.5B-Instruct\n",
+ " 0%| | 0/1133 [00:00, ?it/s]--------\n",
+ "step 1: Old Geng raised his rifle and squinted at it through a slit in his eye. He squeezed the trigger and gold sparrows began to fall like rain. Iron sand scattered among the willow branches crackled.<|im_end|>\n",
+ "--------\n",
+ "step 2: Old Geng raised his rifle and squinted at it through a slit in his eye. He squeezed the trigger and gold sparrows began to fall like rain. Iron sand scattered among the willow branches crackled.\n",
+ "--------\n",
+ "step 3: Old Geng raised his rifle and squinted at it through a slit in his eye. He squeezed the trigger and gold sparrows began to fall like rain. Iron sand scattered among the willow branches crackled.\n",
+ "100%|███████████████████████████████████████| 1133/1133 [54:52<00:00, 2.91s/it]\n",
+ "(3) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n",
+ "1.818 GB of memory reserved.\n",
+ " chinese ... Qwen/Qwen2-1.5B-Instruct_checkpoint-1120\n",
+ "0 老耿端起枪,眯缝起一只三角眼,一搂扳机响了枪,冰雹般的金麻雀劈哩啪啦往下落,铁砂子在柳枝间飞... ... Old Geng raised his rifle and squinted at it t...\n",
+ "\n",
+ "[1 rows x 14 columns]\n",
+ "{'accuracy': 0.00353045013239188, 'correct_ids': [77, 272, 381, 659], 'meteor': 0.364551066769633, 'bleu_scores': {'bleu': 0.09512979475404361, 'precisions': [0.41979252665206934, 0.1427074758661977, 0.06224115026959444, 0.03069440470838272], 'brevity_penalty': 0.9197334814475309, 'length_ratio': 0.9227890029811195, 'translation_length': 27859, 'reference_length': 30190}, 'rouge_scores': {'rouge1': 0.40366781962223464, 'rouge2': 0.1631594243449107, 'rougeL': 0.34288741533227174, 'rougeLsum': 0.34268506193513737}}\n",
+ "Epoch 3\n",
+ "loading env vars from: /home/inflaton/code/projects/courses/llm-finetuning/.env\n",
+ "Adding /home/inflaton/code/projects/courses/llm-finetuning to sys.path\n",
+ "🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.\n",
+ "[nltk_data] Downloading package wordnet to /home/inflaton/nltk_data...\n",
+ "[nltk_data] Package wordnet is already up-to-date!\n",
+ "[nltk_data] Downloading package punkt to /home/inflaton/nltk_data...\n",
+ "[nltk_data] Package punkt is already up-to-date!\n",
+ "[nltk_data] Downloading package omw-1.4 to /home/inflaton/nltk_data...\n",
+ "[nltk_data] Package omw-1.4 is already up-to-date!\n",
+ "loading /home/inflaton/code/projects/courses/llm-finetuning/llm_toolkit/translation_engine.py\n",
+ "Qwen/Qwen2-1.5B-Instruct llama-factory/saves/qwen2-1.5b/lora/sft/checkpoint-1680 True datasets/mac/mac.tsv results/mac-results_lf.csv\n",
+ "(1) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n",
+ "0.0 GB of memory reserved.\n",
+ "loading model: Qwen/Qwen2-1.5B-Instruct\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-06-30 23:42:11,002 >> loading file vocab.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/vocab.json\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-06-30 23:42:11,002 >> loading file merges.txt from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/merges.txt\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-06-30 23:42:11,002 >> loading file tokenizer.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/tokenizer.json\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-06-30 23:42:11,002 >> loading file added_tokens.json from cache at None\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-06-30 23:42:11,002 >> loading file special_tokens_map.json from cache at None\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-06-30 23:42:11,002 >> loading file tokenizer_config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/tokenizer_config.json\n",
+ "[WARNING|logging.py:313] 2024-06-30 23:42:11,240 >> Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n",
+ "06/30/2024 23:42:11 - INFO - llamafactory.data.template - Replace eos token: <|im_end|>\n",
+ "06/30/2024 23:42:11 - INFO - llamafactory.data.template - Add <|im_start|> to stop words.\n",
+ "[INFO|configuration_utils.py:733] 2024-06-30 23:42:11,554 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/config.json\n",
+ "[INFO|configuration_utils.py:800] 2024-06-30 23:42:11,554 >> Model config Qwen2Config {\n",
+ " \"_name_or_path\": \"Qwen/Qwen2-1.5B-Instruct\",\n",
+ " \"architectures\": [\n",
+ " \"Qwen2ForCausalLM\"\n",
+ " ],\n",
+ " \"attention_dropout\": 0.0,\n",
+ " \"bos_token_id\": 151643,\n",
+ " \"eos_token_id\": 151645,\n",
+ " \"hidden_act\": \"silu\",\n",
+ " \"hidden_size\": 1536,\n",
+ " \"initializer_range\": 0.02,\n",
+ " \"intermediate_size\": 8960,\n",
+ " \"max_position_embeddings\": 32768,\n",
+ " \"max_window_layers\": 28,\n",
+ " \"model_type\": \"qwen2\",\n",
+ " \"num_attention_heads\": 12,\n",
+ " \"num_hidden_layers\": 28,\n",
+ " \"num_key_value_heads\": 2,\n",
+ " \"rms_norm_eps\": 1e-06,\n",
+ " \"rope_theta\": 1000000.0,\n",
+ " \"sliding_window\": 32768,\n",
+ " \"tie_word_embeddings\": true,\n",
+ " \"torch_dtype\": \"bfloat16\",\n",
+ " \"transformers_version\": \"4.42.3\",\n",
+ " \"use_cache\": true,\n",
+ " \"use_sliding_window\": false,\n",
+ " \"vocab_size\": 151936\n",
+ "}\n",
+ "\n",
+ "06/30/2024 23:42:11 - INFO - llamafactory.model.model_utils.quantization - Quantizing model to 4 bit with bitsandbytes.\n",
+ "06/30/2024 23:42:11 - INFO - llamafactory.model.patcher - Using KV cache for faster generation.\n",
+ "[INFO|modeling_utils.py:3556] 2024-06-30 23:42:11,668 >> loading weights file model.safetensors from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/model.safetensors\n",
+ "[INFO|modeling_utils.py:1531] 2024-06-30 23:42:13,979 >> Instantiating Qwen2ForCausalLM model under default dtype torch.bfloat16.\n",
+ "[INFO|configuration_utils.py:1000] 2024-06-30 23:42:13,983 >> Generate config GenerationConfig {\n",
+ " \"bos_token_id\": 151643,\n",
+ " \"eos_token_id\": 151645\n",
+ "}\n",
+ "\n",
+ "[INFO|modeling_utils.py:4364] 2024-06-30 23:43:46,052 >> All model checkpoint weights were used when initializing Qwen2ForCausalLM.\n",
+ "\n",
+ "[INFO|modeling_utils.py:4372] 2024-06-30 23:43:46,052 >> All the weights of Qwen2ForCausalLM were initialized from the model checkpoint at Qwen/Qwen2-1.5B-Instruct.\n",
+ "If your task is similar to the task the model of the checkpoint was trained on, you can already use Qwen2ForCausalLM for predictions without further training.\n",
+ "[INFO|configuration_utils.py:955] 2024-06-30 23:43:47,155 >> loading configuration file generation_config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/generation_config.json\n",
+ "[INFO|configuration_utils.py:1000] 2024-06-30 23:43:47,155 >> Generate config GenerationConfig {\n",
+ " \"bos_token_id\": 151643,\n",
+ " \"do_sample\": true,\n",
+ " \"eos_token_id\": [\n",
+ " 151645,\n",
+ " 151643\n",
+ " ],\n",
+ " \"pad_token_id\": 151643,\n",
+ " \"repetition_penalty\": 1.1,\n",
+ " \"temperature\": 0.7,\n",
+ " \"top_k\": 20,\n",
+ " \"top_p\": 0.8\n",
+ "}\n",
+ "\n",
+ "06/30/2024 23:43:47 - INFO - llamafactory.model.model_utils.attention - Using torch SDPA for faster training and inference.\n",
+ "06/30/2024 23:43:48 - INFO - llamafactory.model.adapter - Loaded adapter(s): llama-factory/saves/qwen2-1.5b/lora/sft/checkpoint-1680\n",
+ "06/30/2024 23:43:48 - INFO - llamafactory.model.loader - all params: 1,552,946,688\n",
+ "(2) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n",
+ "1.604 GB of memory reserved.\n",
+ "loading train/test data files\n",
+ "DatasetDict({\n",
+ " train: Dataset({\n",
+ " features: ['chinese', 'english', 'text', 'prompt'],\n",
+ " num_rows: 4528\n",
+ " })\n",
+ " test: Dataset({\n",
+ " features: ['chinese', 'english', 'text', 'prompt'],\n",
+ " num_rows: 1133\n",
+ " })\n",
+ "})\n",
+ "Evaluating model: Qwen/Qwen2-1.5B-Instruct\n",
+ " 0%| | 0/1133 [00:00, ?it/s]--------\n",
+ "step 1: Old Geng took his gun off the table and raised it to his eye. He squeezed the trigger and a hail of bullets fell from the sky, golden sparrows falling like rain as shrapnel flew through the air among the willows.<|im_end|>\n",
+ "--------\n",
+ "step 2: Old Geng took his gun off the table and raised it to his eye. He squeezed the trigger and a hail of bullets fell from the sky, golden sparrows falling like rain as shrapnel flew through the air among the willows.\n",
+ "--------\n",
+ "step 3: Old Geng took his gun off the table and raised it to his eye. He squeezed the trigger and a hail of bullets fell from the sky, golden sparrows falling like rain as shrapnel flew through the air among the willows.\n",
+ "100%|███████████████████████████████████████| 1133/1133 [42:11<00:00, 2.23s/it]\n",
+ "(3) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n",
+ "1.838 GB of memory reserved.\n",
+ " chinese ... Qwen/Qwen2-1.5B-Instruct_checkpoint-1680\n",
+ "0 老耿端起枪,眯缝起一只三角眼,一搂扳机响了枪,冰雹般的金麻雀劈哩啪啦往下落,铁砂子在柳枝间飞... ... Old Geng took his gun off the table and raised...\n",
+ "\n",
+ "[1 rows x 15 columns]\n",
+ "{'accuracy': 0.00529567519858782, 'correct_ids': [77, 147, 199, 452, 738, 918], 'meteor': 0.3723931629938662, 'bleu_scores': {'bleu': 0.1007710645770402, 'precisions': [0.4158811367698076, 0.14392059553349876, 0.0641747868453106, 0.03384639860000795], 'brevity_penalty': 0.9437209131631352, 'length_ratio': 0.9452467704537927, 'translation_length': 28537, 'reference_length': 30190}, 'rouge_scores': {'rouge1': 0.40370214820886885, 'rouge2': 0.1641473385689542, 'rougeL': 0.3423335232392143, 'rougeLsum': 0.3424044524649077}}\n",
+ "Epoch 4\n",
+ "loading env vars from: /home/inflaton/code/projects/courses/llm-finetuning/.env\n",
+ "Adding /home/inflaton/code/projects/courses/llm-finetuning to sys.path\n",
+ "🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.\n",
+ "[nltk_data] Downloading package wordnet to /home/inflaton/nltk_data...\n",
+ "[nltk_data] Package wordnet is already up-to-date!\n",
+ "[nltk_data] Downloading package punkt to /home/inflaton/nltk_data...\n",
+ "[nltk_data] Package punkt is already up-to-date!\n",
+ "[nltk_data] Downloading package omw-1.4 to /home/inflaton/nltk_data...\n",
+ "[nltk_data] Package omw-1.4 is already up-to-date!\n",
+ "loading /home/inflaton/code/projects/courses/llm-finetuning/llm_toolkit/translation_engine.py\n",
+ "Qwen/Qwen2-1.5B-Instruct llama-factory/saves/qwen2-1.5b/lora/sft/checkpoint-2240 True datasets/mac/mac.tsv results/mac-results_lf.csv\n",
+ "(1) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n",
+ "0.0 GB of memory reserved.\n",
+ "loading model: Qwen/Qwen2-1.5B-Instruct\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-07-01 00:26:19,392 >> loading file vocab.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/vocab.json\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-07-01 00:26:19,392 >> loading file merges.txt from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/merges.txt\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-07-01 00:26:19,392 >> loading file tokenizer.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/tokenizer.json\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-07-01 00:26:19,392 >> loading file added_tokens.json from cache at None\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-07-01 00:26:19,392 >> loading file special_tokens_map.json from cache at None\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-07-01 00:26:19,392 >> loading file tokenizer_config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/tokenizer_config.json\n",
+ "[WARNING|logging.py:313] 2024-07-01 00:26:19,534 >> Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n",
+ "07/01/2024 00:26:19 - INFO - llamafactory.data.template - Replace eos token: <|im_end|>\n",
+ "07/01/2024 00:26:19 - INFO - llamafactory.data.template - Add <|im_start|> to stop words.\n",
+ "[INFO|configuration_utils.py:733] 2024-07-01 00:26:19,883 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/config.json\n",
+ "[INFO|configuration_utils.py:800] 2024-07-01 00:26:19,883 >> Model config Qwen2Config {\n",
+ " \"_name_or_path\": \"Qwen/Qwen2-1.5B-Instruct\",\n",
+ " \"architectures\": [\n",
+ " \"Qwen2ForCausalLM\"\n",
+ " ],\n",
+ " \"attention_dropout\": 0.0,\n",
+ " \"bos_token_id\": 151643,\n",
+ " \"eos_token_id\": 151645,\n",
+ " \"hidden_act\": \"silu\",\n",
+ " \"hidden_size\": 1536,\n",
+ " \"initializer_range\": 0.02,\n",
+ " \"intermediate_size\": 8960,\n",
+ " \"max_position_embeddings\": 32768,\n",
+ " \"max_window_layers\": 28,\n",
+ " \"model_type\": \"qwen2\",\n",
+ " \"num_attention_heads\": 12,\n",
+ " \"num_hidden_layers\": 28,\n",
+ " \"num_key_value_heads\": 2,\n",
+ " \"rms_norm_eps\": 1e-06,\n",
+ " \"rope_theta\": 1000000.0,\n",
+ " \"sliding_window\": 32768,\n",
+ " \"tie_word_embeddings\": true,\n",
+ " \"torch_dtype\": \"bfloat16\",\n",
+ " \"transformers_version\": \"4.42.3\",\n",
+ " \"use_cache\": true,\n",
+ " \"use_sliding_window\": false,\n",
+ " \"vocab_size\": 151936\n",
+ "}\n",
+ "\n",
+ "07/01/2024 00:26:19 - INFO - llamafactory.model.model_utils.quantization - Quantizing model to 4 bit with bitsandbytes.\n",
+ "07/01/2024 00:26:19 - INFO - llamafactory.model.patcher - Using KV cache for faster generation.\n",
+ "[INFO|modeling_utils.py:3556] 2024-07-01 00:26:19,958 >> loading weights file model.safetensors from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/model.safetensors\n",
+ "[INFO|modeling_utils.py:1531] 2024-07-01 00:26:21,213 >> Instantiating Qwen2ForCausalLM model under default dtype torch.bfloat16.\n",
+ "[INFO|configuration_utils.py:1000] 2024-07-01 00:26:21,216 >> Generate config GenerationConfig {\n",
+ " \"bos_token_id\": 151643,\n",
+ " \"eos_token_id\": 151645\n",
+ "}\n",
+ "\n",
+ "[INFO|modeling_utils.py:4364] 2024-07-01 00:27:43,020 >> All model checkpoint weights were used when initializing Qwen2ForCausalLM.\n",
+ "\n",
+ "[INFO|modeling_utils.py:4372] 2024-07-01 00:27:43,020 >> All the weights of Qwen2ForCausalLM were initialized from the model checkpoint at Qwen/Qwen2-1.5B-Instruct.\n",
+ "If your task is similar to the task the model of the checkpoint was trained on, you can already use Qwen2ForCausalLM for predictions without further training.\n",
+ "[INFO|configuration_utils.py:955] 2024-07-01 00:27:43,422 >> loading configuration file generation_config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/generation_config.json\n",
+ "[INFO|configuration_utils.py:1000] 2024-07-01 00:27:43,422 >> Generate config GenerationConfig {\n",
+ " \"bos_token_id\": 151643,\n",
+ " \"do_sample\": true,\n",
+ " \"eos_token_id\": [\n",
+ " 151645,\n",
+ " 151643\n",
+ " ],\n",
+ " \"pad_token_id\": 151643,\n",
+ " \"repetition_penalty\": 1.1,\n",
+ " \"temperature\": 0.7,\n",
+ " \"top_k\": 20,\n",
+ " \"top_p\": 0.8\n",
+ "}\n",
+ "\n",
+ "07/01/2024 00:27:43 - INFO - llamafactory.model.model_utils.attention - Using torch SDPA for faster training and inference.\n",
+ "07/01/2024 00:27:44 - INFO - llamafactory.model.adapter - Loaded adapter(s): llama-factory/saves/qwen2-1.5b/lora/sft/checkpoint-2240\n",
+ "07/01/2024 00:27:44 - INFO - llamafactory.model.loader - all params: 1,552,946,688\n",
+ "(2) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n",
+ "1.604 GB of memory reserved.\n",
+ "loading train/test data files\n",
+ "DatasetDict({\n",
+ " train: Dataset({\n",
+ " features: ['chinese', 'english', 'text', 'prompt'],\n",
+ " num_rows: 4528\n",
+ " })\n",
+ " test: Dataset({\n",
+ " features: ['chinese', 'english', 'text', 'prompt'],\n",
+ " num_rows: 1133\n",
+ " })\n",
+ "})\n",
+ "Evaluating model: Qwen/Qwen2-1.5B-Instruct\n",
+ " 0%| | 0/1133 [00:00, ?it/s]--------\n",
+ "step 1: Old Geng raised his rifle and squeezed the trigger. The sound of gunfire joined the chattering rain as hundreds of sparrows fell from the sky, the pellets flying through the air between the willow twigs.<|im_end|>\n",
+ "--------\n",
+ "step 2: Old Geng raised his rifle and squeezed the trigger. The sound of gunfire joined the chattering rain as hundreds of sparrows fell from the sky, the pellets flying through the air between the willow twigs.\n",
+ "--------\n",
+ "step 3: Old Geng raised his rifle and squeezed the trigger. The sound of gunfire joined the chattering rain as hundreds of sparrows fell from the sky, the pellets flying through the air between the willow twigs.\n",
+ "100%|███████████████████████████████████████| 1133/1133 [40:27<00:00, 2.14s/it]\n",
+ "(3) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n",
+ "1.838 GB of memory reserved.\n",
+ " chinese ... Qwen/Qwen2-1.5B-Instruct_checkpoint-2240\n",
+ "0 老耿端起枪,眯缝起一只三角眼,一搂扳机响了枪,冰雹般的金麻雀劈哩啪啦往下落,铁砂子在柳枝间飞... ... Old Geng raised his rifle and squeezed the tri...\n",
+ "\n",
+ "[1 rows x 16 columns]\n",
+ "{'accuracy': 0.00264783759929391, 'correct_ids': [147, 199, 738], 'meteor': 0.35847259317675817, 'bleu_scores': {'bleu': 0.09681182585608442, 'precisions': [0.4169993042077123, 0.14579353556964927, 0.06572957431515054, 0.03353403579193845], 'brevity_penalty': 0.8998048931972519, 'length_ratio': 0.9045048029148725, 'translation_length': 27307, 'reference_length': 30190}, 'rouge_scores': {'rouge1': 0.3940152835057211, 'rouge2': 0.16326412776493693, 'rougeL': 0.33702749255447373, 'rougeLsum': 0.3369782380738291}}\n",
+ "Epoch 5\n",
+ "loading env vars from: /home/inflaton/code/projects/courses/llm-finetuning/.env\n",
+ "Adding /home/inflaton/code/projects/courses/llm-finetuning to sys.path\n",
+ "🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.\n",
+ "[nltk_data] Downloading package wordnet to /home/inflaton/nltk_data...\n",
+ "[nltk_data] Package wordnet is already up-to-date!\n",
+ "[nltk_data] Downloading package punkt to /home/inflaton/nltk_data...\n",
+ "[nltk_data] Package punkt is already up-to-date!\n",
+ "[nltk_data] Downloading package omw-1.4 to /home/inflaton/nltk_data...\n",
+ "[nltk_data] Package omw-1.4 is already up-to-date!\n",
+ "loading /home/inflaton/code/projects/courses/llm-finetuning/llm_toolkit/translation_engine.py\n",
+ "Qwen/Qwen2-1.5B-Instruct llama-factory/saves/qwen2-1.5b/lora/sft/checkpoint-2800 True datasets/mac/mac.tsv results/mac-results_lf.csv\n",
+ "(1) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n",
+ "0.0 GB of memory reserved.\n",
+ "loading model: Qwen/Qwen2-1.5B-Instruct\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-07-01 01:08:35,227 >> loading file vocab.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/vocab.json\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-07-01 01:08:35,227 >> loading file merges.txt from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/merges.txt\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-07-01 01:08:35,227 >> loading file tokenizer.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/tokenizer.json\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-07-01 01:08:35,227 >> loading file added_tokens.json from cache at None\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-07-01 01:08:35,227 >> loading file special_tokens_map.json from cache at None\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-07-01 01:08:35,227 >> loading file tokenizer_config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/tokenizer_config.json\n",
+ "[WARNING|logging.py:313] 2024-07-01 01:08:35,401 >> Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n",
+ "07/01/2024 01:08:35 - INFO - llamafactory.data.template - Replace eos token: <|im_end|>\n",
+ "07/01/2024 01:08:35 - INFO - llamafactory.data.template - Add <|im_start|> to stop words.\n",
+ "[INFO|configuration_utils.py:733] 2024-07-01 01:08:35,697 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/config.json\n",
+ "[INFO|configuration_utils.py:800] 2024-07-01 01:08:35,697 >> Model config Qwen2Config {\n",
+ " \"_name_or_path\": \"Qwen/Qwen2-1.5B-Instruct\",\n",
+ " \"architectures\": [\n",
+ " \"Qwen2ForCausalLM\"\n",
+ " ],\n",
+ " \"attention_dropout\": 0.0,\n",
+ " \"bos_token_id\": 151643,\n",
+ " \"eos_token_id\": 151645,\n",
+ " \"hidden_act\": \"silu\",\n",
+ " \"hidden_size\": 1536,\n",
+ " \"initializer_range\": 0.02,\n",
+ " \"intermediate_size\": 8960,\n",
+ " \"max_position_embeddings\": 32768,\n",
+ " \"max_window_layers\": 28,\n",
+ " \"model_type\": \"qwen2\",\n",
+ " \"num_attention_heads\": 12,\n",
+ " \"num_hidden_layers\": 28,\n",
+ " \"num_key_value_heads\": 2,\n",
+ " \"rms_norm_eps\": 1e-06,\n",
+ " \"rope_theta\": 1000000.0,\n",
+ " \"sliding_window\": 32768,\n",
+ " \"tie_word_embeddings\": true,\n",
+ " \"torch_dtype\": \"bfloat16\",\n",
+ " \"transformers_version\": \"4.42.3\",\n",
+ " \"use_cache\": true,\n",
+ " \"use_sliding_window\": false,\n",
+ " \"vocab_size\": 151936\n",
+ "}\n",
+ "\n",
+ "07/01/2024 01:08:35 - INFO - llamafactory.model.model_utils.quantization - Quantizing model to 4 bit with bitsandbytes.\n",
+ "07/01/2024 01:08:35 - INFO - llamafactory.model.patcher - Using KV cache for faster generation.\n",
+ "[INFO|modeling_utils.py:3556] 2024-07-01 01:08:35,772 >> loading weights file model.safetensors from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/model.safetensors\n",
+ "[INFO|modeling_utils.py:1531] 2024-07-01 01:08:37,565 >> Instantiating Qwen2ForCausalLM model under default dtype torch.bfloat16.\n",
+ "[INFO|configuration_utils.py:1000] 2024-07-01 01:08:37,570 >> Generate config GenerationConfig {\n",
+ " \"bos_token_id\": 151643,\n",
+ " \"eos_token_id\": 151645\n",
+ "}\n",
+ "\n",
+ "[INFO|modeling_utils.py:4364] 2024-07-01 01:10:00,800 >> All model checkpoint weights were used when initializing Qwen2ForCausalLM.\n",
+ "\n",
+ "[INFO|modeling_utils.py:4372] 2024-07-01 01:10:00,800 >> All the weights of Qwen2ForCausalLM were initialized from the model checkpoint at Qwen/Qwen2-1.5B-Instruct.\n",
+ "If your task is similar to the task the model of the checkpoint was trained on, you can already use Qwen2ForCausalLM for predictions without further training.\n",
+ "[INFO|configuration_utils.py:955] 2024-07-01 01:10:01,095 >> loading configuration file generation_config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/generation_config.json\n",
+ "[INFO|configuration_utils.py:1000] 2024-07-01 01:10:01,096 >> Generate config GenerationConfig {\n",
+ " \"bos_token_id\": 151643,\n",
+ " \"do_sample\": true,\n",
+ " \"eos_token_id\": [\n",
+ " 151645,\n",
+ " 151643\n",
+ " ],\n",
+ " \"pad_token_id\": 151643,\n",
+ " \"repetition_penalty\": 1.1,\n",
+ " \"temperature\": 0.7,\n",
+ " \"top_k\": 20,\n",
+ " \"top_p\": 0.8\n",
+ "}\n",
+ "\n",
+ "07/01/2024 01:10:01 - INFO - llamafactory.model.model_utils.attention - Using torch SDPA for faster training and inference.\n",
+ "07/01/2024 01:10:02 - INFO - llamafactory.model.adapter - Loaded adapter(s): llama-factory/saves/qwen2-1.5b/lora/sft/checkpoint-2800\n",
+ "07/01/2024 01:10:02 - INFO - llamafactory.model.loader - all params: 1,552,946,688\n",
+ "(2) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n",
+ "1.604 GB of memory reserved.\n",
+ "loading train/test data files\n",
+ "DatasetDict({\n",
+ " train: Dataset({\n",
+ " features: ['chinese', 'english', 'text', 'prompt'],\n",
+ " num_rows: 4528\n",
+ " })\n",
+ " test: Dataset({\n",
+ " features: ['chinese', 'english', 'text', 'prompt'],\n",
+ " num_rows: 1133\n",
+ " })\n",
+ "})\n",
+ "Evaluating model: Qwen/Qwen2-1.5B-Instruct\n",
+ " 0%| | 0/1133 [00:00, ?it/s]--------\n",
+ "step 1: Old Geng took aim and squeezed the trigger; dozens of gold-winged sparrows fell in a drenching rain, iron-shrapnel crackled among the willows, and a chorus of tiny explosions sounded over their heads.<|im_end|>\n",
+ "--------\n",
+ "step 2: Old Geng took aim and squeezed the trigger; dozens of gold-winged sparrows fell in a drenching rain, iron-shrapnel crackled among the willows, and a chorus of tiny explosions sounded over their heads.\n",
+ "--------\n",
+ "step 3: Old Geng took aim and squeezed the trigger; dozens of gold-winged sparrows fell in a drenching rain, iron-shrapnel crackled among the willows, and a chorus of tiny explosions sounded over their heads.\n",
+ "100%|█████████████████████████████████████| 1133/1133 [1:17:25<00:00, 4.10s/it]\n",
+ "(3) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n",
+ "5.197 GB of memory reserved.\n",
+ " chinese ... Qwen/Qwen2-1.5B-Instruct_checkpoint-2800\n",
+ "0 老耿端起枪,眯缝起一只三角眼,一搂扳机响了枪,冰雹般的金麻雀劈哩啪啦往下落,铁砂子在柳枝间飞... ... Old Geng took aim and squeezed the trigger; do...\n",
+ "\n",
+ "[1 rows x 17 columns]\n",
+ "{'accuracy': 0.00176522506619594, 'correct_ids': [147, 199], 'meteor': 0.35988930837184085, 'bleu_scores': {'bleu': 0.09029975816152737, 'precisions': [0.36273504273504276, 0.12144836028606404, 0.05442995653627549, 0.02772855206921714], 'brevity_penalty': 1.0, 'length_ratio': 1.0657502484266312, 'translation_length': 32175, 'reference_length': 30190}, 'rouge_scores': {'rouge1': 0.3917385628494343, 'rouge2': 0.158275578220186, 'rougeL': 0.33145202576141436, 'rougeLsum': 0.331550843392171}}\n",
+ "Epoch 6\n",
+ "loading env vars from: /home/inflaton/code/projects/courses/llm-finetuning/.env\n",
+ "Adding /home/inflaton/code/projects/courses/llm-finetuning to sys.path\n",
+ "🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.\n",
+ "[nltk_data] Downloading package wordnet to /home/inflaton/nltk_data...\n",
+ "[nltk_data] Package wordnet is already up-to-date!\n",
+ "[nltk_data] Downloading package punkt to /home/inflaton/nltk_data...\n",
+ "[nltk_data] Package punkt is already up-to-date!\n",
+ "[nltk_data] Downloading package omw-1.4 to /home/inflaton/nltk_data...\n",
+ "[nltk_data] Package omw-1.4 is already up-to-date!\n",
+ "loading /home/inflaton/code/projects/courses/llm-finetuning/llm_toolkit/translation_engine.py\n",
+ "Qwen/Qwen2-1.5B-Instruct llama-factory/saves/qwen2-1.5b/lora/sft/checkpoint-3360 True datasets/mac/mac.tsv results/mac-results_lf.csv\n",
+ "(1) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n",
+ "0.0 GB of memory reserved.\n",
+ "loading model: Qwen/Qwen2-1.5B-Instruct\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-07-01 02:27:49,309 >> loading file vocab.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/vocab.json\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-07-01 02:27:49,309 >> loading file merges.txt from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/merges.txt\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-07-01 02:27:49,309 >> loading file tokenizer.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/tokenizer.json\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-07-01 02:27:49,309 >> loading file added_tokens.json from cache at None\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-07-01 02:27:49,309 >> loading file special_tokens_map.json from cache at None\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-07-01 02:27:49,309 >> loading file tokenizer_config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/tokenizer_config.json\n",
+ "[WARNING|logging.py:313] 2024-07-01 02:27:49,467 >> Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n",
+ "07/01/2024 02:27:49 - INFO - llamafactory.data.template - Replace eos token: <|im_end|>\n",
+ "07/01/2024 02:27:49 - INFO - llamafactory.data.template - Add <|im_start|> to stop words.\n",
+ "[INFO|configuration_utils.py:733] 2024-07-01 02:27:49,780 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/config.json\n",
+ "[INFO|configuration_utils.py:800] 2024-07-01 02:27:49,781 >> Model config Qwen2Config {\n",
+ " \"_name_or_path\": \"Qwen/Qwen2-1.5B-Instruct\",\n",
+ " \"architectures\": [\n",
+ " \"Qwen2ForCausalLM\"\n",
+ " ],\n",
+ " \"attention_dropout\": 0.0,\n",
+ " \"bos_token_id\": 151643,\n",
+ " \"eos_token_id\": 151645,\n",
+ " \"hidden_act\": \"silu\",\n",
+ " \"hidden_size\": 1536,\n",
+ " \"initializer_range\": 0.02,\n",
+ " \"intermediate_size\": 8960,\n",
+ " \"max_position_embeddings\": 32768,\n",
+ " \"max_window_layers\": 28,\n",
+ " \"model_type\": \"qwen2\",\n",
+ " \"num_attention_heads\": 12,\n",
+ " \"num_hidden_layers\": 28,\n",
+ " \"num_key_value_heads\": 2,\n",
+ " \"rms_norm_eps\": 1e-06,\n",
+ " \"rope_theta\": 1000000.0,\n",
+ " \"sliding_window\": 32768,\n",
+ " \"tie_word_embeddings\": true,\n",
+ " \"torch_dtype\": \"bfloat16\",\n",
+ " \"transformers_version\": \"4.42.3\",\n",
+ " \"use_cache\": true,\n",
+ " \"use_sliding_window\": false,\n",
+ " \"vocab_size\": 151936\n",
+ "}\n",
+ "\n",
+ "07/01/2024 02:27:49 - INFO - llamafactory.model.model_utils.quantization - Quantizing model to 4 bit with bitsandbytes.\n",
+ "07/01/2024 02:27:49 - INFO - llamafactory.model.patcher - Using KV cache for faster generation.\n",
+ "[INFO|modeling_utils.py:3556] 2024-07-01 02:27:49,851 >> loading weights file model.safetensors from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/model.safetensors\n",
+ "[INFO|modeling_utils.py:1531] 2024-07-01 02:27:51,890 >> Instantiating Qwen2ForCausalLM model under default dtype torch.bfloat16.\n",
+ "[INFO|configuration_utils.py:1000] 2024-07-01 02:27:51,895 >> Generate config GenerationConfig {\n",
+ " \"bos_token_id\": 151643,\n",
+ " \"eos_token_id\": 151645\n",
+ "}\n",
+ "\n",
+ "[INFO|modeling_utils.py:4364] 2024-07-01 02:29:12,004 >> All model checkpoint weights were used when initializing Qwen2ForCausalLM.\n",
+ "\n",
+ "[INFO|modeling_utils.py:4372] 2024-07-01 02:29:12,004 >> All the weights of Qwen2ForCausalLM were initialized from the model checkpoint at Qwen/Qwen2-1.5B-Instruct.\n",
+ "If your task is similar to the task the model of the checkpoint was trained on, you can already use Qwen2ForCausalLM for predictions without further training.\n",
+ "[INFO|configuration_utils.py:955] 2024-07-01 02:29:12,299 >> loading configuration file generation_config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/generation_config.json\n",
+ "[INFO|configuration_utils.py:1000] 2024-07-01 02:29:12,299 >> Generate config GenerationConfig {\n",
+ " \"bos_token_id\": 151643,\n",
+ " \"do_sample\": true,\n",
+ " \"eos_token_id\": [\n",
+ " 151645,\n",
+ " 151643\n",
+ " ],\n",
+ " \"pad_token_id\": 151643,\n",
+ " \"repetition_penalty\": 1.1,\n",
+ " \"temperature\": 0.7,\n",
+ " \"top_k\": 20,\n",
+ " \"top_p\": 0.8\n",
+ "}\n",
+ "\n",
+ "07/01/2024 02:29:12 - INFO - llamafactory.model.model_utils.attention - Using torch SDPA for faster training and inference.\n",
+ "07/01/2024 02:29:13 - INFO - llamafactory.model.adapter - Loaded adapter(s): llama-factory/saves/qwen2-1.5b/lora/sft/checkpoint-3360\n",
+ "07/01/2024 02:29:13 - INFO - llamafactory.model.loader - all params: 1,552,946,688\n",
+ "(2) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n",
+ "1.604 GB of memory reserved.\n",
+ "loading train/test data files\n",
+ "DatasetDict({\n",
+ " train: Dataset({\n",
+ " features: ['chinese', 'english', 'text', 'prompt'],\n",
+ " num_rows: 4528\n",
+ " })\n",
+ " test: Dataset({\n",
+ " features: ['chinese', 'english', 'text', 'prompt'],\n",
+ " num_rows: 1133\n",
+ " })\n",
+ "})\n",
+ "Evaluating model: Qwen/Qwen2-1.5B-Instruct\n",
+ " 0%| | 0/1133 [00:00, ?it/s]--------\n",
+ "step 1: Old Geng took a step forward, raised his pistol, and squeezed the trigger. The pellets of lead raining down from above exploded against the snow-covered ground. They flew through the air as sparks of iron grit crackled among the willows.<|im_end|>\n",
+ "--------\n",
+ "step 2: Old Geng took a step forward, raised his pistol, and squeezed the trigger. The pellets of lead raining down from above exploded against the snow-covered ground. They flew through the air as sparks of iron grit crackled among the willows.\n",
+ "--------\n",
+ "step 3: Old Geng took a step forward, raised his pistol, and squeezed the trigger. The pellets of lead raining down from above exploded against the snow-covered ground. They flew through the air as sparks of iron grit crackled among the willows.\n",
+ "100%|█████████████████████████████████████| 1133/1133 [1:14:59<00:00, 3.97s/it]\n",
+ "(3) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n",
+ "5.178 GB of memory reserved.\n",
+ " chinese ... Qwen/Qwen2-1.5B-Instruct_checkpoint-3360\n",
+ "0 老耿端起枪,眯缝起一只三角眼,一搂扳机响了枪,冰雹般的金麻雀劈哩啪啦往下落,铁砂子在柳枝间飞... ... Old Geng took a step forward, raised his pisto...\n",
+ "\n",
+ "[1 rows x 18 columns]\n",
+ "{'accuracy': 0.00176522506619594, 'correct_ids': [147, 199], 'meteor': 0.3460642024871934, 'bleu_scores': {'bleu': 0.09384985027759411, 'precisions': [0.39390243902439026, 0.1306634744440817, 0.059353130319651975, 0.031256174181056626], 'brevity_penalty': 0.949408256548351, 'length_ratio': 0.9506459092414706, 'translation_length': 28700, 'reference_length': 30190}, 'rouge_scores': {'rouge1': 0.37889769060024026, 'rouge2': 0.14962195702951014, 'rougeL': 0.32301072520504354, 'rougeLsum': 0.3229695536364973}}\n",
+ "Epoch 7\n",
+ "loading env vars from: /home/inflaton/code/projects/courses/llm-finetuning/.env\n",
+ "Adding /home/inflaton/code/projects/courses/llm-finetuning to sys.path\n",
+ "🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.\n",
+ "[nltk_data] Downloading package wordnet to /home/inflaton/nltk_data...\n",
+ "[nltk_data] Package wordnet is already up-to-date!\n",
+ "[nltk_data] Downloading package punkt to /home/inflaton/nltk_data...\n",
+ "[nltk_data] Package punkt is already up-to-date!\n",
+ "[nltk_data] Downloading package omw-1.4 to /home/inflaton/nltk_data...\n",
+ "[nltk_data] Package omw-1.4 is already up-to-date!\n",
+ "loading /home/inflaton/code/projects/courses/llm-finetuning/llm_toolkit/translation_engine.py\n",
+ "Qwen/Qwen2-1.5B-Instruct llama-factory/saves/qwen2-1.5b/lora/sft/checkpoint-3920 True datasets/mac/mac.tsv results/mac-results_lf.csv\n",
+ "(1) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n",
+ "0.0 GB of memory reserved.\n",
+ "loading model: Qwen/Qwen2-1.5B-Instruct\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-07-01 03:44:33,600 >> loading file vocab.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/vocab.json\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-07-01 03:44:33,600 >> loading file merges.txt from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/merges.txt\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-07-01 03:44:33,600 >> loading file tokenizer.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/tokenizer.json\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-07-01 03:44:33,601 >> loading file added_tokens.json from cache at None\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-07-01 03:44:33,601 >> loading file special_tokens_map.json from cache at None\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-07-01 03:44:33,601 >> loading file tokenizer_config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/tokenizer_config.json\n",
+ "[WARNING|logging.py:313] 2024-07-01 03:44:34,047 >> Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n",
+ "07/01/2024 03:44:34 - INFO - llamafactory.data.template - Replace eos token: <|im_end|>\n",
+ "07/01/2024 03:44:34 - INFO - llamafactory.data.template - Add <|im_start|> to stop words.\n",
+ "[INFO|configuration_utils.py:733] 2024-07-01 03:44:34,340 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/config.json\n",
+ "[INFO|configuration_utils.py:800] 2024-07-01 03:44:34,341 >> Model config Qwen2Config {\n",
+ " \"_name_or_path\": \"Qwen/Qwen2-1.5B-Instruct\",\n",
+ " \"architectures\": [\n",
+ " \"Qwen2ForCausalLM\"\n",
+ " ],\n",
+ " \"attention_dropout\": 0.0,\n",
+ " \"bos_token_id\": 151643,\n",
+ " \"eos_token_id\": 151645,\n",
+ " \"hidden_act\": \"silu\",\n",
+ " \"hidden_size\": 1536,\n",
+ " \"initializer_range\": 0.02,\n",
+ " \"intermediate_size\": 8960,\n",
+ " \"max_position_embeddings\": 32768,\n",
+ " \"max_window_layers\": 28,\n",
+ " \"model_type\": \"qwen2\",\n",
+ " \"num_attention_heads\": 12,\n",
+ " \"num_hidden_layers\": 28,\n",
+ " \"num_key_value_heads\": 2,\n",
+ " \"rms_norm_eps\": 1e-06,\n",
+ " \"rope_theta\": 1000000.0,\n",
+ " \"sliding_window\": 32768,\n",
+ " \"tie_word_embeddings\": true,\n",
+ " \"torch_dtype\": \"bfloat16\",\n",
+ " \"transformers_version\": \"4.42.3\",\n",
+ " \"use_cache\": true,\n",
+ " \"use_sliding_window\": false,\n",
+ " \"vocab_size\": 151936\n",
+ "}\n",
+ "\n",
+ "07/01/2024 03:44:34 - INFO - llamafactory.model.model_utils.quantization - Quantizing model to 4 bit with bitsandbytes.\n",
+ "07/01/2024 03:44:34 - INFO - llamafactory.model.patcher - Using KV cache for faster generation.\n",
+ "[INFO|modeling_utils.py:3556] 2024-07-01 03:44:34,397 >> loading weights file model.safetensors from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/model.safetensors\n",
+ "[INFO|modeling_utils.py:1531] 2024-07-01 03:44:35,481 >> Instantiating Qwen2ForCausalLM model under default dtype torch.bfloat16.\n",
+ "[INFO|configuration_utils.py:1000] 2024-07-01 03:44:35,484 >> Generate config GenerationConfig {\n",
+ " \"bos_token_id\": 151643,\n",
+ " \"eos_token_id\": 151645\n",
+ "}\n",
+ "\n",
+ "[INFO|modeling_utils.py:4364] 2024-07-01 03:45:57,180 >> All model checkpoint weights were used when initializing Qwen2ForCausalLM.\n",
+ "\n",
+ "[INFO|modeling_utils.py:4372] 2024-07-01 03:45:57,180 >> All the weights of Qwen2ForCausalLM were initialized from the model checkpoint at Qwen/Qwen2-1.5B-Instruct.\n",
+ "If your task is similar to the task the model of the checkpoint was trained on, you can already use Qwen2ForCausalLM for predictions without further training.\n",
+ "[INFO|configuration_utils.py:955] 2024-07-01 03:45:57,530 >> loading configuration file generation_config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/generation_config.json\n",
+ "[INFO|configuration_utils.py:1000] 2024-07-01 03:45:57,530 >> Generate config GenerationConfig {\n",
+ " \"bos_token_id\": 151643,\n",
+ " \"do_sample\": true,\n",
+ " \"eos_token_id\": [\n",
+ " 151645,\n",
+ " 151643\n",
+ " ],\n",
+ " \"pad_token_id\": 151643,\n",
+ " \"repetition_penalty\": 1.1,\n",
+ " \"temperature\": 0.7,\n",
+ " \"top_k\": 20,\n",
+ " \"top_p\": 0.8\n",
+ "}\n",
+ "\n",
+ "07/01/2024 03:45:57 - INFO - llamafactory.model.model_utils.attention - Using torch SDPA for faster training and inference.\n",
+ "07/01/2024 03:45:58 - INFO - llamafactory.model.adapter - Loaded adapter(s): llama-factory/saves/qwen2-1.5b/lora/sft/checkpoint-3920\n",
+ "07/01/2024 03:45:58 - INFO - llamafactory.model.loader - all params: 1,552,946,688\n",
+ "(2) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n",
+ "1.604 GB of memory reserved.\n",
+ "loading train/test data files\n",
+ "DatasetDict({\n",
+ " train: Dataset({\n",
+ " features: ['chinese', 'english', 'text', 'prompt'],\n",
+ " num_rows: 4528\n",
+ " })\n",
+ " test: Dataset({\n",
+ " features: ['chinese', 'english', 'text', 'prompt'],\n",
+ " num_rows: 1133\n",
+ " })\n",
+ "})\n",
+ "Evaluating model: Qwen/Qwen2-1.5B-Instruct\n",
+ " 0%| | 0/1133 [00:00, ?it/s]--------\n",
+ "step 1: Old Geng raised his pistol, opened it up, and a few bullets flew out, like hailstones. Golden sparrows fell, and grit exploded among the willows, making a tinkling sound.<|im_end|>\n",
+ "--------\n",
+ "step 2: Old Geng raised his pistol, opened it up, and a few bullets flew out, like hailstones. Golden sparrows fell, and grit exploded among the willows, making a tinkling sound.\n",
+ "--------\n",
+ "step 3: Old Geng raised his pistol, opened it up, and a few bullets flew out, like hailstones. Golden sparrows fell, and grit exploded among the willows, making a tinkling sound.\n",
+ "100%|█████████████████████████████████████| 1133/1133 [1:15:29<00:00, 4.00s/it]\n",
+ "(3) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n",
+ "5.197 GB of memory reserved.\n",
+ " chinese ... Qwen/Qwen2-1.5B-Instruct_checkpoint-3920\n",
+ "0 老耿端起枪,眯缝起一只三角眼,一搂扳机响了枪,冰雹般的金麻雀劈哩啪啦往下落,铁砂子在柳枝间飞... ... Old Geng raised his pistol, opened it up, and ...\n",
+ "\n",
+ "[1 rows x 19 columns]\n",
+ "{'accuracy': 0.00176522506619594, 'correct_ids': [147, 199], 'meteor': 0.3479480952549209, 'bleu_scores': {'bleu': 0.08568897530454278, 'precisions': [0.34471041533934044, 0.11467889908256881, 0.051635392233515764, 0.02641279718624235], 'brevity_penalty': 1.0, 'length_ratio': 1.0917853593905267, 'translation_length': 32961, 'reference_length': 30190}, 'rouge_scores': {'rouge1': 0.3755109220918789, 'rouge2': 0.14664341233690792, 'rougeL': 0.3172964023166135, 'rougeLsum': 0.31738234724622777}}\n",
+ "Epoch 8\n",
+ "loading env vars from: /home/inflaton/code/projects/courses/llm-finetuning/.env\n",
+ "Adding /home/inflaton/code/projects/courses/llm-finetuning to sys.path\n",
+ "🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.\n",
+ "[nltk_data] Downloading package wordnet to /home/inflaton/nltk_data...\n",
+ "[nltk_data] Package wordnet is already up-to-date!\n",
+ "[nltk_data] Downloading package punkt to /home/inflaton/nltk_data...\n",
+ "[nltk_data] Package punkt is already up-to-date!\n",
+ "[nltk_data] Downloading package omw-1.4 to /home/inflaton/nltk_data...\n",
+ "[nltk_data] Package omw-1.4 is already up-to-date!\n",
+ "loading /home/inflaton/code/projects/courses/llm-finetuning/llm_toolkit/translation_engine.py\n",
+ "Qwen/Qwen2-1.5B-Instruct llama-factory/saves/qwen2-1.5b/lora/sft/checkpoint-4480 True datasets/mac/mac.tsv results/mac-results_lf.csv\n",
+ "(1) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n",
+ "0.0 GB of memory reserved.\n",
+ "loading model: Qwen/Qwen2-1.5B-Instruct\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-07-01 05:01:48,632 >> loading file vocab.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/vocab.json\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-07-01 05:01:48,632 >> loading file merges.txt from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/merges.txt\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-07-01 05:01:48,632 >> loading file tokenizer.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/tokenizer.json\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-07-01 05:01:48,632 >> loading file added_tokens.json from cache at None\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-07-01 05:01:48,632 >> loading file special_tokens_map.json from cache at None\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-07-01 05:01:48,632 >> loading file tokenizer_config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/tokenizer_config.json\n",
+ "[WARNING|logging.py:313] 2024-07-01 05:01:48,913 >> Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n",
+ "07/01/2024 05:01:48 - INFO - llamafactory.data.template - Replace eos token: <|im_end|>\n",
+ "07/01/2024 05:01:48 - INFO - llamafactory.data.template - Add <|im_start|> to stop words.\n",
+ "[INFO|configuration_utils.py:733] 2024-07-01 05:01:49,230 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/config.json\n",
+ "[INFO|configuration_utils.py:800] 2024-07-01 05:01:49,230 >> Model config Qwen2Config {\n",
+ " \"_name_or_path\": \"Qwen/Qwen2-1.5B-Instruct\",\n",
+ " \"architectures\": [\n",
+ " \"Qwen2ForCausalLM\"\n",
+ " ],\n",
+ " \"attention_dropout\": 0.0,\n",
+ " \"bos_token_id\": 151643,\n",
+ " \"eos_token_id\": 151645,\n",
+ " \"hidden_act\": \"silu\",\n",
+ " \"hidden_size\": 1536,\n",
+ " \"initializer_range\": 0.02,\n",
+ " \"intermediate_size\": 8960,\n",
+ " \"max_position_embeddings\": 32768,\n",
+ " \"max_window_layers\": 28,\n",
+ " \"model_type\": \"qwen2\",\n",
+ " \"num_attention_heads\": 12,\n",
+ " \"num_hidden_layers\": 28,\n",
+ " \"num_key_value_heads\": 2,\n",
+ " \"rms_norm_eps\": 1e-06,\n",
+ " \"rope_theta\": 1000000.0,\n",
+ " \"sliding_window\": 32768,\n",
+ " \"tie_word_embeddings\": true,\n",
+ " \"torch_dtype\": \"bfloat16\",\n",
+ " \"transformers_version\": \"4.42.3\",\n",
+ " \"use_cache\": true,\n",
+ " \"use_sliding_window\": false,\n",
+ " \"vocab_size\": 151936\n",
+ "}\n",
+ "\n",
+ "07/01/2024 05:01:49 - INFO - llamafactory.model.model_utils.quantization - Quantizing model to 4 bit with bitsandbytes.\n",
+ "07/01/2024 05:01:49 - INFO - llamafactory.model.patcher - Using KV cache for faster generation.\n",
+ "[INFO|modeling_utils.py:3556] 2024-07-01 05:01:49,319 >> loading weights file model.safetensors from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/model.safetensors\n",
+ "[INFO|modeling_utils.py:1531] 2024-07-01 05:01:51,629 >> Instantiating Qwen2ForCausalLM model under default dtype torch.bfloat16.\n",
+ "[INFO|configuration_utils.py:1000] 2024-07-01 05:01:51,633 >> Generate config GenerationConfig {\n",
+ " \"bos_token_id\": 151643,\n",
+ " \"eos_token_id\": 151645\n",
+ "}\n",
+ "\n",
+ "[INFO|modeling_utils.py:4364] 2024-07-01 05:03:12,246 >> All model checkpoint weights were used when initializing Qwen2ForCausalLM.\n",
+ "\n",
+ "[INFO|modeling_utils.py:4372] 2024-07-01 05:03:12,246 >> All the weights of Qwen2ForCausalLM were initialized from the model checkpoint at Qwen/Qwen2-1.5B-Instruct.\n",
+ "If your task is similar to the task the model of the checkpoint was trained on, you can already use Qwen2ForCausalLM for predictions without further training.\n",
+ "[INFO|configuration_utils.py:955] 2024-07-01 05:03:12,762 >> loading configuration file generation_config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/generation_config.json\n",
+ "[INFO|configuration_utils.py:1000] 2024-07-01 05:03:12,762 >> Generate config GenerationConfig {\n",
+ " \"bos_token_id\": 151643,\n",
+ " \"do_sample\": true,\n",
+ " \"eos_token_id\": [\n",
+ " 151645,\n",
+ " 151643\n",
+ " ],\n",
+ " \"pad_token_id\": 151643,\n",
+ " \"repetition_penalty\": 1.1,\n",
+ " \"temperature\": 0.7,\n",
+ " \"top_k\": 20,\n",
+ " \"top_p\": 0.8\n",
+ "}\n",
+ "\n",
+ "07/01/2024 05:03:13 - INFO - llamafactory.model.model_utils.attention - Using torch SDPA for faster training and inference.\n",
+ "07/01/2024 05:03:13 - INFO - llamafactory.model.adapter - Loaded adapter(s): llama-factory/saves/qwen2-1.5b/lora/sft/checkpoint-4480\n",
+ "07/01/2024 05:03:13 - INFO - llamafactory.model.loader - all params: 1,552,946,688\n",
+ "(2) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n",
+ "1.604 GB of memory reserved.\n",
+ "loading train/test data files\n",
+ "DatasetDict({\n",
+ " train: Dataset({\n",
+ " features: ['chinese', 'english', 'text', 'prompt'],\n",
+ " num_rows: 4528\n",
+ " })\n",
+ " test: Dataset({\n",
+ " features: ['chinese', 'english', 'text', 'prompt'],\n",
+ " num_rows: 1133\n",
+ " })\n",
+ "})\n",
+ "Evaluating model: Qwen/Qwen2-1.5B-Instruct\n",
+ " 0%| | 0/1133 [00:00, ?it/s]--------\n",
+ "step 1: Old Geng took a shot with his rifle. A spray of bullets flew forth, like ice pellets, and a cloud of sparrows fell to the ground. Shot after shot, each one accompanied by a crack, exploded against the willows.<|im_end|>\n",
+ "--------\n",
+ "step 2: Old Geng took a shot with his rifle. A spray of bullets flew forth, like ice pellets, and a cloud of sparrows fell to the ground. Shot after shot, each one accompanied by a crack, exploded against the willows.\n",
+ "--------\n",
+ "step 3: Old Geng took a shot with his rifle. A spray of bullets flew forth, like ice pellets, and a cloud of sparrows fell to the ground. Shot after shot, each one accompanied by a crack, exploded against the willows.\n",
+ "100%|█████████████████████████████████████| 1133/1133 [1:16:24<00:00, 4.05s/it]\n",
+ "(3) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n",
+ "5.236 GB of memory reserved.\n",
+ " chinese ... Qwen/Qwen2-1.5B-Instruct_checkpoint-4480\n",
+ "0 老耿端起枪,眯缝起一只三角眼,一搂扳机响了枪,冰雹般的金麻雀劈哩啪啦往下落,铁砂子在柳枝间飞... ... Old Geng took a shot with his rifle. A spray o...\n",
+ "\n",
+ "[1 rows x 20 columns]\n",
+ "{'accuracy': 0.00176522506619594, 'correct_ids': [147, 199], 'meteor': 0.33844145976530193, 'bleu_scores': {'bleu': 0.08009132331873689, 'precisions': [0.33483795251421866, 0.10704716804785346, 0.047180778918814184, 0.024331389503317917], 'brevity_penalty': 1.0, 'length_ratio': 1.1007287181185823, 'translation_length': 33231, 'reference_length': 30190}, 'rouge_scores': {'rouge1': 0.37147413848910177, 'rouge2': 0.14173580477944275, 'rougeL': 0.31332200211175076, 'rougeLsum': 0.3132659362806373}}\n",
+ "Epoch 9\n",
+ "loading env vars from: /home/inflaton/code/projects/courses/llm-finetuning/.env\n",
+ "Adding /home/inflaton/code/projects/courses/llm-finetuning to sys.path\n",
+ "🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.\n",
+ "[nltk_data] Downloading package wordnet to /home/inflaton/nltk_data...\n",
+ "[nltk_data] Package wordnet is already up-to-date!\n",
+ "[nltk_data] Downloading package punkt to /home/inflaton/nltk_data...\n",
+ "[nltk_data] Package punkt is already up-to-date!\n",
+ "[nltk_data] Downloading package omw-1.4 to /home/inflaton/nltk_data...\n",
+ "[nltk_data] Package omw-1.4 is already up-to-date!\n",
+ "loading /home/inflaton/code/projects/courses/llm-finetuning/llm_toolkit/translation_engine.py\n",
+ "Qwen/Qwen2-1.5B-Instruct llama-factory/saves/qwen2-1.5b/lora/sft/checkpoint-5040 True datasets/mac/mac.tsv results/mac-results_lf.csv\n",
+ "(1) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n",
+ "0.0 GB of memory reserved.\n",
+ "loading model: Qwen/Qwen2-1.5B-Instruct\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-07-01 06:20:02,103 >> loading file vocab.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/vocab.json\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-07-01 06:20:02,103 >> loading file merges.txt from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/merges.txt\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-07-01 06:20:02,103 >> loading file tokenizer.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/tokenizer.json\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-07-01 06:20:02,103 >> loading file added_tokens.json from cache at None\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-07-01 06:20:02,103 >> loading file special_tokens_map.json from cache at None\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-07-01 06:20:02,103 >> loading file tokenizer_config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/tokenizer_config.json\n",
+ "[WARNING|logging.py:313] 2024-07-01 06:20:02,237 >> Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n",
+ "07/01/2024 06:20:02 - INFO - llamafactory.data.template - Replace eos token: <|im_end|>\n",
+ "07/01/2024 06:20:02 - INFO - llamafactory.data.template - Add <|im_start|> to stop words.\n",
+ "[INFO|configuration_utils.py:733] 2024-07-01 06:20:02,540 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/config.json\n",
+ "[INFO|configuration_utils.py:800] 2024-07-01 06:20:02,540 >> Model config Qwen2Config {\n",
+ " \"_name_or_path\": \"Qwen/Qwen2-1.5B-Instruct\",\n",
+ " \"architectures\": [\n",
+ " \"Qwen2ForCausalLM\"\n",
+ " ],\n",
+ " \"attention_dropout\": 0.0,\n",
+ " \"bos_token_id\": 151643,\n",
+ " \"eos_token_id\": 151645,\n",
+ " \"hidden_act\": \"silu\",\n",
+ " \"hidden_size\": 1536,\n",
+ " \"initializer_range\": 0.02,\n",
+ " \"intermediate_size\": 8960,\n",
+ " \"max_position_embeddings\": 32768,\n",
+ " \"max_window_layers\": 28,\n",
+ " \"model_type\": \"qwen2\",\n",
+ " \"num_attention_heads\": 12,\n",
+ " \"num_hidden_layers\": 28,\n",
+ " \"num_key_value_heads\": 2,\n",
+ " \"rms_norm_eps\": 1e-06,\n",
+ " \"rope_theta\": 1000000.0,\n",
+ " \"sliding_window\": 32768,\n",
+ " \"tie_word_embeddings\": true,\n",
+ " \"torch_dtype\": \"bfloat16\",\n",
+ " \"transformers_version\": \"4.42.3\",\n",
+ " \"use_cache\": true,\n",
+ " \"use_sliding_window\": false,\n",
+ " \"vocab_size\": 151936\n",
+ "}\n",
+ "\n",
+ "07/01/2024 06:20:02 - INFO - llamafactory.model.model_utils.quantization - Quantizing model to 4 bit with bitsandbytes.\n",
+ "07/01/2024 06:20:02 - INFO - llamafactory.model.patcher - Using KV cache for faster generation.\n",
+ "[INFO|modeling_utils.py:3556] 2024-07-01 06:20:02,582 >> loading weights file model.safetensors from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/model.safetensors\n",
+ "[INFO|modeling_utils.py:1531] 2024-07-01 06:20:04,218 >> Instantiating Qwen2ForCausalLM model under default dtype torch.bfloat16.\n",
+ "[INFO|configuration_utils.py:1000] 2024-07-01 06:20:04,222 >> Generate config GenerationConfig {\n",
+ " \"bos_token_id\": 151643,\n",
+ " \"eos_token_id\": 151645\n",
+ "}\n",
+ "\n",
+ "[INFO|modeling_utils.py:4364] 2024-07-01 06:21:26,114 >> All model checkpoint weights were used when initializing Qwen2ForCausalLM.\n",
+ "\n",
+ "[INFO|modeling_utils.py:4372] 2024-07-01 06:21:26,115 >> All the weights of Qwen2ForCausalLM were initialized from the model checkpoint at Qwen/Qwen2-1.5B-Instruct.\n",
+ "If your task is similar to the task the model of the checkpoint was trained on, you can already use Qwen2ForCausalLM for predictions without further training.\n",
+ "[INFO|configuration_utils.py:955] 2024-07-01 06:21:26,406 >> loading configuration file generation_config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/generation_config.json\n",
+ "[INFO|configuration_utils.py:1000] 2024-07-01 06:21:26,406 >> Generate config GenerationConfig {\n",
+ " \"bos_token_id\": 151643,\n",
+ " \"do_sample\": true,\n",
+ " \"eos_token_id\": [\n",
+ " 151645,\n",
+ " 151643\n",
+ " ],\n",
+ " \"pad_token_id\": 151643,\n",
+ " \"repetition_penalty\": 1.1,\n",
+ " \"temperature\": 0.7,\n",
+ " \"top_k\": 20,\n",
+ " \"top_p\": 0.8\n",
+ "}\n",
+ "\n",
+ "07/01/2024 06:21:26 - INFO - llamafactory.model.model_utils.attention - Using torch SDPA for faster training and inference.\n",
+ "07/01/2024 06:21:27 - INFO - llamafactory.model.adapter - Loaded adapter(s): llama-factory/saves/qwen2-1.5b/lora/sft/checkpoint-5040\n",
+ "07/01/2024 06:21:27 - INFO - llamafactory.model.loader - all params: 1,552,946,688\n",
+ "(2) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n",
+ "1.604 GB of memory reserved.\n",
+ "loading train/test data files\n",
+ "DatasetDict({\n",
+ " train: Dataset({\n",
+ " features: ['chinese', 'english', 'text', 'prompt'],\n",
+ " num_rows: 4528\n",
+ " })\n",
+ " test: Dataset({\n",
+ " features: ['chinese', 'english', 'text', 'prompt'],\n",
+ " num_rows: 1133\n",
+ " })\n",
+ "})\n",
+ "Evaluating model: Qwen/Qwen2-1.5B-Instruct\n",
+ " 0%| | 0/1133 [00:00, ?it/s]--------\n",
+ "step 1: Old Geng took a step forward, raised his rifle to his eye, and squeezed the trigger. Rifle pellets, bayoneted sparrows, rained down around him. Shotouts of iron sand flew everywhere, crackling as they went.<|im_end|>\n",
+ "--------\n",
+ "step 2: Old Geng took a step forward, raised his rifle to his eye, and squeezed the trigger. Rifle pellets, bayoneted sparrows, rained down around him. Shotouts of iron sand flew everywhere, crackling as they went.\n",
+ "--------\n",
+ "step 3: Old Geng took a step forward, raised his rifle to his eye, and squeezed the trigger. Rifle pellets, bayoneted sparrows, rained down around him. Shotouts of iron sand flew everywhere, crackling as they went.\n",
+ "100%|█████████████████████████████████████| 1133/1133 [1:15:49<00:00, 4.02s/it]\n",
+ "(3) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n",
+ "5.197 GB of memory reserved.\n",
+ " chinese ... Qwen/Qwen2-1.5B-Instruct_checkpoint-5040\n",
+ "0 老耿端起枪,眯缝起一只三角眼,一搂扳机响了枪,冰雹般的金麻雀劈哩啪啦往下落,铁砂子在柳枝间飞... ... Old Geng took a step forward, raised his rifle...\n",
+ "\n",
+ "[1 rows x 21 columns]\n",
+ "{'accuracy': 0.00176522506619594, 'correct_ids': [147, 199], 'meteor': 0.3380289789419591, 'bleu_scores': {'bleu': 0.08738865032530332, 'precisions': [0.36355344170440107, 0.11703423082126911, 0.052124366910523356, 0.026296513331380018], 'brevity_penalty': 1.0, 'length_ratio': 1.0167936402782378, 'translation_length': 30697, 'reference_length': 30190}, 'rouge_scores': {'rouge1': 0.3674967504985488, 'rouge2': 0.14110284985778096, 'rougeL': 0.3092157882639477, 'rougeLsum': 0.30969047388276916}}\n",
+ "Epoch 10\n",
+ "loading env vars from: /home/inflaton/code/projects/courses/llm-finetuning/.env\n",
+ "Adding /home/inflaton/code/projects/courses/llm-finetuning to sys.path\n",
+ "🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.\n",
+ "[nltk_data] Downloading package wordnet to /home/inflaton/nltk_data...\n",
+ "[nltk_data] Package wordnet is already up-to-date!\n",
+ "[nltk_data] Downloading package punkt to /home/inflaton/nltk_data...\n",
+ "[nltk_data] Package punkt is already up-to-date!\n",
+ "[nltk_data] Downloading package omw-1.4 to /home/inflaton/nltk_data...\n",
+ "[nltk_data] Package omw-1.4 is already up-to-date!\n",
+ "loading /home/inflaton/code/projects/courses/llm-finetuning/llm_toolkit/translation_engine.py\n",
+ "Qwen/Qwen2-1.5B-Instruct llama-factory/saves/qwen2-1.5b/lora/sft/checkpoint-5600 True datasets/mac/mac.tsv results/mac-results_lf.csv\n",
+ "(1) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n",
+ "0.0 GB of memory reserved.\n",
+ "loading model: Qwen/Qwen2-1.5B-Instruct\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-07-01 07:37:36,379 >> loading file vocab.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/vocab.json\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-07-01 07:37:36,379 >> loading file merges.txt from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/merges.txt\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-07-01 07:37:36,379 >> loading file tokenizer.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/tokenizer.json\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-07-01 07:37:36,379 >> loading file added_tokens.json from cache at None\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-07-01 07:37:36,379 >> loading file special_tokens_map.json from cache at None\n",
+ "[INFO|tokenization_utils_base.py:2161] 2024-07-01 07:37:36,379 >> loading file tokenizer_config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/tokenizer_config.json\n",
+ "[WARNING|logging.py:313] 2024-07-01 07:37:36,515 >> Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n",
+ "07/01/2024 07:37:36 - INFO - llamafactory.data.template - Replace eos token: <|im_end|>\n",
+ "07/01/2024 07:37:36 - INFO - llamafactory.data.template - Add <|im_start|> to stop words.\n",
+ "[INFO|configuration_utils.py:733] 2024-07-01 07:37:36,942 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/config.json\n",
+ "[INFO|configuration_utils.py:800] 2024-07-01 07:37:36,943 >> Model config Qwen2Config {\n",
+ " \"_name_or_path\": \"Qwen/Qwen2-1.5B-Instruct\",\n",
+ " \"architectures\": [\n",
+ " \"Qwen2ForCausalLM\"\n",
+ " ],\n",
+ " \"attention_dropout\": 0.0,\n",
+ " \"bos_token_id\": 151643,\n",
+ " \"eos_token_id\": 151645,\n",
+ " \"hidden_act\": \"silu\",\n",
+ " \"hidden_size\": 1536,\n",
+ " \"initializer_range\": 0.02,\n",
+ " \"intermediate_size\": 8960,\n",
+ " \"max_position_embeddings\": 32768,\n",
+ " \"max_window_layers\": 28,\n",
+ " \"model_type\": \"qwen2\",\n",
+ " \"num_attention_heads\": 12,\n",
+ " \"num_hidden_layers\": 28,\n",
+ " \"num_key_value_heads\": 2,\n",
+ " \"rms_norm_eps\": 1e-06,\n",
+ " \"rope_theta\": 1000000.0,\n",
+ " \"sliding_window\": 32768,\n",
+ " \"tie_word_embeddings\": true,\n",
+ " \"torch_dtype\": \"bfloat16\",\n",
+ " \"transformers_version\": \"4.42.3\",\n",
+ " \"use_cache\": true,\n",
+ " \"use_sliding_window\": false,\n",
+ " \"vocab_size\": 151936\n",
+ "}\n",
+ "\n",
+ "07/01/2024 07:37:36 - INFO - llamafactory.model.model_utils.quantization - Quantizing model to 4 bit with bitsandbytes.\n",
+ "07/01/2024 07:37:36 - INFO - llamafactory.model.patcher - Using KV cache for faster generation.\n",
+ "[INFO|modeling_utils.py:3556] 2024-07-01 07:37:36,987 >> loading weights file model.safetensors from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/model.safetensors\n",
+ "[INFO|modeling_utils.py:1531] 2024-07-01 07:37:38,446 >> Instantiating Qwen2ForCausalLM model under default dtype torch.bfloat16.\n",
+ "[INFO|configuration_utils.py:1000] 2024-07-01 07:37:38,450 >> Generate config GenerationConfig {\n",
+ " \"bos_token_id\": 151643,\n",
+ " \"eos_token_id\": 151645\n",
+ "}\n",
+ "\n",
+ "[INFO|modeling_utils.py:4364] 2024-07-01 07:39:01,352 >> All model checkpoint weights were used when initializing Qwen2ForCausalLM.\n",
+ "\n",
+ "[INFO|modeling_utils.py:4372] 2024-07-01 07:39:01,352 >> All the weights of Qwen2ForCausalLM were initialized from the model checkpoint at Qwen/Qwen2-1.5B-Instruct.\n",
+ "If your task is similar to the task the model of the checkpoint was trained on, you can already use Qwen2ForCausalLM for predictions without further training.\n",
+ "[INFO|configuration_utils.py:955] 2024-07-01 07:39:01,658 >> loading configuration file generation_config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/generation_config.json\n",
+ "[INFO|configuration_utils.py:1000] 2024-07-01 07:39:01,658 >> Generate config GenerationConfig {\n",
+ " \"bos_token_id\": 151643,\n",
+ " \"do_sample\": true,\n",
+ " \"eos_token_id\": [\n",
+ " 151645,\n",
+ " 151643\n",
+ " ],\n",
+ " \"pad_token_id\": 151643,\n",
+ " \"repetition_penalty\": 1.1,\n",
+ " \"temperature\": 0.7,\n",
+ " \"top_k\": 20,\n",
+ " \"top_p\": 0.8\n",
+ "}\n",
+ "\n",
+ "07/01/2024 07:39:02 - INFO - llamafactory.model.model_utils.attention - Using torch SDPA for faster training and inference.\n",
+ "07/01/2024 07:39:02 - INFO - llamafactory.model.adapter - Loaded adapter(s): llama-factory/saves/qwen2-1.5b/lora/sft/checkpoint-5600\n",
+ "07/01/2024 07:39:02 - INFO - llamafactory.model.loader - all params: 1,552,946,688\n",
+ "(2) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n",
+ "1.604 GB of memory reserved.\n",
+ "loading train/test data files\n",
+ "DatasetDict({\n",
+ " train: Dataset({\n",
+ " features: ['chinese', 'english', 'text', 'prompt'],\n",
+ " num_rows: 4528\n",
+ " })\n",
+ " test: Dataset({\n",
+ " features: ['chinese', 'english', 'text', 'prompt'],\n",
+ " num_rows: 1133\n",
+ " })\n",
+ "})\n",
+ "Evaluating model: Qwen/Qwen2-1.5B-Instruct\n",
+ " 0%| | 0/1133 [00:00, ?it/s]--------\n",
+ "step 1: Old Geng reached for his rifle, wedged it to his eye, took a squeeze, and fired—a shower of lead pellets flew from the barrel, crackering through the air as they hit.<|im_end|>\n",
+ "--------\n",
+ "step 2: Old Geng reached for his rifle, wedged it to his eye, took a squeeze, and fired—a shower of lead pellets flew from the barrel, crackering through the air as they hit.\n",
+ "--------\n",
+ "step 3: Old Geng reached for his rifle, wedged it to his eye, took a squeeze, and fired—a shower of lead pellets flew from the barrel, crackering through the air as they hit.\n",
+ "100%|███████████████████████████████████████| 1133/1133 [43:28<00:00, 2.30s/it]\n",
+ "(3) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n",
+ "1.877 GB of memory reserved.\n",
+ " chinese ... Qwen/Qwen2-1.5B-Instruct_checkpoint-5600\n",
+ "0 老耿端起枪,眯缝起一只三角眼,一搂扳机响了枪,冰雹般的金麻雀劈哩啪啦往下落,铁砂子在柳枝间飞... ... Old Geng reached for his rifle, wedged it to h...\n",
+ "\n",
+ "[1 rows x 22 columns]\n",
+ "{'accuracy': 0.00176522506619594, 'correct_ids': [147, 199], 'meteor': 0.3339867178782917, 'bleu_scores': {'bleu': 0.08544000315753703, 'precisions': [0.3757308441891476, 0.11972682649213914, 0.05255355422133274, 0.025644000928289626], 'brevity_penalty': 0.9682716284409708, 'length_ratio': 0.9687644915534945, 'translation_length': 29247, 'reference_length': 30190}, 'rouge_scores': {'rouge1': 0.3631078400113029, 'rouge2': 0.13850862500702893, 'rougeL': 0.3081859195764205, 'rougeLsum': 0.30821718216431304}}\n",
+ "CPU times: user 21min 38s, sys: 7min 47s, total: 29min 25s\n",
+ "Wall time: 10h 26min 31s\n"
+ ]
+ }
+ ],
+ "source": [
+ "%%time\n",
+ "\n",
+ "os.environ[\"MODEL_NAME\"] = \"Qwen/Qwen2-1.5B-Instruct\"\n",
+ "for i in range(1, num_train_epochs + 1):\n",
+ " print(f\"Epoch {i}\")\n",
+ " adapter_path = f\"llama-factory/saves/qwen2-1.5b/lora/sft/checkpoint-{560 * i}\"\n",
+ " os.environ[\"ADAPTER_NAME_OR_PATH\"] = adapter_path\n",
+ "    !python llm_toolkit/eval.py"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%%time\n",
+ "\n",
+ "os.environ[\"MODEL_NAME\"] = \"Qwen/Qwen2-7B-Instruct\"\n",
+ "for i in range(1, num_train_epochs + 1):\n",
+ " print(f\"Epoch {i}\")\n",
+ " adapter_path = f\"llama-factory/saves/qwen2-7b/lora/sft/checkpoint-{560 * i}\"\n",
+ " os.environ[\"ADAPTER_NAME_OR_PATH\"] = adapter_path\n",
+ "    !python llm_toolkit/eval.py"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from langchain_openai import ChatOpenAI\n",
+ "\n",
+ "llm = ChatOpenAI(\n",
+ " model=\"gpt-4o\",\n",
+ " temperature=0,\n",
+ " max_tokens=None,\n",
+ " timeout=None,\n",
+ " max_retries=2,\n",
+ "    # api_key=\"...\",  # if you prefer to pass the API key in directly instead of using env vars\n",
+ " base_url=\"http://localhost:8000/v1\",\n",
+ " # organization=\"...\",\n",
+ " # other params...\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 21,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "[nltk_data] Downloading package wordnet to /home/inflaton/nltk_data...\n",
+ "[nltk_data] Package wordnet is already up-to-date!\n",
+ "[nltk_data] Downloading package punkt to /home/inflaton/nltk_data...\n",
+ "[nltk_data] Package punkt is already up-to-date!\n",
+ "[nltk_data] Downloading package omw-1.4 to /home/inflaton/nltk_data...\n",
+ "[nltk_data] Package omw-1.4 is already up-to-date!\n"
+ ]
+ }
+ ],
+ "source": [
+ "from llm_toolkit.translation_utils import *"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 28,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "'The body was found on the morning beach'"
+ ]
+ },
+ "execution_count": 28,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "translate_via_llm(\"死者凌晨去的沙滩\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 31,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "cache_dict = {}"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 34,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "{'死者凌晨去的沙滩': 'The body was found on the morning beach'}"
+ ]
+ },
+ "execution_count": 34,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "translate(\n",
+ " \"死者凌晨去的沙滩\",\n",
+ " cache_dict=cache_dict,\n",
+ ")\n",
+ "cache_dict"
+ ]
+ }
+ ],
+ "metadata": {
+ "accelerator": "GPU",
+ "application/vnd.databricks.v1+notebook": {
+ "dashboards": [],
+ "environmentMetadata": null,
+ "language": "python",
+ "notebookMetadata": {
+ "pythonIndentUnit": 4
+ },
+ "notebookName": "07_MAC_+_Qwen2-7B-Instructi_Unsloth_train",
+ "widgets": {}
+ },
+ "colab": {
+ "gpuType": "T4",
+ "provenance": []
+ },
+ "kernelspec": {
+ "display_name": "Python 3",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.11.9"
+ },
+ "widgets": {
+ "application/vnd.jupyter.widget-state+json": {
+ "036fc5746f43416db18c19ad8fd36677": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "06e806c82c7b4cbea31c5358dd9c3434": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "087b76a8b7514269b1f0ab29b062e444": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_a069d2ab23824f29aa320ac256e2cfe9",
+ "placeholder": "",
+ "style": "IPY_MODEL_06e806c82c7b4cbea31c5358dd9c3434",
+ "value": "Map (num_proc=2): 100%"
+ }
+ },
+ "09b76013aa9e45efb6deb23a7a0d0925": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_dea41c5260884aa6879b5e1d1697b14f",
+ "placeholder": "",
+ "style": "IPY_MODEL_89965917796a4f81b899fdc7685f33df",
+ "value": "config.json: 100%"
+ }
+ },
+ "0a92c56bfa134ef583220d7ef0b13e17": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "0c34be936c8145d3ab41282f30a70713": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "0f8b6bfe16894500838793f2491d403f": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "177c78fce95d4b4ab33057c5a048d693": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "1f44c9ce1adf470cbb19784493ed209f": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_0c34be936c8145d3ab41282f30a70713",
+ "placeholder": "",
+ "style": "IPY_MODEL_0a92c56bfa134ef583220d7ef0b13e17",
+ "value": "model.safetensors: 100%"
+ }
+ },
+ "201b59ccd9f845e197029b57e424aefc": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "2157f01726d748f8a9ae4a00664430da": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "21db8a77b00d4a4e82fdfa608657531f": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "26e4202cca81496a90d15a0dd4ca9cf1": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_ba90fdb8822d47dab7ba203bee297f37",
+ "IPY_MODEL_61560ff6a36b44f4a9dfdae5c52791d4",
+ "IPY_MODEL_95fbe66647904c06a20f640630d6dc0e"
+ ],
+ "layout": "IPY_MODEL_57182a263d324a3dbf1471c74290a0d5"
+ }
+ },
+ "27155728b6b84cb199c91c940095d0a8": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_6b91feeed5464877991ac2c207aebe7c",
+ "IPY_MODEL_cca8113c54c0495daedce1327bf9c68b",
+ "IPY_MODEL_2e63a29e2f7247bba5beede9a568c99f"
+ ],
+ "layout": "IPY_MODEL_5c9d781c28944f3eb86e2a6d44efdf18"
+ }
+ },
+ "271ddaa553a042d09b6db7b450643d8f": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "2a58d04b428c46f4b3dbadd3bc6cd529": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "2d18ddf6482c4d97829ac0e5a7b9868f": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_9f679ad3ec7f4fe8ad0510ffb57bc2ab",
+ "IPY_MODEL_f2df530d22c74977b249dd9fb5f4829b",
+ "IPY_MODEL_89b2ef0dbfea47ab8e6f8d659e3351d1"
+ ],
+ "layout": "IPY_MODEL_3056b148aa9f4e6e8aa3b61d26886255"
+ }
+ },
+ "2e5087c76f98437cb5dc729230358cba": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "2e63a29e2f7247bba5beede9a568c99f": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_b993eaec6b224440bf80c0958c6fb536",
+ "placeholder": "",
+ "style": "IPY_MODEL_de868e26e7154f62aa86223a539ad421",
+ "value": " 464/464 [00:00<00:00, 27.1kB/s]"
+ }
+ },
+ "2f6c70dd266c4816bfad3fd3d192929a": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "30307300bc4e4baf96560e30969a82b6": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_e36a3f9eff0e4cf68834d66b0213ae96",
+ "placeholder": "",
+ "style": "IPY_MODEL_a0037bdccf254159becde630bee3d1db",
+ "value": "generation_config.json: 100%"
+ }
+ },
+ "3056b148aa9f4e6e8aa3b61d26886255": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "30cdc32298134cb0be4d41615b9e5774": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "3572201bd4d74a58b7a665f9bdfdcdba": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "35b0e8c26d6640e9bd0ed7b242a423d8": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_2e5087c76f98437cb5dc729230358cba",
+ "max": 51760,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_036fc5746f43416db18c19ad8fd36677",
+ "value": 51760
+ }
+ },
+ "36166c7bcb854b34aca1f41a5d6ea50b": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "370692d819df41828b48c4ad446f977b": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "39b29a75374b45c0a22506010be2b84e": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_30cdc32298134cb0be4d41615b9e5774",
+ "max": 1179,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_47928317548c454bba6358ab132e8dee",
+ "value": 1179
+ }
+ },
+ "3cf2dd993b5e4d3daecf61e4bab5a404": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_087b76a8b7514269b1f0ab29b062e444",
+ "IPY_MODEL_35b0e8c26d6640e9bd0ed7b242a423d8",
+ "IPY_MODEL_54ad89e05fd74576b9b8b5b5a10eaf8d"
+ ],
+ "layout": "IPY_MODEL_a41dc44766444a998bec2d777f249d23"
+ }
+ },
+ "43dec2ede91341f5af60eb522e18e984": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "4463edd481c1467f914c7dcd6c6e6ffc": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "47928317548c454bba6358ab132e8dee": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "49277aeeac16434a865a4d12308b1abc": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "4ae7e449e4ea4c729b5f34607c18ebae": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "4b2061b8a73c43ffb0c2f83daf0d0183": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "4c4c88d4c701450692fa0f6b0c5764b0": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "4c666f4ace3943f8b80ecd20e7503236": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "4ccedf0d93094e63b57a0f8a434fba06": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_4463edd481c1467f914c7dcd6c6e6ffc",
+ "max": 44307561,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_6d3b9a05db0b4dadb638c686faa0c40a",
+ "value": 44307561
+ }
+ },
+ "4dcf6ff672d24983a1877a8431709aa9": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_5807d5fb827d490fb3bc698f801ffff5",
+ "placeholder": "",
+ "style": "IPY_MODEL_c4f2b06a82fd4987b8b659524a7b503b",
+ "value": "Generating train split: 100%"
+ }
+ },
+ "4ea63adfce694725bdba878aef709dd3": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "5234566b1bfc4655b8d582ea5b46ed9f": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "54ad89e05fd74576b9b8b5b5a10eaf8d": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_fdb1941405ed4e4aa06019933892deb3",
+ "placeholder": "",
+ "style": "IPY_MODEL_668d5377ca56426a99753867e6e24862",
+ "value": " 51760/51760 [01:02<00:00, 1131.51 examples/s]"
+ }
+ },
+ "56aee4853b7740e6a977254f5d1fa66d": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "57182a263d324a3dbf1471c74290a0d5": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "5807d5fb827d490fb3bc698f801ffff5": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "5c9d781c28944f3eb86e2a6d44efdf18": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "5f40db8173dd4d76b6ef5ed6d9ec8b6e": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "61560ff6a36b44f4a9dfdae5c52791d4": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_db19fc8d37db4e45a5790a876836d8c4",
+ "max": 11610,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_36166c7bcb854b34aca1f41a5d6ea50b",
+ "value": 11610
+ }
+ },
+ "6578fd7acdb54c4c93528ea431fd0144": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_370692d819df41828b48c4ad446f977b",
+ "placeholder": "",
+ "style": "IPY_MODEL_a0bf9160eb2647409b3200270914b90f",
+ "value": " 50.6k/50.6k [00:00<00:00, 2.71MB/s]"
+ }
+ },
+ "668d5377ca56426a99753867e6e24862": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "697f027529b54ee9956bae78a11e0611": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "69ac12aec0714318bf2c83d4f4e745f5": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "6b2012c3f88547af8884a9ea90e3164b": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_938f45f1b3e24118b815d96ae34ba86a",
+ "placeholder": "",
+ "style": "IPY_MODEL_9367047a800747f79c6b225d92397846",
+ "value": " 44.3M/44.3M [00:01<00:00, 31.0MB/s]"
+ }
+ },
+ "6b91feeed5464877991ac2c207aebe7c": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_4b2061b8a73c43ffb0c2f83daf0d0183",
+ "placeholder": "",
+ "style": "IPY_MODEL_69ac12aec0714318bf2c83d4f4e745f5",
+ "value": "special_tokens_map.json: 100%"
+ }
+ },
+ "6d3b9a05db0b4dadb638c686faa0c40a": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "6dbbedeca9314e66ae50e44ffa31a414": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "6e34619b45934040b6092e6fb01ea7fe": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "71ce208e20d6483abb9ed923510c86d7": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_d69dc491b3ab44d7852b21873ed7bb7f",
+ "placeholder": "",
+ "style": "IPY_MODEL_f401d53bf28e44eb906bce6c05412662",
+ "value": " 51760/51760 [00:01<00:00, 45512.81 examples/s]"
+ }
+ },
+ "7358cdad832342c983e31efb8754ab78": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "73e352a3404f4c7dad0737f57d29e92f": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_988a0e8c1f89446086858da0a891a79c",
+ "IPY_MODEL_4ccedf0d93094e63b57a0f8a434fba06",
+ "IPY_MODEL_6b2012c3f88547af8884a9ea90e3164b"
+ ],
+ "layout": "IPY_MODEL_7e29cb8dd4df4d5b94407cd8fd3f2011"
+ }
+ },
+ "74501720ac7e4dbb911a4a99b3633bc6": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "78e5400bff924a92a4cc61c4ff18b182": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_b9b313fd861948f5aba25b24b1518d30",
+ "placeholder": "",
+ "style": "IPY_MODEL_4c666f4ace3943f8b80ecd20e7503236",
+ "value": " 1.18k/1.18k [00:00<00:00, 31.3kB/s]"
+ }
+ },
+ "7975adbc2ec5489ea7fa0167e620d85c": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_6e34619b45934040b6092e6fb01ea7fe",
+ "max": 51760,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_271ddaa553a042d09b6db7b450643d8f",
+ "value": 51760
+ }
+ },
+ "7e29cb8dd4df4d5b94407cd8fd3f2011": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "810ff6c0e17d4fa09a30fef27eacff90": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "89965917796a4f81b899fdc7685f33df": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "89b2ef0dbfea47ab8e6f8d659e3351d1": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_b8908fa0df3743ecb9d12983a739104f",
+ "placeholder": "",
+ "style": "IPY_MODEL_177c78fce95d4b4ab33057c5a048d693",
+ "value": " 9.09M/9.09M [00:00<00:00, 32.6MB/s]"
+ }
+ },
+ "8b3505352a5a42bf910428c40ce40465": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_49277aeeac16434a865a4d12308b1abc",
+ "placeholder": "",
+ "style": "IPY_MODEL_2157f01726d748f8a9ae4a00664430da",
+ "value": " 5.70G/5.70G [01:02<00:00, 30.1MB/s]"
+ }
+ },
+ "8fc142b628fb40568730234de1cafde2": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_4ae7e449e4ea4c729b5f34607c18ebae",
+ "max": 172,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_3572201bd4d74a58b7a665f9bdfdcdba",
+ "value": 172
+ }
+ },
+ "9367047a800747f79c6b225d92397846": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "938f45f1b3e24118b815d96ae34ba86a": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "95fbe66647904c06a20f640630d6dc0e": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_b0a370dc20654b279b9680692e34418e",
+ "placeholder": "",
+ "style": "IPY_MODEL_cfeb365ddf7548d58b2557f22737fcf5",
+ "value": " 11.6k/11.6k [00:00<00:00, 716kB/s]"
+ }
+ },
+ "988a0e8c1f89446086858da0a891a79c": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_ad2be500fc164c0f86f33e914ef8e6a0",
+ "placeholder": "",
+ "style": "IPY_MODEL_5234566b1bfc4655b8d582ea5b46ed9f",
+ "value": "Downloading data: 100%"
+ }
+ },
+ "98c58f23f4d549518832cb2d18f796e8": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_09b76013aa9e45efb6deb23a7a0d0925",
+ "IPY_MODEL_39b29a75374b45c0a22506010be2b84e",
+ "IPY_MODEL_78e5400bff924a92a4cc61c4ff18b182"
+ ],
+ "layout": "IPY_MODEL_2a58d04b428c46f4b3dbadd3bc6cd529"
+ }
+ },
+ "99fdbb0300c14c139d1937c646f0cfe7": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_7358cdad832342c983e31efb8754ab78",
+ "placeholder": "",
+ "style": "IPY_MODEL_e9adf418296e436fb48bb9f78885598b",
+ "value": " 51760/51760 [00:01<00:00, 38665.95 examples/s]"
+ }
+ },
+ "9f679ad3ec7f4fe8ad0510ffb57bc2ab": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_4ea63adfce694725bdba878aef709dd3",
+ "placeholder": "",
+ "style": "IPY_MODEL_74501720ac7e4dbb911a4a99b3633bc6",
+ "value": "tokenizer.json: 100%"
+ }
+ },
+ "a0037bdccf254159becde630bee3d1db": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "a069d2ab23824f29aa320ac256e2cfe9": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "a0bf9160eb2647409b3200270914b90f": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "a41dc44766444a998bec2d777f249d23": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "a8464a4c711e4e00aafdfc919b60d07e": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_fb995c740590427b882572c81d4e848c",
+ "placeholder": "",
+ "style": "IPY_MODEL_201b59ccd9f845e197029b57e424aefc",
+ "value": " 172/172 [00:00<00:00, 12.0kB/s]"
+ }
+ },
+ "a9f0cc51fc3d4d7b874c32dcf1c5bdf2": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "ad2be500fc164c0f86f33e914ef8e6a0": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "b0240cd9a4554b29ae11f8051984a1c6": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_edaf890370314a218f138015faa0b05d",
+ "placeholder": "",
+ "style": "IPY_MODEL_697f027529b54ee9956bae78a11e0611",
+ "value": "Map: 100%"
+ }
+ },
+ "b0a370dc20654b279b9680692e34418e": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "b518dcee69074b87be73957cd810e7ed": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_d891f8d0b1fc462f8008d02bb2a15692",
+ "placeholder": "",
+ "style": "IPY_MODEL_cced8fd7e998472794f3f3e3018956a5",
+ "value": "tokenizer_config.json: 100%"
+ }
+ },
+ "b8908fa0df3743ecb9d12983a739104f": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "b993eaec6b224440bf80c0958c6fb536": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "b9b313fd861948f5aba25b24b1518d30": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "ba90fdb8822d47dab7ba203bee297f37": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_0f8b6bfe16894500838793f2491d403f",
+ "placeholder": "",
+ "style": "IPY_MODEL_bb19f6c747754682a514373a3a0535ba",
+ "value": "Downloading readme: 100%"
+ }
+ },
+ "bb19f6c747754682a514373a3a0535ba": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "bc883d4cf13e4f8b8a4fe5f410cb6efd": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_e9159e03e61f4f56978ece9c3bca49b2",
+ "max": 51760,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_810ff6c0e17d4fa09a30fef27eacff90",
+ "value": 51760
+ }
+ },
+ "c161d94df0f04feba9542237e0856c22": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "c22f71b1f85843209d7e5321506b9cb9": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_1f44c9ce1adf470cbb19784493ed209f",
+ "IPY_MODEL_f1addc4479d849879e743cf9089e6540",
+ "IPY_MODEL_8b3505352a5a42bf910428c40ce40465"
+ ],
+ "layout": "IPY_MODEL_4c4c88d4c701450692fa0f6b0c5764b0"
+ }
+ },
+ "c4f2b06a82fd4987b8b659524a7b503b": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "cca8113c54c0495daedce1327bf9c68b": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_e02f9b7849c64531835eb77b860d1c93",
+ "max": 464,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_56aee4853b7740e6a977254f5d1fa66d",
+ "value": 464
+ }
+ },
+ "cced8fd7e998472794f3f3e3018956a5": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "cf245afeb1c04f29a24d291608c3d157": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_b518dcee69074b87be73957cd810e7ed",
+ "IPY_MODEL_e29104486d594b2992d7285e0ef77371",
+ "IPY_MODEL_6578fd7acdb54c4c93528ea431fd0144"
+ ],
+ "layout": "IPY_MODEL_d35db8148a354c56aaac56dbae22536f"
+ }
+ },
+ "cfe8cae0e22b495bafa221a63d13b283": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "cfeb365ddf7548d58b2557f22737fcf5": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "d1b47d39450d4019ae85c9b2f943eeaf": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_4dcf6ff672d24983a1877a8431709aa9",
+ "IPY_MODEL_7975adbc2ec5489ea7fa0167e620d85c",
+ "IPY_MODEL_71ce208e20d6483abb9ed923510c86d7"
+ ],
+ "layout": "IPY_MODEL_cfe8cae0e22b495bafa221a63d13b283"
+ }
+ },
+ "d35db8148a354c56aaac56dbae22536f": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "d69dc491b3ab44d7852b21873ed7bb7f": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "d891f8d0b1fc462f8008d02bb2a15692": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "d8e5318cead340c4adbeaccc05d39225": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "daf4cd890b35422683d22fd30bc71e83": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_b0240cd9a4554b29ae11f8051984a1c6",
+ "IPY_MODEL_bc883d4cf13e4f8b8a4fe5f410cb6efd",
+ "IPY_MODEL_99fdbb0300c14c139d1937c646f0cfe7"
+ ],
+ "layout": "IPY_MODEL_c161d94df0f04feba9542237e0856c22"
+ }
+ },
+ "db19fc8d37db4e45a5790a876836d8c4": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "de868e26e7154f62aa86223a539ad421": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "dea41c5260884aa6879b5e1d1697b14f": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "e02f9b7849c64531835eb77b860d1c93": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "e29104486d594b2992d7285e0ef77371": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_a9f0cc51fc3d4d7b874c32dcf1c5bdf2",
+ "max": 50641,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_2f6c70dd266c4816bfad3fd3d192929a",
+ "value": 50641
+ }
+ },
+ "e36a3f9eff0e4cf68834d66b0213ae96": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "e9159e03e61f4f56978ece9c3bca49b2": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "e9adf418296e436fb48bb9f78885598b": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "edaf890370314a218f138015faa0b05d": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "f1addc4479d849879e743cf9089e6540": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_43dec2ede91341f5af60eb522e18e984",
+ "max": 5702746405,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_d8e5318cead340c4adbeaccc05d39225",
+ "value": 5702746405
+ }
+ },
+ "f2df530d22c74977b249dd9fb5f4829b": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_21db8a77b00d4a4e82fdfa608657531f",
+ "max": 9085698,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_6dbbedeca9314e66ae50e44ffa31a414",
+ "value": 9085698
+ }
+ },
+ "f401d53bf28e44eb906bce6c05412662": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "fb995c740590427b882572c81d4e848c": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "fce7a61c25ec4390af43d92b7c473a45": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_30307300bc4e4baf96560e30969a82b6",
+ "IPY_MODEL_8fc142b628fb40568730234de1cafde2",
+ "IPY_MODEL_a8464a4c711e4e00aafdfc919b60d07e"
+ ],
+ "layout": "IPY_MODEL_5f40db8173dd4d76b6ef5ed6d9ec8b6e"
+ }
+ },
+ "fdb1941405ed4e4aa06019933892deb3": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ }
+ }
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
+}
diff --git a/requirements.txt b/requirements.txt
index bc5e1604e100b08051f6aa31735e0caf820b8a91..88b4471380a748806c78820270adacec1f5ad824 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1 +1,15 @@
-huggingface_hub==0.22.2
\ No newline at end of file
+nltk==3.8.1
+python-dotenv==1.0.1
+black==24.4.0
+evaluate==0.4.2
+rouge_score==0.1.2
+pytest==8.2.1
+seaborn==0.13.2
+scikit-learn==1.5.0
+jupyter
+ipywidgets
+packaging
+# triton
+# xformers
+langchain_openai==0.1.13
+wandb==0.17.4
\ No newline at end of file
diff --git a/results/experiment-1-results.csv b/results/experiment-1-results.csv
new file mode 100644
index 0000000000000000000000000000000000000000..c4faac54ce32c8000e989a73c49d7ae296c8528a
--- /dev/null
+++ b/results/experiment-1-results.csv
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bfb0c7a3813e9c98c9245c9303b2fb95c1fd7d6a92dd4e0d9d3fe4e4d29a8849
+size 2072299
diff --git a/results/experiment-2-results.csv b/results/experiment-2-results.csv
new file mode 100644
index 0000000000000000000000000000000000000000..14e26f3b21a462767d5668ae196f7708df50c840
--- /dev/null
+++ b/results/experiment-2-results.csv
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b1c99b9bb0c6539a9ff3c9198d730f110c5b6371cba803e1992802beb13e3600
+size 2038783
diff --git a/results/experiment-3-results.csv b/results/experiment-3-results.csv
new file mode 100644
index 0000000000000000000000000000000000000000..00a38db0521e984f761bb4641942326b54a92999
--- /dev/null
+++ b/results/experiment-3-results.csv
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f0b8dcb783ed847422ca4f2000b5106742b992537f4b84da6b5ca0b4c22bf0dd
+size 1427300
diff --git a/results/mac-results-no-flash-attn.csv b/results/mac-results-no-flash-attn.csv
new file mode 100644
index 0000000000000000000000000000000000000000..fa6b56dd5472dc6857df51d9620365ab59e5a882
--- /dev/null
+++ b/results/mac-results-no-flash-attn.csv
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:89144b0a3e727b326be559637312e353208a7e506b7c0c701ce8e4392e4cbb5e
+size 2129451
diff --git a/results/mac-results-with-flash-attn.csv b/results/mac-results-with-flash-attn.csv
new file mode 100644
index 0000000000000000000000000000000000000000..03c87b8c4109476831d4e61103c3f7a252b9053d
--- /dev/null
+++ b/results/mac-results-with-flash-attn.csv
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4c73be2c390511d0a59090b57c53f0a66c0d4c4648c209ef7155aa97ff73c0b9
+size 1461478
diff --git a/results/mac-results.csv b/results/mac-results.csv
new file mode 100644
index 0000000000000000000000000000000000000000..c539fdaa38a9b0d4f7756eb8291d29117fab5a1f
--- /dev/null
+++ b/results/mac-results.csv
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7eb1c66dd7162f27a969599ddb3695c3ac82a88bff15cd57d7ed00ca86ab19cd
+size 2072299
diff --git a/results/mac-results_final.csv b/results/mac-results_final.csv
new file mode 100644
index 0000000000000000000000000000000000000000..9e5e7801a13b500be3b8d2faeceeb90b221311ed
--- /dev/null
+++ b/results/mac-results_final.csv
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:aacf61087ae3b1fd622407c75d0a969b232517c7489841da722e0228bb69a310
+size 2334006
diff --git a/results/mac-results_lf-r2.csv b/results/mac-results_lf-r2.csv
new file mode 100644
index 0000000000000000000000000000000000000000..8b931681a28971d8b5f7ba674d1f7a4c24df55e4
--- /dev/null
+++ b/results/mac-results_lf-r2.csv
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:25c14d76c8d71ecbce6bc83d641ec4f54f6c0e188fccfcfd8536758a12ed456a
+size 2442353
diff --git a/results/mac-results_lf-r3.csv b/results/mac-results_lf-r3.csv
new file mode 100644
index 0000000000000000000000000000000000000000..4c6c42fc624a7b0ab9958a23136cb4b5799c8664
--- /dev/null
+++ b/results/mac-results_lf-r3.csv
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0ea9402ad5c87e3b7dcb570cf0a3c0bf33bef093c522d4d2ba6dbf633e21f035
+size 531603
diff --git a/results/mac-results_lf.csv b/results/mac-results_lf.csv
new file mode 100644
index 0000000000000000000000000000000000000000..a196bab99c61126a9093c872fd70d36fb62dfa3c
--- /dev/null
+++ b/results/mac-results_lf.csv
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c5acc087808de5df6839cbf7b170094c6e63445aab4bea15e4be9564b905eb51
+size 3236072
diff --git a/results/mac-results_py3.11.csv b/results/mac-results_py3.11.csv
new file mode 100644
index 0000000000000000000000000000000000000000..42e2b1fa940709ed288fa4ac86cb053456340d5f
--- /dev/null
+++ b/results/mac-results_py3.11.csv
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4adb0922c02cc435858b4ba44b4cdaaee4afe6fcc8721a795d740c36d8d94c2c
+size 1463058
diff --git a/results/mac-results_v3.csv b/results/mac-results_v3.csv
new file mode 100644
index 0000000000000000000000000000000000000000..5c11486f1305574d95d92adbfefd5b8f75ba86bb
--- /dev/null
+++ b/results/mac-results_v3.csv
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8bfe9ce9720d0cf67ba118d8b2d82f8f6c0bd0f763a8aa00fc1f43f58e544157
+size 1683953
diff --git a/results/model_training_evaluation_times.csv b/results/model_training_evaluation_times.csv
new file mode 100644
index 0000000000000000000000000000000000000000..e22a30aed88fc9cc41dda1225bce6f129313a705
--- /dev/null
+++ b/results/model_training_evaluation_times.csv
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5691ccd7fafb765772c2e5da0ada82bd2f3532459dcfed8517565e7cc0d9f1a8
+size 441
diff --git a/scripts/lf-api.sh b/scripts/lf-api.sh
new file mode 100755
index 0000000000000000000000000000000000000000..d0ccb5e02427288d0570acbf788679dc6ea32dba
--- /dev/null
+++ b/scripts/lf-api.sh
@@ -0,0 +1,8 @@
+#!/bin/sh
+
+BASEDIR=$(dirname "$0")
+cd $BASEDIR/../llama-factory
+echo Current Directory:
+pwd
+
+API_PORT=8000 llamafactory-cli api $1
diff --git a/scripts/tune-large.sh b/scripts/tune-large.sh
new file mode 100755
index 0000000000000000000000000000000000000000..04fe8f15221634a57547af5c8bdab0ba3972391b
--- /dev/null
+++ b/scripts/tune-large.sh
@@ -0,0 +1,24 @@
+#!/bin/sh
+
+BASEDIR=$(dirname "$0")
+cd $BASEDIR
+echo Current Directory:
+pwd
+
+nvidia-smi
+uname -a
+cat /etc/os-release
+lscpu
+grep MemTotal /proc/meminfo
+
+# pip install -r requirements.txt
+# FLASH_ATTENTION_FORCE_BUILD=TRUE pip install --upgrade flash-attn
+
+# export MODEL_NAME=unsloth/Qwen2-72B-Instruct-bnb-4bit
+# echo Tuning $MODEL_NAME
+# python tune.py
+
+export MODEL_NAME=unsloth/llama-3-70b-Instruct-bnb-4bit
+echo Tuning $MODEL_NAME
+python tune.py
+
diff --git a/scripts/tune-lf.sh b/scripts/tune-lf.sh
new file mode 100644
index 0000000000000000000000000000000000000000..0b722d60fe1b09cbc086ca4e1aa49265f0932b23
--- /dev/null
+++ b/scripts/tune-lf.sh
@@ -0,0 +1,9 @@
+#!/bin/sh
+
+BASEDIR=$(dirname "$0")
+cd $BASEDIR/../llama-factory
+echo Current Directory:
+pwd
+
+YAML=$1 python -c 'import os, json, sys, yaml; filename=os.getenv("YAML"); y=yaml.safe_load(open(filename)) ; print(f"{filename}:\n", json.dumps(y, indent=2))'
+llamafactory-cli train $1
\ No newline at end of file
diff --git a/scripts/tune-medium.sh b/scripts/tune-medium.sh
new file mode 100755
index 0000000000000000000000000000000000000000..fc27178be736afc77c866b41d4a22ab603894b80
--- /dev/null
+++ b/scripts/tune-medium.sh
@@ -0,0 +1,27 @@
+#!/bin/sh
+
+BASEDIR=$(dirname "$0")
+cd $BASEDIR
+echo Current Directory:
+pwd
+
+nvidia-smi
+uname -a
+cat /etc/os-release
+lscpu
+grep MemTotal /proc/meminfo
+
+# pip install -r requirements.txt
+# FLASH_ATTENTION_FORCE_BUILD=TRUE pip install --upgrade flash-attn
+
+export MODEL_NAME=unsloth/Qwen2-7B-Instruct
+echo Tuning $MODEL_NAME
+python llm_toolkit/tune.py
+
+export MODEL_NAME=unsloth/mistral-7b-instruct-v0.3
+echo Tuning $MODEL_NAME
+python llm_toolkit/tune.py
+
+export MODEL_NAME=gradientai/Llama-3-8B-Instruct-Gradient-1048k
+echo Tuning $MODEL_NAME
+python llm_toolkit/tune.py
diff --git a/scripts/tune-small-2.sh b/scripts/tune-small-2.sh
new file mode 100755
index 0000000000000000000000000000000000000000..0dd6a2e4a8f74e150563dc5cadceabd992f45888
--- /dev/null
+++ b/scripts/tune-small-2.sh
@@ -0,0 +1,14 @@
+#!/bin/sh
+
+BASEDIR=$(dirname "$0")
+cd $BASEDIR/..
+echo Current Directory:
+pwd
+
+export MODEL_NAME=unsloth/Qwen2-0.5B-Instruct
+echo Tuning $MODEL_NAME
+python llm_toolkit/tune.py
+
+export MODEL_NAME=unsloth/Qwen2-1.5B-Instruct
+echo Tuning $MODEL_NAME
+python llm_toolkit/tune.py
diff --git a/scripts/tune-small.sh b/scripts/tune-small.sh
new file mode 100755
index 0000000000000000000000000000000000000000..aa77dd81174e7e518d9b91a2e4d0b42244ebf438
--- /dev/null
+++ b/scripts/tune-small.sh
@@ -0,0 +1,14 @@
+#!/bin/sh
+
+BASEDIR=$(dirname "$0")
+cd $BASEDIR/..
+echo Current Directory:
+pwd
+
+export MODEL_NAME=unsloth/Qwen2-0.5B-Instruct-bnb-4bit
+echo Tuning $MODEL_NAME
+python llm_toolkit/tune.py
+
+export MODEL_NAME=unsloth/Qwen2-1.5B-Instruct-bnb-4bit
+echo Tuning $MODEL_NAME
+python llm_toolkit/tune.py