{ "nbformat": 4, "nbformat_minor": 0, "metadata": { "kernelspec": { "name": "python3", "display_name": "Python 3" }, "language_info": { "name": "python" }, "accelerator": "GPU", "widgets": { "application/vnd.jupyter.widget-state+json": { "76eba77b0fc9499e9b4015393156153e": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_c42123767d4849ffbcdd48f09580ea13", "IPY_MODEL_e89f07c1f5744328b071f96a3bdc9532", "IPY_MODEL_da36c71ba5bc469eb32bde730c5038d3" ], "layout": "IPY_MODEL_6cc6b7c564fe48f086152c78f8a9a915" } }, "c42123767d4849ffbcdd48f09580ea13": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_c6a3adfa76dd4bbd8cc5e75ee28ef3e8", "placeholder": "​", "style": "IPY_MODEL_0c65f0e913b74de88ab712d65b9027f7", "value": "Downloading data files: 100%" } }, "e89f07c1f5744328b071f96a3bdc9532": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_b07c7f3b6bc24f10821b88e254e88e43", "max": 1, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_42ea2834ee074029abe8cf8f9ff79a16", "value": 1 } }, "da36c71ba5bc469eb32bde730c5038d3": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_ec4205b0491a4f53ad1cba42689d59c4", "placeholder": "​", "style": "IPY_MODEL_f8b9df09c8c54d53a22310e64e30772f", "value": " 1/1 [00:00<00:00, 39.39it/s]" } }, "6cc6b7c564fe48f086152c78f8a9a915": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "c6a3adfa76dd4bbd8cc5e75ee28ef3e8": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "0c65f0e913b74de88ab712d65b9027f7": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "b07c7f3b6bc24f10821b88e254e88e43": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "42ea2834ee074029abe8cf8f9ff79a16": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "ec4205b0491a4f53ad1cba42689d59c4": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "f8b9df09c8c54d53a22310e64e30772f": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "e69b8912d3384180842ff0ead6096803": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_550a2ef0612e4d8396462d1b1c43608d", "IPY_MODEL_f8d82b4d016f46ca8bab1a2fe591861e", "IPY_MODEL_975d908a1874425198e0d62ae884a3ef" ], "layout": "IPY_MODEL_152aa8231b3f4b7db4e6f27bb61065b9" } }, "550a2ef0612e4d8396462d1b1c43608d": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_27b48982ae244c66bccd855e7e33a00d", "placeholder": "​", "style": "IPY_MODEL_a896cb94d1fc4641ad224c7a9ca619b1", "value": "Extracting data files: 100%" } }, "f8d82b4d016f46ca8bab1a2fe591861e": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_be5d02d7001a4a25a848d6b0c582b42b", "max": 1, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_609437910022418e83543fe6c3d16501", "value": 1 } }, "975d908a1874425198e0d62ae884a3ef": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_c3b5ef42dae64f95a2ef055db7e50fa8", "placeholder": "​", "style": "IPY_MODEL_ee0f0aff0c254b82854d1a6f4b367161", "value": " 1/1 [00:01<00:00, 1.73s/it]" } }, "152aa8231b3f4b7db4e6f27bb61065b9": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "27b48982ae244c66bccd855e7e33a00d": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "a896cb94d1fc4641ad224c7a9ca619b1": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "be5d02d7001a4a25a848d6b0c582b42b": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "609437910022418e83543fe6c3d16501": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "c3b5ef42dae64f95a2ef055db7e50fa8": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "ee0f0aff0c254b82854d1a6f4b367161": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "fd90085d416440579f409cdd9b29e053": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_fdd55d56775a4641925fc88e43f946b8", "IPY_MODEL_a54ebe30a12b4c21b67b2f8d43aea2d3", "IPY_MODEL_6f041f053ff240cbbb52f6d117ebfcf1" ], "layout": "IPY_MODEL_b7d5f8aaee0047178661e58aa59433ee" } }, "fdd55d56775a4641925fc88e43f946b8": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_563ca6cac1594ba0a7c168ee31ccc982", "placeholder": "​", "style": "IPY_MODEL_e10e7830d1404baea72e81e0df295239", "value": "Generating train split: " } }, "a54ebe30a12b4c21b67b2f8d43aea2d3": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "info", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_acc07aa077fd49d38d74e0e10fa3991a", "max": 1, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_441dfdd43ef0442894c19cad81a478db", "value": 1 } }, "6f041f053ff240cbbb52f6d117ebfcf1": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_9efe7ac9ee0946388eeb85f9a4603ffe", "placeholder": "​", "style": "IPY_MODEL_ae9996dcefcf4cfc8ea2f7488629c5ec", "value": " 15011/0 [00:01<00:00, 14307.31 examples/s]" } }, "b7d5f8aaee0047178661e58aa59433ee": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": "hidden", "width": null } }, "563ca6cac1594ba0a7c168ee31ccc982": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "e10e7830d1404baea72e81e0df295239": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "acc07aa077fd49d38d74e0e10fa3991a": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": "20px" } }, "441dfdd43ef0442894c19cad81a478db": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "9efe7ac9ee0946388eeb85f9a4603ffe": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "ae9996dcefcf4cfc8ea2f7488629c5ec": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "db463b34f98c488e8282d7a59c421347": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_2ba524e48f35421c9afc9cc81e9ce3fb", "IPY_MODEL_1829c96471c64a03a4872e1b7a16551c", "IPY_MODEL_f639a121fab0407595ebdf04015d9267" ], "layout": "IPY_MODEL_fa5ff44ae5f64ce4bbf9b59e300b7ba3" } }, "2ba524e48f35421c9afc9cc81e9ce3fb": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_e0d701b8c09744858db4fda6011b69df", "placeholder": "​", "style": "IPY_MODEL_7ef61952e0f2463bad3dd1a814bfb95a", "value": "100%" } }, "1829c96471c64a03a4872e1b7a16551c": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_75263b1d19ce4cb0b95cacb6138b6fcb", "max": 1, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_c3f35ce818e34b05b247fae76200c02e", "value": 1 } }, "f639a121fab0407595ebdf04015d9267": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_be8dddb19dde441da1193d5f8738413d", "placeholder": "​", "style": "IPY_MODEL_4be8d304ddce49319488f20ab36e7f53", "value": " 1/1 [00:00<00:00, 22.58it/s]" } }, "fa5ff44ae5f64ce4bbf9b59e300b7ba3": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "e0d701b8c09744858db4fda6011b69df": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "7ef61952e0f2463bad3dd1a814bfb95a": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "75263b1d19ce4cb0b95cacb6138b6fcb": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "c3f35ce818e34b05b247fae76200c02e": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "be8dddb19dde441da1193d5f8738413d": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "4be8d304ddce49319488f20ab36e7f53": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "984896bacc39446091e2b6573facfa3e": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_f15179aad6ad44dfa2dfbd703dc7c319", "IPY_MODEL_f92c68902b984e648337da7e63fcc775", "IPY_MODEL_5ce279d7dca74e55acae1c70efae6cd5" ], "layout": "IPY_MODEL_b187d889a6654fd7ad948d66a8bce197" } }, "f15179aad6ad44dfa2dfbd703dc7c319": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_e131a64ebaa84a809754b3c762525c37", "placeholder": "​", "style": "IPY_MODEL_72819a3c606c41f19d1493539d64aff5", "value": "Downloading (…)lve/main/config.json: 100%" } }, "f92c68902b984e648337da7e63fcc775": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_6761867d6e2b415b932bebd33a3144f3", "max": 506, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_72c9aa3bb9614dfc8f316bc45a3455a0", "value": 506 } }, "5ce279d7dca74e55acae1c70efae6cd5": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_58e5e895215d4910b55d8cbe1417a111", "placeholder": "​", "style": "IPY_MODEL_b6bad7f690c54cb5814d09081e306393", "value": " 506/506 [00:00<00:00, 31.8kB/s]" } }, "b187d889a6654fd7ad948d66a8bce197": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "e131a64ebaa84a809754b3c762525c37": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "72819a3c606c41f19d1493539d64aff5": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "6761867d6e2b415b932bebd33a3144f3": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "72c9aa3bb9614dfc8f316bc45a3455a0": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "58e5e895215d4910b55d8cbe1417a111": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "b6bad7f690c54cb5814d09081e306393": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "ae3921fcb8e44862a42031f284f608c3": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_aaac19cd3f524821a656ceffa57fb337", "IPY_MODEL_a77af2c0771b4a20b42dade07c40021c", "IPY_MODEL_7e5ced10225544399add5e68dda80595" ], "layout": "IPY_MODEL_ccf73891ca1941ca8b77c87ba84058f8" } }, "aaac19cd3f524821a656ceffa57fb337": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_bfc1e1979f0741a09e6c9f4181ae88e7", "placeholder": "​", "style": "IPY_MODEL_8ce25329fdf04a6e8c4ad8097975111e", "value": "Downloading pytorch_model.bin: 100%" } }, "a77af2c0771b4a20b42dade07c40021c": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_479d18fc0f9448a49b07b93ac608ae1d", "max": 6853038093, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_85ffc3306cb4474aa5bfac22f6356352", "value": 6853038093 } }, "7e5ced10225544399add5e68dda80595": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_0d5765bde1fa470eb087df5fe1e45e56", "placeholder": "​", "style": "IPY_MODEL_6873f0a521cd4e4390563e27046c1aa6", "value": " 6.85G/6.85G [00:35<00:00, 230MB/s]" } }, "ccf73891ca1941ca8b77c87ba84058f8": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "bfc1e1979f0741a09e6c9f4181ae88e7": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "8ce25329fdf04a6e8c4ad8097975111e": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "479d18fc0f9448a49b07b93ac608ae1d": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "85ffc3306cb4474aa5bfac22f6356352": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "0d5765bde1fa470eb087df5fe1e45e56": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "6873f0a521cd4e4390563e27046c1aa6": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "0832a0aa65ac4f70ba2f303f9b867d25": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_436e9deafae04955a13f2ff0fc0e6348", "IPY_MODEL_83427a912e3740008c0a7ffb9d20a1bd", "IPY_MODEL_084f196c105c4c428253e0ce6dce8a21" ], "layout": "IPY_MODEL_403990397a4041abb8aff7bdcc5d8e8a" } }, "436e9deafae04955a13f2ff0fc0e6348": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_f957482962954f03b383d7c4f93c7b04", "placeholder": "​", "style": "IPY_MODEL_19a731496867404b8888986f3d9c83ef", "value": "Downloading (…)neration_config.json: 100%" } }, "83427a912e3740008c0a7ffb9d20a1bd": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_e0b09d3aa9ae4719bcdfad8e75ad86c5", "max": 137, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_9d41c197843644858919a396dc927936", "value": 137 } }, "084f196c105c4c428253e0ce6dce8a21": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_67f9f877dee04d538ffbe2de6c4dd181", "placeholder": "​", "style": "IPY_MODEL_001bb332cdb94a0c96dd2a322ff8718f", "value": " 137/137 [00:00<00:00, 9.29kB/s]" } }, "403990397a4041abb8aff7bdcc5d8e8a": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "f957482962954f03b383d7c4f93c7b04": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "19a731496867404b8888986f3d9c83ef": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "e0b09d3aa9ae4719bcdfad8e75ad86c5": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "9d41c197843644858919a396dc927936": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "67f9f877dee04d538ffbe2de6c4dd181": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "001bb332cdb94a0c96dd2a322ff8718f": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "310970764a4a4c9382e2334a4abbbd77": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_2cd59f5acc4948988814eb9b3d909aed", "IPY_MODEL_8939309032ca4787a9d3c7b54b889926", "IPY_MODEL_fcd31f8d1e5a482d9267ab63fd723e86" ], "layout": "IPY_MODEL_2d126272a4284ba9af39de65e9680fc8" } }, "2cd59f5acc4948988814eb9b3d909aed": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_d8a0e85fa9fc4a9291f9303861fc9efc", "placeholder": "​", "style": "IPY_MODEL_d21f1cfb275b4bb38baf7022665e0208", "value": "Map: 100%" } }, "8939309032ca4787a9d3c7b54b889926": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_81e25933b9af43a492a6da4a5e3560f6", "max": 15011, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_5fe27494333b4e07a632769696744210", "value": 15011 } }, "fcd31f8d1e5a482d9267ab63fd723e86": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_35ca13104a3e4263a192889362dbc05e", "placeholder": "​", "style": "IPY_MODEL_85ae96c237754c4db014e5c09898e162", "value": " 14994/15011 [00:44<00:00, 638.52 examples/s]" } }, "2d126272a4284ba9af39de65e9680fc8": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": "hidden", "width": null } }, "d8a0e85fa9fc4a9291f9303861fc9efc": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "d21f1cfb275b4bb38baf7022665e0208": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "81e25933b9af43a492a6da4a5e3560f6": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "5fe27494333b4e07a632769696744210": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "35ca13104a3e4263a192889362dbc05e": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "85ae96c237754c4db014e5c09898e162": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } } } } }, "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "YSW4FUQPwIYu", "outputId": "421da5b9-be5e-4661-bf98-6c57fbf1f6fd" }, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Cloning into 'alpaca-lora'...\n", "remote: Enumerating objects: 607, done.\u001b[K\n", "remote: Counting objects: 100% (51/51), done.\u001b[K\n", "remote: Compressing objects: 100% (32/32), done.\u001b[K\n", "remote: Total 607 (delta 28), reused 33 (delta 19), pack-reused 556\u001b[K\n", "Receiving objects: 100% (607/607), 27.78 MiB | 5.67 MiB/s, done.\n", "Resolving deltas: 100% (360/360), done.\n" ] } ], "source": [ "!git clone https://github.com/tloen/alpaca-lora.git" ] }, { "cell_type": "markdown", "source": [ "# Alpaca OpenLLaMa 3B LoRA" ], "metadata": { "id": "Gzg8SopX8EWH" } }, { "cell_type": "code", "source": [ "%cd alpaca-lora/" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "s1xm2uERx_st", "outputId": "c77b42e1-202b-45ac-aa2c-3fdf2bcde155" }, "execution_count": 2, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "/content/alpaca-lora\n" ] } ] }, { "cell_type": "code", "source": [ "!pip install -q datasets loralib sentencepiece\n", "\n", "!pip install -q git+https://github.com/huggingface/transformers.git\n", "!pip install -q git+https://github.com/huggingface/peft.git" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "JCB9UzMVwsSM", "outputId": "7f0688e4-f360-4da6-a4d3-0d59d4135649" }, "execution_count": 5, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ " Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n", " Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n", " Preparing metadata (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.3/1.3 MB\u001b[0m \u001b[31m39.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25h Building wheel for transformers (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n", " Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n", " Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n", " Preparing metadata (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n" ] } ] }, { "cell_type": "code", "source": [ "!pip install bitsandbytes" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "qCnXTszZxE2T", "outputId": "e619573c-3ac2-4ac3-bed5-caab57d00f9a" }, "execution_count": 6, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n", "Collecting bitsandbytes\n", " Downloading bitsandbytes-0.39.0-py3-none-any.whl (92.2 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m92.2/92.2 MB\u001b[0m \u001b[31m6.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hInstalling collected packages: bitsandbytes\n", "Successfully installed bitsandbytes-0.39.0\n" ] } ] }, { "cell_type": "code", "source": [ "from google.colab import drive\n", "drive.mount('/content/drive')" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "NyxvpDjcrUvv", "outputId": "abe581a2-70ce-4956-b3dd-593fa4f3d8ef" }, "execution_count": 8, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Mounted at /content/drive\n" ] } ] }, { "cell_type": "markdown", "source": [ "## Data Check" ], "metadata": { "id": "9w0aSCzhxxQf" } }, { "cell_type": "code", "source": [ "from datasets import load_dataset\n", "from transformers import LlamaTokenizer\n", "\n", "\n", "tokenizer = LlamaTokenizer.from_pretrained(\"openlm-research/open_llama_3b_600bt_preview\", add_eos_token=True)\n", "tokenizer.pad_token = tokenizer.eos_token\n", "tokenizer.pad_token_id = tokenizer.eos_token_id\n", "\n", "data = load_dataset(\"json\", data_files=\"/content/drive/MyDrive/alpaca-data.json\")\n", "\n", "\n", "def generate_prompt(data_point):\n", " # sorry about the formatting disaster gotta move fast\n", " if data_point[\"input\"]:\n", " return f\"\"\"Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.\n", "\n", "### Instruction:\n", "{data_point[\"instruction\"]}\n", "\n", "### Input:\n", "{data_point[\"input\"]}\n", "\n", "### Response:\n", "{data_point[\"output\"]}\"\"\"\n", " else:\n", " return f\"\"\"Below is an instruction that describes a task. Write a response that appropriately completes the request.\n", "\n", "### Instruction:\n", "{data_point[\"instruction\"]}\n", "\n", "### Response:\n", "{data_point[\"output\"]}\"\"\"" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 150, "referenced_widgets": [ "76eba77b0fc9499e9b4015393156153e", "c42123767d4849ffbcdd48f09580ea13", "e89f07c1f5744328b071f96a3bdc9532", "da36c71ba5bc469eb32bde730c5038d3", "6cc6b7c564fe48f086152c78f8a9a915", "c6a3adfa76dd4bbd8cc5e75ee28ef3e8", "0c65f0e913b74de88ab712d65b9027f7", "b07c7f3b6bc24f10821b88e254e88e43", "42ea2834ee074029abe8cf8f9ff79a16", "ec4205b0491a4f53ad1cba42689d59c4", "f8b9df09c8c54d53a22310e64e30772f", "e69b8912d3384180842ff0ead6096803", "550a2ef0612e4d8396462d1b1c43608d", "f8d82b4d016f46ca8bab1a2fe591861e", "975d908a1874425198e0d62ae884a3ef", "152aa8231b3f4b7db4e6f27bb61065b9", "27b48982ae244c66bccd855e7e33a00d", "a896cb94d1fc4641ad224c7a9ca619b1", "be5d02d7001a4a25a848d6b0c582b42b", "609437910022418e83543fe6c3d16501", "c3b5ef42dae64f95a2ef055db7e50fa8", "ee0f0aff0c254b82854d1a6f4b367161", "fd90085d416440579f409cdd9b29e053", "fdd55d56775a4641925fc88e43f946b8", "a54ebe30a12b4c21b67b2f8d43aea2d3", "6f041f053ff240cbbb52f6d117ebfcf1", "b7d5f8aaee0047178661e58aa59433ee", "563ca6cac1594ba0a7c168ee31ccc982", "e10e7830d1404baea72e81e0df295239", "acc07aa077fd49d38d74e0e10fa3991a", "441dfdd43ef0442894c19cad81a478db", "9efe7ac9ee0946388eeb85f9a4603ffe", "ae9996dcefcf4cfc8ea2f7488629c5ec", "db463b34f98c488e8282d7a59c421347", "2ba524e48f35421c9afc9cc81e9ce3fb", "1829c96471c64a03a4872e1b7a16551c", "f639a121fab0407595ebdf04015d9267", "fa5ff44ae5f64ce4bbf9b59e300b7ba3", "e0d701b8c09744858db4fda6011b69df", "7ef61952e0f2463bad3dd1a814bfb95a", "75263b1d19ce4cb0b95cacb6138b6fcb", "c3f35ce818e34b05b247fae76200c02e", "be8dddb19dde441da1193d5f8738413d", "4be8d304ddce49319488f20ab36e7f53" ] }, "id": "OdgRTo5YxyRL", "outputId": "e6e64014-796b-4fa9-9635-3cb29f2dab7d" }, "execution_count": 9, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Downloading and preparing dataset json/default to /root/.cache/huggingface/datasets/json/default-e726d3f1eee28f16/0.0.0/e347ab1c932092252e717ff3f949105a4dd28b27e842dd53157d2f72e276c2e4...\n" ] }, { "output_type": "display_data", "data": { "text/plain": [ "Downloading data files: 0%| | 0/1 [00:00" ], "text/html": [ "\n", "
\n", " \n", " \n", " [ 3/119 01:48 < 3:29:02, 0.01 it/s, Epoch 0.02/1]\n", "
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
StepTraining Loss
12.421700

" ] }, "metadata": {} }, { "output_type": "display_data", "data": { "text/plain": [ "\u001b[31m╭─\u001b[0m\u001b[31m──────────────────────────────\u001b[0m\u001b[31m \u001b[0m\u001b[1;31mTraceback \u001b[0m\u001b[1;2;31m(most recent call last)\u001b[0m\u001b[31m \u001b[0m\u001b[31m───────────────────────────────\u001b[0m\u001b[31m─╮\u001b[0m\n", "\u001b[31m│\u001b[0m in \u001b[92m\u001b[0m:\u001b[94m27\u001b[0m \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.10/dist-packages/transformers/\u001b[0m\u001b[1;33mtrainer.py\u001b[0m:\u001b[94m1696\u001b[0m in \u001b[92mtrain\u001b[0m \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m1693 \u001b[0m\u001b[2m│ │ \u001b[0minner_training_loop = find_executable_batch_size( \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m1694 \u001b[0m\u001b[2m│ │ │ \u001b[0m\u001b[96mself\u001b[0m._inner_training_loop, \u001b[96mself\u001b[0m._train_batch_size, args.auto_find_batch_size \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m1695 \u001b[0m\u001b[2m│ │ \u001b[0m) \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m1696 \u001b[2m│ │ \u001b[0m\u001b[94mreturn\u001b[0m inner_training_loop( \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m1697 \u001b[0m\u001b[2m│ │ │ \u001b[0margs=args, \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m1698 \u001b[0m\u001b[2m│ │ │ \u001b[0mresume_from_checkpoint=resume_from_checkpoint, \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m1699 \u001b[0m\u001b[2m│ │ │ \u001b[0mtrial=trial, \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.10/dist-packages/transformers/\u001b[0m\u001b[1;33mtrainer.py\u001b[0m:\u001b[94m1973\u001b[0m in \u001b[92m_inner_training_loop\u001b[0m \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m1970 \u001b[0m\u001b[2m│ │ │ │ │ \u001b[0m\u001b[94mwith\u001b[0m model.no_sync(): \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m1971 \u001b[0m\u001b[2m│ │ │ │ │ │ \u001b[0mtr_loss_step = \u001b[96mself\u001b[0m.training_step(model, inputs) \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m1972 \u001b[0m\u001b[2m│ │ │ │ \u001b[0m\u001b[94melse\u001b[0m: \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m1973 \u001b[2m│ │ │ │ │ \u001b[0mtr_loss_step = \u001b[96mself\u001b[0m.training_step(model, inputs) \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m1974 \u001b[0m\u001b[2m│ │ │ │ \u001b[0m \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m1975 \u001b[0m\u001b[2m│ │ │ │ \u001b[0m\u001b[94mif\u001b[0m ( \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m1976 \u001b[0m\u001b[2m│ │ │ │ │ \u001b[0margs.logging_nan_inf_filter \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.10/dist-packages/transformers/\u001b[0m\u001b[1;33mtrainer.py\u001b[0m:\u001b[94m2787\u001b[0m in \u001b[92mtraining_step\u001b[0m \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m2784 \u001b[0m\u001b[2m│ │ │ \u001b[0m\u001b[94mreturn\u001b[0m loss_mb.reduce_mean().detach().to(\u001b[96mself\u001b[0m.args.device) \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m2785 \u001b[0m\u001b[2m│ │ \u001b[0m \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m2786 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[94mwith\u001b[0m \u001b[96mself\u001b[0m.compute_loss_context_manager(): \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m2787 \u001b[2m│ │ │ \u001b[0mloss = \u001b[96mself\u001b[0m.compute_loss(model, inputs) \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m2788 \u001b[0m\u001b[2m│ │ \u001b[0m \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m2789 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[94mif\u001b[0m \u001b[96mself\u001b[0m.args.n_gpu > \u001b[94m1\u001b[0m: \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m2790 \u001b[0m\u001b[2m│ │ │ \u001b[0mloss = loss.mean() \u001b[2m# mean() to average on multi-gpu parallel training\u001b[0m \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.10/dist-packages/transformers/\u001b[0m\u001b[1;33mtrainer.py\u001b[0m:\u001b[94m2819\u001b[0m in \u001b[92mcompute_loss\u001b[0m \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m2816 \u001b[0m\u001b[2m│ │ │ \u001b[0mlabels = inputs.pop(\u001b[33m\"\u001b[0m\u001b[33mlabels\u001b[0m\u001b[33m\"\u001b[0m) \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m2817 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[94melse\u001b[0m: \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m2818 \u001b[0m\u001b[2m│ │ │ \u001b[0mlabels = \u001b[94mNone\u001b[0m \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m2819 \u001b[2m│ │ \u001b[0moutputs = model(**inputs) \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m2820 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[2m# Save past state if it exists\u001b[0m \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m2821 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[2m# TODO: this needs to be fixed and made cleaner later.\u001b[0m \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m2822 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[94mif\u001b[0m \u001b[96mself\u001b[0m.args.past_index >= \u001b[94m0\u001b[0m: \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.10/dist-packages/torch/nn/modules/\u001b[0m\u001b[1;33mmodule.py\u001b[0m:\u001b[94m1501\u001b[0m in \u001b[92m_call_impl\u001b[0m \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m1498 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[94mif\u001b[0m \u001b[95mnot\u001b[0m (\u001b[96mself\u001b[0m._backward_hooks \u001b[95mor\u001b[0m \u001b[96mself\u001b[0m._backward_pre_hooks \u001b[95mor\u001b[0m \u001b[96mself\u001b[0m._forward_hooks \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m1499 \u001b[0m\u001b[2m│ │ │ │ \u001b[0m\u001b[95mor\u001b[0m _global_backward_pre_hooks \u001b[95mor\u001b[0m _global_backward_hooks \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m1500 \u001b[0m\u001b[2m│ │ │ │ \u001b[0m\u001b[95mor\u001b[0m _global_forward_hooks \u001b[95mor\u001b[0m _global_forward_pre_hooks): \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m1501 \u001b[2m│ │ │ \u001b[0m\u001b[94mreturn\u001b[0m forward_call(*args, **kwargs) \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m1502 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[2m# Do not call functions when jit is used\u001b[0m \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m1503 \u001b[0m\u001b[2m│ │ \u001b[0mfull_backward_hooks, non_full_backward_hooks = [], [] \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m1504 \u001b[0m\u001b[2m│ │ \u001b[0mbackward_pre_hooks = [] \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.10/dist-packages/peft/\u001b[0m\u001b[1;33mpeft_model.py\u001b[0m:\u001b[94m686\u001b[0m in \u001b[92mforward\u001b[0m \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m 683 \u001b[0m\u001b[2m│ \u001b[0m): \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m 684 \u001b[0m\u001b[2m│ │ \u001b[0mpeft_config = \u001b[96mself\u001b[0m.active_peft_config \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m 685 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[94mif\u001b[0m \u001b[95mnot\u001b[0m \u001b[96misinstance\u001b[0m(peft_config, PromptLearningConfig): \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m 686 \u001b[2m│ │ │ \u001b[0m\u001b[94mreturn\u001b[0m \u001b[96mself\u001b[0m.base_model( \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m 687 \u001b[0m\u001b[2m│ │ │ │ \u001b[0minput_ids=input_ids, \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m 688 \u001b[0m\u001b[2m│ │ │ │ \u001b[0mattention_mask=attention_mask, \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m 689 \u001b[0m\u001b[2m│ │ │ │ \u001b[0minputs_embeds=inputs_embeds, \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.10/dist-packages/torch/nn/modules/\u001b[0m\u001b[1;33mmodule.py\u001b[0m:\u001b[94m1501\u001b[0m in \u001b[92m_call_impl\u001b[0m \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m1498 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[94mif\u001b[0m \u001b[95mnot\u001b[0m (\u001b[96mself\u001b[0m._backward_hooks \u001b[95mor\u001b[0m \u001b[96mself\u001b[0m._backward_pre_hooks \u001b[95mor\u001b[0m \u001b[96mself\u001b[0m._forward_hooks \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m1499 \u001b[0m\u001b[2m│ │ │ │ \u001b[0m\u001b[95mor\u001b[0m _global_backward_pre_hooks \u001b[95mor\u001b[0m _global_backward_hooks \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m1500 \u001b[0m\u001b[2m│ │ │ │ \u001b[0m\u001b[95mor\u001b[0m _global_forward_hooks \u001b[95mor\u001b[0m _global_forward_pre_hooks): \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m1501 \u001b[2m│ │ │ \u001b[0m\u001b[94mreturn\u001b[0m forward_call(*args, **kwargs) \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m1502 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[2m# Do not call functions when jit is used\u001b[0m \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m1503 \u001b[0m\u001b[2m│ │ \u001b[0mfull_backward_hooks, non_full_backward_hooks = [], [] \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m1504 \u001b[0m\u001b[2m│ │ \u001b[0mbackward_pre_hooks = [] \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.10/dist-packages/accelerate/\u001b[0m\u001b[1;33mhooks.py\u001b[0m:\u001b[94m165\u001b[0m in \u001b[92mnew_forward\u001b[0m \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m162 \u001b[0m\u001b[2m│ │ │ \u001b[0m\u001b[94mwith\u001b[0m torch.no_grad(): \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m163 \u001b[0m\u001b[2m│ │ │ │ \u001b[0moutput = old_forward(*args, **kwargs) \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m164 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[94melse\u001b[0m: \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m165 \u001b[2m│ │ │ \u001b[0moutput = old_forward(*args, **kwargs) \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m166 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[94mreturn\u001b[0m module._hf_hook.post_forward(module, output) \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m167 \u001b[0m\u001b[2m│ \u001b[0m \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m168 \u001b[0m\u001b[2m│ \u001b[0mmodule.forward = new_forward \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.10/dist-packages/transformers/models/llama/\u001b[0m\u001b[1;33mmodeling_llama.py\u001b[0m:\u001b[94m687\u001b[0m in \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[92mforward\u001b[0m \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m684 \u001b[0m\u001b[2m│ │ \u001b[0mreturn_dict = return_dict \u001b[94mif\u001b[0m return_dict \u001b[95mis\u001b[0m \u001b[95mnot\u001b[0m \u001b[94mNone\u001b[0m \u001b[94melse\u001b[0m \u001b[96mself\u001b[0m.config.use_return \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m685 \u001b[0m\u001b[2m│ │ \u001b[0m \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m686 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[2m# decoder outputs consists of (dec_features, layer_state, dec_hidden, dec_attn)\u001b[0m \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m687 \u001b[2m│ │ \u001b[0moutputs = \u001b[96mself\u001b[0m.model( \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m688 \u001b[0m\u001b[2m│ │ │ \u001b[0minput_ids=input_ids, \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m689 \u001b[0m\u001b[2m│ │ │ \u001b[0mattention_mask=attention_mask, \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m690 \u001b[0m\u001b[2m│ │ │ \u001b[0mposition_ids=position_ids, \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.10/dist-packages/torch/nn/modules/\u001b[0m\u001b[1;33mmodule.py\u001b[0m:\u001b[94m1501\u001b[0m in \u001b[92m_call_impl\u001b[0m \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m1498 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[94mif\u001b[0m \u001b[95mnot\u001b[0m (\u001b[96mself\u001b[0m._backward_hooks \u001b[95mor\u001b[0m \u001b[96mself\u001b[0m._backward_pre_hooks \u001b[95mor\u001b[0m \u001b[96mself\u001b[0m._forward_hooks \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m1499 \u001b[0m\u001b[2m│ │ │ │ \u001b[0m\u001b[95mor\u001b[0m _global_backward_pre_hooks \u001b[95mor\u001b[0m _global_backward_hooks \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m1500 \u001b[0m\u001b[2m│ │ │ │ \u001b[0m\u001b[95mor\u001b[0m _global_forward_hooks \u001b[95mor\u001b[0m _global_forward_pre_hooks): \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m1501 \u001b[2m│ │ │ \u001b[0m\u001b[94mreturn\u001b[0m forward_call(*args, **kwargs) \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m1502 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[2m# Do not call functions when jit is used\u001b[0m \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m1503 \u001b[0m\u001b[2m│ │ \u001b[0mfull_backward_hooks, non_full_backward_hooks = [], [] \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m1504 \u001b[0m\u001b[2m│ │ \u001b[0mbackward_pre_hooks = [] \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.10/dist-packages/accelerate/\u001b[0m\u001b[1;33mhooks.py\u001b[0m:\u001b[94m165\u001b[0m in \u001b[92mnew_forward\u001b[0m \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m162 \u001b[0m\u001b[2m│ │ │ \u001b[0m\u001b[94mwith\u001b[0m torch.no_grad(): \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m163 \u001b[0m\u001b[2m│ │ │ │ \u001b[0moutput = old_forward(*args, **kwargs) \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m164 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[94melse\u001b[0m: \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m165 \u001b[2m│ │ │ \u001b[0moutput = old_forward(*args, **kwargs) \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m166 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[94mreturn\u001b[0m module._hf_hook.post_forward(module, output) \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m167 \u001b[0m\u001b[2m│ \u001b[0m \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m168 \u001b[0m\u001b[2m│ \u001b[0mmodule.forward = new_forward \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.10/dist-packages/transformers/models/llama/\u001b[0m\u001b[1;33mmodeling_llama.py\u001b[0m:\u001b[94m569\u001b[0m in \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[92mforward\u001b[0m \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m566 \u001b[0m\u001b[2m│ │ │ │ │ \u001b[0m \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m567 \u001b[0m\u001b[2m│ │ │ │ │ \u001b[0m\u001b[94mreturn\u001b[0m custom_forward \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m568 \u001b[0m\u001b[2m│ │ │ │ \u001b[0m \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m569 \u001b[2m│ │ │ │ \u001b[0mlayer_outputs = torch.utils.checkpoint.checkpoint( \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m570 \u001b[0m\u001b[2m│ │ │ │ │ \u001b[0mcreate_custom_forward(decoder_layer), \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m571 \u001b[0m\u001b[2m│ │ │ │ │ \u001b[0mhidden_states, \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m572 \u001b[0m\u001b[2m│ │ │ │ │ \u001b[0mattention_mask, \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.10/dist-packages/torch/utils/\u001b[0m\u001b[1;33mcheckpoint.py\u001b[0m:\u001b[94m249\u001b[0m in \u001b[92mcheckpoint\u001b[0m \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m246 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[94mraise\u001b[0m \u001b[96mValueError\u001b[0m(\u001b[33m\"\u001b[0m\u001b[33mUnexpected keyword arguments: \u001b[0m\u001b[33m\"\u001b[0m + \u001b[33m\"\u001b[0m\u001b[33m,\u001b[0m\u001b[33m\"\u001b[0m.join(arg \u001b[94mfor\u001b[0m arg \u001b[95min\u001b[0m kwar \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m247 \u001b[0m\u001b[2m│ \u001b[0m \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m248 \u001b[0m\u001b[2m│ \u001b[0m\u001b[94mif\u001b[0m use_reentrant: \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m249 \u001b[2m│ │ \u001b[0m\u001b[94mreturn\u001b[0m CheckpointFunction.apply(function, preserve, *args) \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m250 \u001b[0m\u001b[2m│ \u001b[0m\u001b[94melse\u001b[0m: \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m251 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[94mreturn\u001b[0m _checkpoint_without_reentrant( \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m252 \u001b[0m\u001b[2m│ │ │ \u001b[0mfunction, \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.10/dist-packages/torch/autograd/\u001b[0m\u001b[1;33mfunction.py\u001b[0m:\u001b[94m506\u001b[0m in \u001b[92mapply\u001b[0m \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m503 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[94mif\u001b[0m \u001b[95mnot\u001b[0m torch._C._are_functorch_transforms_active(): \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m504 \u001b[0m\u001b[2m│ │ │ \u001b[0m\u001b[2m# See NOTE: [functorch vjp and autograd interaction]\u001b[0m \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m505 \u001b[0m\u001b[2m│ │ │ \u001b[0margs = _functorch.utils.unwrap_dead_wrappers(args) \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m506 \u001b[2m│ │ │ \u001b[0m\u001b[94mreturn\u001b[0m \u001b[96msuper\u001b[0m().apply(*args, **kwargs) \u001b[2m# type: ignore[misc]\u001b[0m \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m507 \u001b[0m\u001b[2m│ │ \u001b[0m \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m508 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[94mif\u001b[0m \u001b[96mcls\u001b[0m.setup_context == _SingleLevelFunction.setup_context: \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m509 \u001b[0m\u001b[2m│ │ │ \u001b[0m\u001b[94mraise\u001b[0m \u001b[96mRuntimeError\u001b[0m( \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.10/dist-packages/torch/utils/\u001b[0m\u001b[1;33mcheckpoint.py\u001b[0m:\u001b[94m107\u001b[0m in \u001b[92mforward\u001b[0m \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m104 \u001b[0m\u001b[2m│ │ \u001b[0mctx.save_for_backward(*tensor_inputs) \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m105 \u001b[0m\u001b[2m│ │ \u001b[0m \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m106 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[94mwith\u001b[0m torch.no_grad(): \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m107 \u001b[2m│ │ │ \u001b[0moutputs = run_function(*args) \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m108 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[94mreturn\u001b[0m outputs \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m109 \u001b[0m\u001b[2m│ \u001b[0m \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m110 \u001b[0m\u001b[2m│ \u001b[0m\u001b[1;95m@staticmethod\u001b[0m \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.10/dist-packages/transformers/models/llama/\u001b[0m\u001b[1;33mmodeling_llama.py\u001b[0m:\u001b[94m565\u001b[0m in \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[92mcustom_forward\u001b[0m \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m562 \u001b[0m\u001b[2m│ │ │ │ \u001b[0m\u001b[94mdef\u001b[0m \u001b[92mcreate_custom_forward\u001b[0m(module): \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m563 \u001b[0m\u001b[2m│ │ │ │ │ \u001b[0m\u001b[94mdef\u001b[0m \u001b[92mcustom_forward\u001b[0m(*inputs): \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m564 \u001b[0m\u001b[2m│ │ │ │ │ │ \u001b[0m\u001b[2m# None for past_key_value\u001b[0m \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m565 \u001b[2m│ │ │ │ │ │ \u001b[0m\u001b[94mreturn\u001b[0m module(*inputs, output_attentions, \u001b[94mNone\u001b[0m) \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m566 \u001b[0m\u001b[2m│ │ │ │ │ \u001b[0m \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m567 \u001b[0m\u001b[2m│ │ │ │ │ \u001b[0m\u001b[94mreturn\u001b[0m custom_forward \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m568 \u001b[0m \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.10/dist-packages/torch/nn/modules/\u001b[0m\u001b[1;33mmodule.py\u001b[0m:\u001b[94m1501\u001b[0m in \u001b[92m_call_impl\u001b[0m \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m1498 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[94mif\u001b[0m \u001b[95mnot\u001b[0m (\u001b[96mself\u001b[0m._backward_hooks \u001b[95mor\u001b[0m \u001b[96mself\u001b[0m._backward_pre_hooks \u001b[95mor\u001b[0m \u001b[96mself\u001b[0m._forward_hooks \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m1499 \u001b[0m\u001b[2m│ │ │ │ \u001b[0m\u001b[95mor\u001b[0m _global_backward_pre_hooks \u001b[95mor\u001b[0m _global_backward_hooks \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m1500 \u001b[0m\u001b[2m│ │ │ │ \u001b[0m\u001b[95mor\u001b[0m _global_forward_hooks \u001b[95mor\u001b[0m _global_forward_pre_hooks): \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m1501 \u001b[2m│ │ │ \u001b[0m\u001b[94mreturn\u001b[0m forward_call(*args, **kwargs) \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m1502 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[2m# Do not call functions when jit is used\u001b[0m \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m1503 \u001b[0m\u001b[2m│ │ \u001b[0mfull_backward_hooks, non_full_backward_hooks = [], [] \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m1504 \u001b[0m\u001b[2m│ │ \u001b[0mbackward_pre_hooks = [] \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.10/dist-packages/accelerate/\u001b[0m\u001b[1;33mhooks.py\u001b[0m:\u001b[94m165\u001b[0m in \u001b[92mnew_forward\u001b[0m \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m162 \u001b[0m\u001b[2m│ │ │ \u001b[0m\u001b[94mwith\u001b[0m torch.no_grad(): \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m163 \u001b[0m\u001b[2m│ │ │ │ \u001b[0moutput = old_forward(*args, **kwargs) \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m164 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[94melse\u001b[0m: \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m165 \u001b[2m│ │ │ \u001b[0moutput = old_forward(*args, **kwargs) \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m166 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[94mreturn\u001b[0m module._hf_hook.post_forward(module, output) \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m167 \u001b[0m\u001b[2m│ \u001b[0m \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m168 \u001b[0m\u001b[2m│ \u001b[0mmodule.forward = new_forward \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.10/dist-packages/transformers/models/llama/\u001b[0m\u001b[1;33mmodeling_llama.py\u001b[0m:\u001b[94m292\u001b[0m in \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[92mforward\u001b[0m \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m289 \u001b[0m\u001b[2m│ │ \u001b[0mhidden_states = \u001b[96mself\u001b[0m.input_layernorm(hidden_states) \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m290 \u001b[0m\u001b[2m│ │ \u001b[0m \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m291 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[2m# Self Attention\u001b[0m \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m292 \u001b[2m│ │ \u001b[0mhidden_states, self_attn_weights, present_key_value = \u001b[96mself\u001b[0m.self_attn( \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m293 \u001b[0m\u001b[2m│ │ │ \u001b[0mhidden_states=hidden_states, \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m294 \u001b[0m\u001b[2m│ │ │ \u001b[0mattention_mask=attention_mask, \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m295 \u001b[0m\u001b[2m│ │ │ \u001b[0mposition_ids=position_ids, \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.10/dist-packages/torch/nn/modules/\u001b[0m\u001b[1;33mmodule.py\u001b[0m:\u001b[94m1501\u001b[0m in \u001b[92m_call_impl\u001b[0m \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m1498 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[94mif\u001b[0m \u001b[95mnot\u001b[0m (\u001b[96mself\u001b[0m._backward_hooks \u001b[95mor\u001b[0m \u001b[96mself\u001b[0m._backward_pre_hooks \u001b[95mor\u001b[0m \u001b[96mself\u001b[0m._forward_hooks \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m1499 \u001b[0m\u001b[2m│ │ │ │ \u001b[0m\u001b[95mor\u001b[0m _global_backward_pre_hooks \u001b[95mor\u001b[0m _global_backward_hooks \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m1500 \u001b[0m\u001b[2m│ │ │ │ \u001b[0m\u001b[95mor\u001b[0m _global_forward_hooks \u001b[95mor\u001b[0m _global_forward_pre_hooks): \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m1501 \u001b[2m│ │ │ \u001b[0m\u001b[94mreturn\u001b[0m forward_call(*args, **kwargs) \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m1502 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[2m# Do not call functions when jit is used\u001b[0m \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m1503 \u001b[0m\u001b[2m│ │ \u001b[0mfull_backward_hooks, non_full_backward_hooks = [], [] \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m1504 \u001b[0m\u001b[2m│ │ \u001b[0mbackward_pre_hooks = [] \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.10/dist-packages/accelerate/\u001b[0m\u001b[1;33mhooks.py\u001b[0m:\u001b[94m165\u001b[0m in \u001b[92mnew_forward\u001b[0m \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m162 \u001b[0m\u001b[2m│ │ │ \u001b[0m\u001b[94mwith\u001b[0m torch.no_grad(): \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m163 \u001b[0m\u001b[2m│ │ │ │ \u001b[0moutput = old_forward(*args, **kwargs) \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m164 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[94melse\u001b[0m: \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m165 \u001b[2m│ │ │ \u001b[0moutput = old_forward(*args, **kwargs) \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m166 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[94mreturn\u001b[0m module._hf_hook.post_forward(module, output) \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m167 \u001b[0m\u001b[2m│ \u001b[0m \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m168 \u001b[0m\u001b[2m│ \u001b[0mmodule.forward = new_forward \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.10/dist-packages/transformers/models/llama/\u001b[0m\u001b[1;33mmodeling_llama.py\u001b[0m:\u001b[94m194\u001b[0m in \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[92mforward\u001b[0m \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m191 \u001b[0m\u001b[2m│ \u001b[0m) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]: \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m192 \u001b[0m\u001b[2m│ │ \u001b[0mbsz, q_len, _ = hidden_states.size() \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m193 \u001b[0m\u001b[2m│ │ \u001b[0m \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m194 \u001b[2m│ │ \u001b[0mquery_states = \u001b[96mself\u001b[0m.q_proj(hidden_states).view(bsz, q_len, \u001b[96mself\u001b[0m.num_heads, \u001b[96mself\u001b[0m. \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m195 \u001b[0m\u001b[2m│ │ \u001b[0mkey_states = \u001b[96mself\u001b[0m.k_proj(hidden_states).view(bsz, q_len, \u001b[96mself\u001b[0m.num_heads, \u001b[96mself\u001b[0m.he \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m196 \u001b[0m\u001b[2m│ │ \u001b[0mvalue_states = \u001b[96mself\u001b[0m.v_proj(hidden_states).view(bsz, q_len, \u001b[96mself\u001b[0m.num_heads, \u001b[96mself\u001b[0m. \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m197 \u001b[0m \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.10/dist-packages/torch/nn/modules/\u001b[0m\u001b[1;33mmodule.py\u001b[0m:\u001b[94m1501\u001b[0m in \u001b[92m_call_impl\u001b[0m \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m1498 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[94mif\u001b[0m \u001b[95mnot\u001b[0m (\u001b[96mself\u001b[0m._backward_hooks \u001b[95mor\u001b[0m \u001b[96mself\u001b[0m._backward_pre_hooks \u001b[95mor\u001b[0m \u001b[96mself\u001b[0m._forward_hooks \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m1499 \u001b[0m\u001b[2m│ │ │ │ \u001b[0m\u001b[95mor\u001b[0m _global_backward_pre_hooks \u001b[95mor\u001b[0m _global_backward_hooks \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m1500 \u001b[0m\u001b[2m│ │ │ │ \u001b[0m\u001b[95mor\u001b[0m _global_forward_hooks \u001b[95mor\u001b[0m _global_forward_pre_hooks): \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m1501 \u001b[2m│ │ │ \u001b[0m\u001b[94mreturn\u001b[0m forward_call(*args, **kwargs) \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m1502 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[2m# Do not call functions when jit is used\u001b[0m \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m1503 \u001b[0m\u001b[2m│ │ \u001b[0mfull_backward_hooks, non_full_backward_hooks = [], [] \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m1504 \u001b[0m\u001b[2m│ │ \u001b[0mbackward_pre_hooks = [] \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.10/dist-packages/peft/tuners/\u001b[0m\u001b[1;33mlora.py\u001b[0m:\u001b[94m709\u001b[0m in \u001b[92mforward\u001b[0m \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m706 \u001b[0m\u001b[2m│ │ │ \u001b[0m\u001b[96mself\u001b[0m.active_adapter = adapter_name \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m707 \u001b[0m\u001b[2m│ │ \u001b[0m \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m708 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[94mdef\u001b[0m \u001b[92mforward\u001b[0m(\u001b[96mself\u001b[0m, x: torch.Tensor): \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m709 \u001b[2m│ │ │ \u001b[0mresult = \u001b[96msuper\u001b[0m().forward(x) \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m710 \u001b[0m\u001b[2m│ │ │ \u001b[0m \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m711 \u001b[0m\u001b[2m│ │ │ \u001b[0m\u001b[94mif\u001b[0m \u001b[96mself\u001b[0m.disable_adapters \u001b[95mor\u001b[0m \u001b[96mself\u001b[0m.active_adapter \u001b[95mnot\u001b[0m \u001b[95min\u001b[0m \u001b[96mself\u001b[0m.lora_A.keys(): \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m712 \u001b[0m\u001b[2m│ │ │ │ \u001b[0m\u001b[94mreturn\u001b[0m result \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.10/dist-packages/bitsandbytes/nn/\u001b[0m\u001b[1;33mmodules.py\u001b[0m:\u001b[94m388\u001b[0m in \u001b[92mforward\u001b[0m \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m385 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[94mif\u001b[0m \u001b[96mself\u001b[0m.bias \u001b[95mis\u001b[0m \u001b[95mnot\u001b[0m \u001b[94mNone\u001b[0m \u001b[95mand\u001b[0m \u001b[96mself\u001b[0m.bias.dtype != x.dtype: \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m386 \u001b[0m\u001b[2m│ │ │ \u001b[0m\u001b[96mself\u001b[0m.bias.data = \u001b[96mself\u001b[0m.bias.data.to(x.dtype) \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m387 \u001b[0m\u001b[2m│ │ \u001b[0m \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m388 \u001b[2m│ │ \u001b[0mout = bnb.matmul(x, \u001b[96mself\u001b[0m.weight, bias=\u001b[96mself\u001b[0m.bias, state=\u001b[96mself\u001b[0m.state) \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m389 \u001b[0m\u001b[2m│ │ \u001b[0m \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m390 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[94mif\u001b[0m \u001b[95mnot\u001b[0m \u001b[96mself\u001b[0m.state.has_fp16_weights: \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m391 \u001b[0m\u001b[2m│ │ │ \u001b[0m\u001b[94mif\u001b[0m \u001b[96mself\u001b[0m.state.CB \u001b[95mis\u001b[0m \u001b[95mnot\u001b[0m \u001b[94mNone\u001b[0m \u001b[95mand\u001b[0m \u001b[96mself\u001b[0m.state.CxB \u001b[95mis\u001b[0m \u001b[95mnot\u001b[0m \u001b[94mNone\u001b[0m: \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.10/dist-packages/bitsandbytes/autograd/\u001b[0m\u001b[1;33m_functions.py\u001b[0m:\u001b[94m559\u001b[0m in \u001b[92mmatmul\u001b[0m \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m556 \u001b[0m\u001b[2m│ \u001b[0mstate = state \u001b[95mor\u001b[0m MatmulLtState() \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m557 \u001b[0m\u001b[2m│ \u001b[0m\u001b[94mif\u001b[0m threshold > \u001b[94m0.0\u001b[0m: \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m558 \u001b[0m\u001b[2m│ │ \u001b[0mstate.threshold = threshold \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m559 \u001b[2m│ \u001b[0m\u001b[94mreturn\u001b[0m MatMul8bitLt.apply(A, B, out, bias, state) \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m560 \u001b[0m \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m561 \u001b[0m \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m562 \u001b[0m\u001b[94mdef\u001b[0m \u001b[92mmatmul_4bit\u001b[0m(A: tensor, B: tensor, quant_state: List, out: tensor = \u001b[94mNone\u001b[0m, bias=\u001b[94mNone\u001b[0m): \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.10/dist-packages/torch/autograd/\u001b[0m\u001b[1;33mfunction.py\u001b[0m:\u001b[94m506\u001b[0m in \u001b[92mapply\u001b[0m \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m503 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[94mif\u001b[0m \u001b[95mnot\u001b[0m torch._C._are_functorch_transforms_active(): \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m504 \u001b[0m\u001b[2m│ │ │ \u001b[0m\u001b[2m# See NOTE: [functorch vjp and autograd interaction]\u001b[0m \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m505 \u001b[0m\u001b[2m│ │ │ \u001b[0margs = _functorch.utils.unwrap_dead_wrappers(args) \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m506 \u001b[2m│ │ │ \u001b[0m\u001b[94mreturn\u001b[0m \u001b[96msuper\u001b[0m().apply(*args, **kwargs) \u001b[2m# type: ignore[misc]\u001b[0m \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m507 \u001b[0m\u001b[2m│ │ \u001b[0m \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m508 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[94mif\u001b[0m \u001b[96mcls\u001b[0m.setup_context == _SingleLevelFunction.setup_context: \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m509 \u001b[0m\u001b[2m│ │ │ \u001b[0m\u001b[94mraise\u001b[0m \u001b[96mRuntimeError\u001b[0m( \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.10/dist-packages/bitsandbytes/autograd/\u001b[0m\u001b[1;33m_functions.py\u001b[0m:\u001b[94m323\u001b[0m in \u001b[92mforward\u001b[0m \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m320 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[2m# 1. Quantize A\u001b[0m \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m321 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[94mif\u001b[0m \u001b[96mlen\u001b[0m(A.shape) == \u001b[94m3\u001b[0m: \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m322 \u001b[0m\u001b[2m│ │ │ \u001b[0mA = A.view(-\u001b[94m1\u001b[0m, A.shape[-\u001b[94m1\u001b[0m]).contiguous() \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m323 \u001b[2m│ │ \u001b[0mCA, CAt, SCA, SCAt, coo_tensorA = F.double_quant(A.to(torch.float16), threshold= \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m324 \u001b[0m\u001b[2m│ │ \u001b[0m \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m325 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[94mif\u001b[0m state.threshold > \u001b[94m0.0\u001b[0m \u001b[95mand\u001b[0m coo_tensorA \u001b[95mis\u001b[0m \u001b[95mnot\u001b[0m \u001b[94mNone\u001b[0m: \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m326 \u001b[0m\u001b[2m│ │ │ \u001b[0m\u001b[94mif\u001b[0m state.has_fp16_weights: \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.10/dist-packages/bitsandbytes/\u001b[0m\u001b[1;33mfunctional.py\u001b[0m:\u001b[94m2029\u001b[0m in \u001b[92mdouble_quant\u001b[0m \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m2026 \u001b[0m\u001b[2m│ │ │ │ \u001b[0mct.c_int32(rows), \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m2027 \u001b[0m\u001b[2m│ │ │ │ \u001b[0mct.c_int32(cols), \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m2028 \u001b[0m\u001b[2m│ │ │ \u001b[0m) \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m2029 \u001b[2m│ │ │ \u001b[0mval, idx = torch.sort(coo_tensor.rowidx) \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m2030 \u001b[0m\u001b[2m│ │ │ \u001b[0mcoo_tensor.rowidx = val \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m2031 \u001b[0m\u001b[2m│ │ │ \u001b[0mcoo_tensor.colidx = coo_tensor.colidx[idx] \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m2032 \u001b[0m\u001b[2m│ │ │ \u001b[0mcoo_tensor.values = coo_tensor.values[idx] \u001b[31m│\u001b[0m\n", "\u001b[31m╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\u001b[0m\n", "\u001b[1;91mKeyboardInterrupt\u001b[0m\n" ], "text/html": [ "

╭─────────────────────────────── Traceback (most recent call last) ────────────────────────────────╮\n",
              " in <cell line: 27>:27                                                                            \n",
              "                                                                                                  \n",
              " /usr/local/lib/python3.10/dist-packages/transformers/trainer.py:1696 in train                    \n",
              "                                                                                                  \n",
              "   1693 │   │   inner_training_loop = find_executable_batch_size(                                 \n",
              "   1694 │   │   │   self._inner_training_loop, self._train_batch_size, args.auto_find_batch_size  \n",
              "   1695 │   │   )                                                                                 \n",
              " 1696 │   │   return inner_training_loop(                                                       \n",
              "   1697 │   │   │   args=args,                                                                    \n",
              "   1698 │   │   │   resume_from_checkpoint=resume_from_checkpoint,                                \n",
              "   1699 │   │   │   trial=trial,                                                                  \n",
              "                                                                                                  \n",
              " /usr/local/lib/python3.10/dist-packages/transformers/trainer.py:1973 in _inner_training_loop     \n",
              "                                                                                                  \n",
              "   1970 │   │   │   │   │   with model.no_sync():                                                 \n",
              "   1971 │   │   │   │   │   │   tr_loss_step = self.training_step(model, inputs)                  \n",
              "   1972 │   │   │   │   else:                                                                     \n",
              " 1973 │   │   │   │   │   tr_loss_step = self.training_step(model, inputs)                      \n",
              "   1974 │   │   │   │                                                                             \n",
              "   1975 │   │   │   │   if (                                                                      \n",
              "   1976 │   │   │   │   │   args.logging_nan_inf_filter                                           \n",
              "                                                                                                  \n",
              " /usr/local/lib/python3.10/dist-packages/transformers/trainer.py:2787 in training_step            \n",
              "                                                                                                  \n",
              "   2784 │   │   │   return loss_mb.reduce_mean().detach().to(self.args.device)                    \n",
              "   2785 │   │                                                                                     \n",
              "   2786 │   │   with self.compute_loss_context_manager():                                         \n",
              " 2787 │   │   │   loss = self.compute_loss(model, inputs)                                       \n",
              "   2788 │   │                                                                                     \n",
              "   2789 │   │   if self.args.n_gpu > 1:                                                           \n",
              "   2790 │   │   │   loss = loss.mean()  # mean() to average on multi-gpu parallel training        \n",
              "                                                                                                  \n",
              " /usr/local/lib/python3.10/dist-packages/transformers/trainer.py:2819 in compute_loss             \n",
              "                                                                                                  \n",
              "   2816 │   │   │   labels = inputs.pop(\"labels\")                                                 \n",
              "   2817 │   │   else:                                                                             \n",
              "   2818 │   │   │   labels = None                                                                 \n",
              " 2819 │   │   outputs = model(**inputs)                                                         \n",
              "   2820 │   │   # Save past state if it exists                                                    \n",
              "   2821 │   │   # TODO: this needs to be fixed and made cleaner later.                            \n",
              "   2822 │   │   if self.args.past_index >= 0:                                                     \n",
              "                                                                                                  \n",
              " /usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py:1501 in _call_impl            \n",
              "                                                                                                  \n",
              "   1498 │   │   if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks   \n",
              "   1499 │   │   │   │   or _global_backward_pre_hooks or _global_backward_hooks                   \n",
              "   1500 │   │   │   │   or _global_forward_hooks or _global_forward_pre_hooks):                   \n",
              " 1501 │   │   │   return forward_call(*args, **kwargs)                                          \n",
              "   1502 │   │   # Do not call functions when jit is used                                          \n",
              "   1503 │   │   full_backward_hooks, non_full_backward_hooks = [], []                             \n",
              "   1504 │   │   backward_pre_hooks = []                                                           \n",
              "                                                                                                  \n",
              " /usr/local/lib/python3.10/dist-packages/peft/peft_model.py:686 in forward                        \n",
              "                                                                                                  \n",
              "    683 │   ):                                                                                    \n",
              "    684 │   │   peft_config = self.active_peft_config                                             \n",
              "    685 │   │   if not isinstance(peft_config, PromptLearningConfig):                             \n",
              "  686 │   │   │   return self.base_model(                                                       \n",
              "    687 │   │   │   │   input_ids=input_ids,                                                      \n",
              "    688 │   │   │   │   attention_mask=attention_mask,                                            \n",
              "    689 │   │   │   │   inputs_embeds=inputs_embeds,                                              \n",
              "                                                                                                  \n",
              " /usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py:1501 in _call_impl            \n",
              "                                                                                                  \n",
              "   1498 │   │   if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks   \n",
              "   1499 │   │   │   │   or _global_backward_pre_hooks or _global_backward_hooks                   \n",
              "   1500 │   │   │   │   or _global_forward_hooks or _global_forward_pre_hooks):                   \n",
              " 1501 │   │   │   return forward_call(*args, **kwargs)                                          \n",
              "   1502 │   │   # Do not call functions when jit is used                                          \n",
              "   1503 │   │   full_backward_hooks, non_full_backward_hooks = [], []                             \n",
              "   1504 │   │   backward_pre_hooks = []                                                           \n",
              "                                                                                                  \n",
              " /usr/local/lib/python3.10/dist-packages/accelerate/hooks.py:165 in new_forward                   \n",
              "                                                                                                  \n",
              "   162 │   │   │   with torch.no_grad():                                                          \n",
              "   163 │   │   │   │   output = old_forward(*args, **kwargs)                                      \n",
              "   164 │   │   else:                                                                              \n",
              " 165 │   │   │   output = old_forward(*args, **kwargs)                                          \n",
              "   166 │   │   return module._hf_hook.post_forward(module, output)                                \n",
              "   167 │                                                                                          \n",
              "   168 │   module.forward = new_forward                                                           \n",
              "                                                                                                  \n",
              " /usr/local/lib/python3.10/dist-packages/transformers/models/llama/modeling_llama.py:687 in       \n",
              " forward                                                                                          \n",
              "                                                                                                  \n",
              "   684 │   │   return_dict = return_dict if return_dict is not None else self.config.use_return   \n",
              "   685 │   │                                                                                      \n",
              "   686 │   │   # decoder outputs consists of (dec_features, layer_state, dec_hidden, dec_attn)    \n",
              " 687 │   │   outputs = self.model(                                                              \n",
              "   688 │   │   │   input_ids=input_ids,                                                           \n",
              "   689 │   │   │   attention_mask=attention_mask,                                                 \n",
              "   690 │   │   │   position_ids=position_ids,                                                     \n",
              "                                                                                                  \n",
              " /usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py:1501 in _call_impl            \n",
              "                                                                                                  \n",
              "   1498 │   │   if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks   \n",
              "   1499 │   │   │   │   or _global_backward_pre_hooks or _global_backward_hooks                   \n",
              "   1500 │   │   │   │   or _global_forward_hooks or _global_forward_pre_hooks):                   \n",
              " 1501 │   │   │   return forward_call(*args, **kwargs)                                          \n",
              "   1502 │   │   # Do not call functions when jit is used                                          \n",
              "   1503 │   │   full_backward_hooks, non_full_backward_hooks = [], []                             \n",
              "   1504 │   │   backward_pre_hooks = []                                                           \n",
              "                                                                                                  \n",
              " /usr/local/lib/python3.10/dist-packages/accelerate/hooks.py:165 in new_forward                   \n",
              "                                                                                                  \n",
              "   162 │   │   │   with torch.no_grad():                                                          \n",
              "   163 │   │   │   │   output = old_forward(*args, **kwargs)                                      \n",
              "   164 │   │   else:                                                                              \n",
              " 165 │   │   │   output = old_forward(*args, **kwargs)                                          \n",
              "   166 │   │   return module._hf_hook.post_forward(module, output)                                \n",
              "   167 │                                                                                          \n",
              "   168 │   module.forward = new_forward                                                           \n",
              "                                                                                                  \n",
              " /usr/local/lib/python3.10/dist-packages/transformers/models/llama/modeling_llama.py:569 in       \n",
              " forward                                                                                          \n",
              "                                                                                                  \n",
              "   566 │   │   │   │   │                                                                          \n",
              "   567 │   │   │   │   │   return custom_forward                                                  \n",
              "   568 │   │   │   │                                                                              \n",
              " 569 │   │   │   │   layer_outputs = torch.utils.checkpoint.checkpoint(                         \n",
              "   570 │   │   │   │   │   create_custom_forward(decoder_layer),                                  \n",
              "   571 │   │   │   │   │   hidden_states,                                                         \n",
              "   572 │   │   │   │   │   attention_mask,                                                        \n",
              "                                                                                                  \n",
              " /usr/local/lib/python3.10/dist-packages/torch/utils/checkpoint.py:249 in checkpoint              \n",
              "                                                                                                  \n",
              "   246 │   │   raise ValueError(\"Unexpected keyword arguments: \" + \",\".join(arg for arg in kwar   \n",
              "   247 │                                                                                          \n",
              "   248 │   if use_reentrant:                                                                      \n",
              " 249 │   │   return CheckpointFunction.apply(function, preserve, *args)                         \n",
              "   250 │   else:                                                                                  \n",
              "   251 │   │   return _checkpoint_without_reentrant(                                              \n",
              "   252 │   │   │   function,                                                                      \n",
              "                                                                                                  \n",
              " /usr/local/lib/python3.10/dist-packages/torch/autograd/function.py:506 in apply                  \n",
              "                                                                                                  \n",
              "   503 │   │   if not torch._C._are_functorch_transforms_active():                                \n",
              "   504 │   │   │   # See NOTE: [functorch vjp and autograd interaction]                           \n",
              "   505 │   │   │   args = _functorch.utils.unwrap_dead_wrappers(args)                             \n",
              " 506 │   │   │   return super().apply(*args, **kwargs)  # type: ignore[misc]                    \n",
              "   507 │   │                                                                                      \n",
              "   508 │   │   if cls.setup_context == _SingleLevelFunction.setup_context:                        \n",
              "   509 │   │   │   raise RuntimeError(                                                            \n",
              "                                                                                                  \n",
              " /usr/local/lib/python3.10/dist-packages/torch/utils/checkpoint.py:107 in forward                 \n",
              "                                                                                                  \n",
              "   104 │   │   ctx.save_for_backward(*tensor_inputs)                                              \n",
              "   105 │   │                                                                                      \n",
              "   106 │   │   with torch.no_grad():                                                              \n",
              " 107 │   │   │   outputs = run_function(*args)                                                  \n",
              "   108 │   │   return outputs                                                                     \n",
              "   109 │                                                                                          \n",
              "   110 │   @staticmethod                                                                          \n",
              "                                                                                                  \n",
              " /usr/local/lib/python3.10/dist-packages/transformers/models/llama/modeling_llama.py:565 in       \n",
              " custom_forward                                                                                   \n",
              "                                                                                                  \n",
              "   562 │   │   │   │   def create_custom_forward(module):                                         \n",
              "   563 │   │   │   │   │   def custom_forward(*inputs):                                           \n",
              "   564 │   │   │   │   │   │   # None for past_key_value                                          \n",
              " 565 │   │   │   │   │   │   return module(*inputs, output_attentions, None)                    \n",
              "   566 │   │   │   │   │                                                                          \n",
              "   567 │   │   │   │   │   return custom_forward                                                  \n",
              "   568                                                                                            \n",
              "                                                                                                  \n",
              " /usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py:1501 in _call_impl            \n",
              "                                                                                                  \n",
              "   1498 │   │   if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks   \n",
              "   1499 │   │   │   │   or _global_backward_pre_hooks or _global_backward_hooks                   \n",
              "   1500 │   │   │   │   or _global_forward_hooks or _global_forward_pre_hooks):                   \n",
              " 1501 │   │   │   return forward_call(*args, **kwargs)                                          \n",
              "   1502 │   │   # Do not call functions when jit is used                                          \n",
              "   1503 │   │   full_backward_hooks, non_full_backward_hooks = [], []                             \n",
              "   1504 │   │   backward_pre_hooks = []                                                           \n",
              "                                                                                                  \n",
              " /usr/local/lib/python3.10/dist-packages/accelerate/hooks.py:165 in new_forward                   \n",
              "                                                                                                  \n",
              "   162 │   │   │   with torch.no_grad():                                                          \n",
              "   163 │   │   │   │   output = old_forward(*args, **kwargs)                                      \n",
              "   164 │   │   else:                                                                              \n",
              " 165 │   │   │   output = old_forward(*args, **kwargs)                                          \n",
              "   166 │   │   return module._hf_hook.post_forward(module, output)                                \n",
              "   167 │                                                                                          \n",
              "   168 │   module.forward = new_forward                                                           \n",
              "                                                                                                  \n",
              " /usr/local/lib/python3.10/dist-packages/transformers/models/llama/modeling_llama.py:292 in       \n",
              " forward                                                                                          \n",
              "                                                                                                  \n",
              "   289 │   │   hidden_states = self.input_layernorm(hidden_states)                                \n",
              "   290 │   │                                                                                      \n",
              "   291 │   │   # Self Attention                                                                   \n",
              " 292 │   │   hidden_states, self_attn_weights, present_key_value = self.self_attn(              \n",
              "   293 │   │   │   hidden_states=hidden_states,                                                   \n",
              "   294 │   │   │   attention_mask=attention_mask,                                                 \n",
              "   295 │   │   │   position_ids=position_ids,                                                     \n",
              "                                                                                                  \n",
              " /usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py:1501 in _call_impl            \n",
              "                                                                                                  \n",
              "   1498 │   │   if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks   \n",
              "   1499 │   │   │   │   or _global_backward_pre_hooks or _global_backward_hooks                   \n",
              "   1500 │   │   │   │   or _global_forward_hooks or _global_forward_pre_hooks):                   \n",
              " 1501 │   │   │   return forward_call(*args, **kwargs)                                          \n",
              "   1502 │   │   # Do not call functions when jit is used                                          \n",
              "   1503 │   │   full_backward_hooks, non_full_backward_hooks = [], []                             \n",
              "   1504 │   │   backward_pre_hooks = []                                                           \n",
              "                                                                                                  \n",
              " /usr/local/lib/python3.10/dist-packages/accelerate/hooks.py:165 in new_forward                   \n",
              "                                                                                                  \n",
              "   162 │   │   │   with torch.no_grad():                                                          \n",
              "   163 │   │   │   │   output = old_forward(*args, **kwargs)                                      \n",
              "   164 │   │   else:                                                                              \n",
              " 165 │   │   │   output = old_forward(*args, **kwargs)                                          \n",
              "   166 │   │   return module._hf_hook.post_forward(module, output)                                \n",
              "   167 │                                                                                          \n",
              "   168 │   module.forward = new_forward                                                           \n",
              "                                                                                                  \n",
              " /usr/local/lib/python3.10/dist-packages/transformers/models/llama/modeling_llama.py:194 in       \n",
              " forward                                                                                          \n",
              "                                                                                                  \n",
              "   191 │   ) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]:       \n",
              "   192 │   │   bsz, q_len, _ = hidden_states.size()                                               \n",
              "   193 │   │                                                                                      \n",
              " 194 │   │   query_states = self.q_proj(hidden_states).view(bsz, q_len, self.num_heads, self.   \n",
              "   195 │   │   key_states = self.k_proj(hidden_states).view(bsz, q_len, self.num_heads, self.he   \n",
              "   196 │   │   value_states = self.v_proj(hidden_states).view(bsz, q_len, self.num_heads, self.   \n",
              "   197                                                                                            \n",
              "                                                                                                  \n",
              " /usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py:1501 in _call_impl            \n",
              "                                                                                                  \n",
              "   1498 │   │   if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks   \n",
              "   1499 │   │   │   │   or _global_backward_pre_hooks or _global_backward_hooks                   \n",
              "   1500 │   │   │   │   or _global_forward_hooks or _global_forward_pre_hooks):                   \n",
              " 1501 │   │   │   return forward_call(*args, **kwargs)                                          \n",
              "   1502 │   │   # Do not call functions when jit is used                                          \n",
              "   1503 │   │   full_backward_hooks, non_full_backward_hooks = [], []                             \n",
              "   1504 │   │   backward_pre_hooks = []                                                           \n",
              "                                                                                                  \n",
              " /usr/local/lib/python3.10/dist-packages/peft/tuners/lora.py:709 in forward                       \n",
              "                                                                                                  \n",
              "   706 │   │   │   self.active_adapter = adapter_name                                             \n",
              "   707 │   │                                                                                      \n",
              "   708 │   │   def forward(self, x: torch.Tensor):                                                \n",
              " 709 │   │   │   result = super().forward(x)                                                    \n",
              "   710 │   │   │                                                                                  \n",
              "   711 │   │   │   if self.disable_adapters or self.active_adapter not in self.lora_A.keys():     \n",
              "   712 │   │   │   │   return result                                                              \n",
              "                                                                                                  \n",
              " /usr/local/lib/python3.10/dist-packages/bitsandbytes/nn/modules.py:388 in forward                \n",
              "                                                                                                  \n",
              "   385 │   │   if self.bias is not None and self.bias.dtype != x.dtype:                           \n",
              "   386 │   │   │   self.bias.data = self.bias.data.to(x.dtype)                                    \n",
              "   387 │   │                                                                                      \n",
              " 388 │   │   out = bnb.matmul(x, self.weight, bias=self.bias, state=self.state)                 \n",
              "   389 │   │                                                                                      \n",
              "   390 │   │   if not self.state.has_fp16_weights:                                                \n",
              "   391 │   │   │   if self.state.CB is not None and self.state.CxB is not None:                   \n",
              "                                                                                                  \n",
              " /usr/local/lib/python3.10/dist-packages/bitsandbytes/autograd/_functions.py:559 in matmul        \n",
              "                                                                                                  \n",
              "   556 │   state = state or MatmulLtState()                                                       \n",
              "   557 │   if threshold > 0.0:                                                                    \n",
              "   558 │   │   state.threshold = threshold                                                        \n",
              " 559 return MatMul8bitLt.apply(A, B, out, bias, state)                                      \n",
              "   560                                                                                            \n",
              "   561                                                                                            \n",
              "   562 def matmul_4bit(A: tensor, B: tensor, quant_state: List, out: tensor = None, bias=None):   \n",
              "                                                                                                  \n",
              " /usr/local/lib/python3.10/dist-packages/torch/autograd/function.py:506 in apply                  \n",
              "                                                                                                  \n",
              "   503 │   │   if not torch._C._are_functorch_transforms_active():                                \n",
              "   504 │   │   │   # See NOTE: [functorch vjp and autograd interaction]                           \n",
              "   505 │   │   │   args = _functorch.utils.unwrap_dead_wrappers(args)                             \n",
              " 506 │   │   │   return super().apply(*args, **kwargs)  # type: ignore[misc]                    \n",
              "   507 │   │                                                                                      \n",
              "   508 │   │   if cls.setup_context == _SingleLevelFunction.setup_context:                        \n",
              "   509 │   │   │   raise RuntimeError(                                                            \n",
              "                                                                                                  \n",
              " /usr/local/lib/python3.10/dist-packages/bitsandbytes/autograd/_functions.py:323 in forward       \n",
              "                                                                                                  \n",
              "   320 │   │   # 1. Quantize A                                                                    \n",
              "   321 │   │   if len(A.shape) == 3:                                                              \n",
              "   322 │   │   │   A = A.view(-1, A.shape[-1]).contiguous()                                       \n",
              " 323 │   │   CA, CAt, SCA, SCAt, coo_tensorA = F.double_quant(A.to(torch.float16), threshold=   \n",
              "   324 │   │                                                                                      \n",
              "   325 │   │   if state.threshold > 0.0 and coo_tensorA is not None:                              \n",
              "   326 │   │   │   if state.has_fp16_weights:                                                     \n",
              "                                                                                                  \n",
              " /usr/local/lib/python3.10/dist-packages/bitsandbytes/functional.py:2029 in double_quant          \n",
              "                                                                                                  \n",
              "   2026 │   │   │   │   ct.c_int32(rows),                                                         \n",
              "   2027 │   │   │   │   ct.c_int32(cols),                                                         \n",
              "   2028 │   │   │   )                                                                             \n",
              " 2029 │   │   │   val, idx = torch.sort(coo_tensor.rowidx)                                      \n",
              "   2030 │   │   │   coo_tensor.rowidx = val                                                       \n",
              "   2031 │   │   │   coo_tensor.colidx = coo_tensor.colidx[idx]                                    \n",
              "   2032 │   │   │   coo_tensor.values = coo_tensor.values[idx]                                    \n",
              "╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n",
              "KeyboardInterrupt\n",
              "
\n" ] }, "metadata": {} } ] } ] }