{ "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "provenance": [], "machine_shape": "hm", "gpuType": "A100" }, "kernelspec": { "name": "python3", "display_name": "Python 3" }, "language_info": { "name": "python" }, "accelerator": "GPU", "widgets": { "application/vnd.jupyter.widget-state+json": { "c06bbba05e10462d993f3e7e6f932cf1": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_6f734c35284341d891a44694ddc55b2c", "IPY_MODEL_193be53200ab436a967f1ea4807053e2", "IPY_MODEL_85a2415f14284237875b349b4c414e21" ], "layout": "IPY_MODEL_3ab931b2fcc0493ca71923ebc37127c7" } }, "6f734c35284341d891a44694ddc55b2c": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_f49472d0536f4011be17902f9e827807", "placeholder": "​", "style": "IPY_MODEL_2f341cb76f254d0da913faec6a82f762", "value": "Fetching 3 files: 100%" } }, "193be53200ab436a967f1ea4807053e2": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": 
"ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_24ebf383723f4de494f9808b41222751", "max": 3, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_0c17aa90672046c9bd2f293b1a998b46", "value": 3 } }, "85a2415f14284237875b349b4c414e21": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_af89033247b34da2a5cded73b0beade2", "placeholder": "​", "style": "IPY_MODEL_0ea5f5e6ab26484ab22dcf5576f796d1", "value": " 3/3 [01:49<00:00, 109.92s/it]" } }, "3ab931b2fcc0493ca71923ebc37127c7": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, 
"visibility": null, "width": null } }, "f49472d0536f4011be17902f9e827807": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "2f341cb76f254d0da913faec6a82f762": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "24ebf383723f4de494f9808b41222751": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", 
"align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "0c17aa90672046c9bd2f293b1a998b46": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "af89033247b34da2a5cded73b0beade2": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, 
"justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "0ea5f5e6ab26484ab22dcf5576f796d1": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "946ce9afeddb4da5a36e81e5ada9d957": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_95782b84af1c4014ae04c9e6c9131cbe", "IPY_MODEL_2332101edec848219c3b0c6026c2a722", "IPY_MODEL_a2638b3f10a24de99bb940dcd150ab53" ], "layout": "IPY_MODEL_1c78955b41ba4845931a250f16b753b5" } }, "95782b84af1c4014ae04c9e6c9131cbe": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_148a8c3e4fad4cefa16a478a9758fdc5", "placeholder": "​", "style": 
"IPY_MODEL_92ef027f2cc940b5b09521328de550b0", "value": "tokenizer.model.v3: 100%" } }, "2332101edec848219c3b0c6026c2a722": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_1a3d2764c7fc41dcb97489e84c28093e", "max": 587404, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_c7b6155f0f844c67b3a2b805570fd6f9", "value": 587404 } }, "a2638b3f10a24de99bb940dcd150ab53": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_1eec374aa3414838a9b41e5db1fefd50", "placeholder": "​", "style": "IPY_MODEL_6fc8cf7aa81c4043878ac854b289dbe3", "value": " 587k/587k [00:00<00:00, 5.39MB/s]" } }, "1c78955b41ba4845931a250f16b753b5": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, 
"grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "148a8c3e4fad4cefa16a478a9758fdc5": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "92ef027f2cc940b5b09521328de550b0": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", 
"_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "1a3d2764c7fc41dcb97489e84c28093e": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "c7b6155f0f844c67b3a2b805570fd6f9": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "1eec374aa3414838a9b41e5db1fefd50": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", 
"_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "6fc8cf7aa81c4043878ac854b289dbe3": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "b1aab1a3b5914048962a6d7d63401425": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_10fe81122e6442f28608766d90749790", "IPY_MODEL_f7e72f0a87bc421b82a59ae9ad33a4cb", "IPY_MODEL_2188b8f9491b4d3e8861e40e7c4f6a46" ], "layout": "IPY_MODEL_b3bf1880a5844f8c89096ced830fc954" } }, "10fe81122e6442f28608766d90749790": { 
"model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_7e0495ffdeb74675847e5e4c2104cc34", "placeholder": "​", "style": "IPY_MODEL_3e0e828a21f24944b68a66eafb52f62b", "value": "params.json: 100%" } }, "f7e72f0a87bc421b82a59ae9ad33a4cb": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_ee07a8e2427c4fc9bd09b27ad11e968a", "max": 202, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_d35818e4f9454d26aa475826e08ea4f0", "value": 202 } }, "2188b8f9491b4d3e8861e40e7c4f6a46": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_e3f7c4fea8494af2a473cf61adccf270", "placeholder": "​", "style": "IPY_MODEL_7e18402efdb34d708b7917964ac791de", "value": " 202/202 [00:00<00:00, 12.7kB/s]" } }, "b3bf1880a5844f8c89096ced830fc954": { "model_module": "@jupyter-widgets/base", "model_name": 
"LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "7e0495ffdeb74675847e5e4c2104cc34": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": 
null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "3e0e828a21f24944b68a66eafb52f62b": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "ee07a8e2427c4fc9bd09b27ad11e968a": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "d35818e4f9454d26aa475826e08ea4f0": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": 
"1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "e3f7c4fea8494af2a473cf61adccf270": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "7e18402efdb34d708b7917964ac791de": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "1442445cdf89487784d4a39919fec6bf": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": 
"@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_2ff8ebe8d132411585a05b852362c406", "IPY_MODEL_ef719bb991714d91a365226c5a2ca9df", "IPY_MODEL_1727f9b019e9477282d010e96b7dd4c3" ], "layout": "IPY_MODEL_f82b841d7e5b45229119bd3195e5b12f" } }, "2ff8ebe8d132411585a05b852362c406": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_feb470b16b4249daaa19c1344d036f0a", "placeholder": "​", "style": "IPY_MODEL_2cc30eef6d7b46d283fcfd0e7abca6ea", "value": "consolidated.safetensors: 100%" } }, "ef719bb991714d91a365226c5a2ca9df": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_83b554bec0fd40dd9bd9e4601f2f98a3", "max": 14496078512, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_1c19998de61c4e2dad6647fdc4ca4358", "value": 14496078512 } }, "1727f9b019e9477282d010e96b7dd4c3": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", 
"_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_25df0dd9481a4e0ba21d2f4f4ffdba2e", "placeholder": "​", "style": "IPY_MODEL_cbf620ae5196446c84528feaed64ae6a", "value": " 14.5G/14.5G [01:49<00:00, 87.0MB/s]" } }, "f82b841d7e5b45229119bd3195e5b12f": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "feb470b16b4249daaa19c1344d036f0a": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": 
null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "2cc30eef6d7b46d283fcfd0e7abca6ea": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "83b554bec0fd40dd9bd9e4601f2f98a3": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, 
"max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "1c19998de61c4e2dad6647fdc4ca4358": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "25df0dd9481a4e0ba21d2f4f4ffdba2e": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "cbf620ae5196446c84528feaed64ae6a": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", 
"model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } } } } }, "cells": [ { "cell_type": "markdown", "source": [ "# Getting starting fine-tuning Mistral 7B\n", "\n", "This notebook shows you a simple example of how to LoRA finetune Mistral 7B. You can can run this notebook in Google Colab with Pro + account with A100 and 40GB RAM.\n", "\n", "\n", " \"Open\n", "\n", "\n", "\n", "Check out `mistral-finetune` Github repo to learn more: https://github.com/mistralai/mistral-finetune/" ], "metadata": { "id": "RyuOCYM92LJb" } }, { "cell_type": "markdown", "source": [ "## Installation\n", "\n", "Clone the `mistral-finetune` repo:\n" ], "metadata": { "id": "yxr8mv-17GfB" } }, { "cell_type": "code", "source": [ "%cd /content/\n", "!git clone https://github.com/mistralai/mistral-finetune.git" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "TIj3IlIeVDIb", "outputId": "6ffd6946-26a2-4e3d-e6db-3336cc2c7444" }, "execution_count": 3, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "/content\n", "Cloning into 'mistral-finetune'...\n", "remote: Enumerating objects: 62, done.\u001b[K\n", "remote: Counting objects: 100% (62/62), done.\u001b[K\n", "remote: Compressing objects: 100% (55/55), done.\u001b[K\n", "remote: Total 62 (delta 6), reused 59 (delta 4), pack-reused 0\u001b[K\n", "Receiving objects: 100% (62/62), 90.16 KiB | 3.00 MiB/s, done.\n", "Resolving deltas: 100% (6/6), done.\n" ] } ] }, { "cell_type": "markdown", "source": [ "Install all required dependencies:" ], "metadata": { "id": "mQPd_pGT7WiY" } }, { "cell_type": "code", "source": [ "!pip install -r /content/mistral-finetune/requirements.txt" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": 
"KuTOGipl7BS7", "outputId": "0d332b99-54b1-431b-eb41-4b929087040c" }, "execution_count": 4, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Requirement already satisfied: fire in /usr/local/lib/python3.10/dist-packages (from -r /content/mistral-finetune/requirements.txt (line 1)) (0.6.0)\n", "Requirement already satisfied: simple-parsing in /usr/local/lib/python3.10/dist-packages (from -r /content/mistral-finetune/requirements.txt (line 2)) (0.1.5)\n", "Requirement already satisfied: pyyaml in /usr/local/lib/python3.10/dist-packages (from -r /content/mistral-finetune/requirements.txt (line 3)) (6.0.1)\n", "Requirement already satisfied: mistral-common>=1.1.0 in /usr/local/lib/python3.10/dist-packages (from -r /content/mistral-finetune/requirements.txt (line 4)) (1.1.0)\n", "Requirement already satisfied: safetensors in /usr/local/lib/python3.10/dist-packages (from -r /content/mistral-finetune/requirements.txt (line 5)) (0.4.3)\n", "Requirement already satisfied: tensorboard in /usr/local/lib/python3.10/dist-packages (from -r /content/mistral-finetune/requirements.txt (line 6)) (2.15.2)\n", "Requirement already satisfied: tqdm in /usr/local/lib/python3.10/dist-packages (from -r /content/mistral-finetune/requirements.txt (line 7)) (4.66.4)\n", "Requirement already satisfied: torch==2.2 in /usr/local/lib/python3.10/dist-packages (from -r /content/mistral-finetune/requirements.txt (line 9)) (2.2.0)\n", "Requirement already satisfied: triton==2.2 in /usr/local/lib/python3.10/dist-packages (from -r /content/mistral-finetune/requirements.txt (line 10)) (2.2.0)\n", "Requirement already satisfied: xformers==0.0.24 in /usr/local/lib/python3.10/dist-packages (from -r /content/mistral-finetune/requirements.txt (line 11)) (0.0.24)\n", "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from torch==2.2->-r /content/mistral-finetune/requirements.txt (line 9)) (3.14.0)\n", "Requirement already satisfied: 
typing-extensions>=4.8.0 in /usr/local/lib/python3.10/dist-packages (from torch==2.2->-r /content/mistral-finetune/requirements.txt (line 9)) (4.11.0)\n", "Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from torch==2.2->-r /content/mistral-finetune/requirements.txt (line 9)) (1.12)\n", "Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch==2.2->-r /content/mistral-finetune/requirements.txt (line 9)) (3.3)\n", "Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch==2.2->-r /content/mistral-finetune/requirements.txt (line 9)) (3.1.4)\n", "Requirement already satisfied: fsspec in /usr/local/lib/python3.10/dist-packages (from torch==2.2->-r /content/mistral-finetune/requirements.txt (line 9)) (2023.6.0)\n", "Requirement already satisfied: nvidia-cuda-nvrtc-cu12==12.1.105 in /usr/local/lib/python3.10/dist-packages (from torch==2.2->-r /content/mistral-finetune/requirements.txt (line 9)) (12.1.105)\n", "Requirement already satisfied: nvidia-cuda-runtime-cu12==12.1.105 in /usr/local/lib/python3.10/dist-packages (from torch==2.2->-r /content/mistral-finetune/requirements.txt (line 9)) (12.1.105)\n", "Requirement already satisfied: nvidia-cuda-cupti-cu12==12.1.105 in /usr/local/lib/python3.10/dist-packages (from torch==2.2->-r /content/mistral-finetune/requirements.txt (line 9)) (12.1.105)\n", "Requirement already satisfied: nvidia-cudnn-cu12==8.9.2.26 in /usr/local/lib/python3.10/dist-packages (from torch==2.2->-r /content/mistral-finetune/requirements.txt (line 9)) (8.9.2.26)\n", "Requirement already satisfied: nvidia-cublas-cu12==12.1.3.1 in /usr/local/lib/python3.10/dist-packages (from torch==2.2->-r /content/mistral-finetune/requirements.txt (line 9)) (12.1.3.1)\n", "Requirement already satisfied: nvidia-cufft-cu12==11.0.2.54 in /usr/local/lib/python3.10/dist-packages (from torch==2.2->-r /content/mistral-finetune/requirements.txt (line 9)) (11.0.2.54)\n", 
"Requirement already satisfied: nvidia-curand-cu12==10.3.2.106 in /usr/local/lib/python3.10/dist-packages (from torch==2.2->-r /content/mistral-finetune/requirements.txt (line 9)) (10.3.2.106)\n", "Requirement already satisfied: nvidia-cusolver-cu12==11.4.5.107 in /usr/local/lib/python3.10/dist-packages (from torch==2.2->-r /content/mistral-finetune/requirements.txt (line 9)) (11.4.5.107)\n", "Requirement already satisfied: nvidia-cusparse-cu12==12.1.0.106 in /usr/local/lib/python3.10/dist-packages (from torch==2.2->-r /content/mistral-finetune/requirements.txt (line 9)) (12.1.0.106)\n", "Requirement already satisfied: nvidia-nccl-cu12==2.19.3 in /usr/local/lib/python3.10/dist-packages (from torch==2.2->-r /content/mistral-finetune/requirements.txt (line 9)) (2.19.3)\n", "Requirement already satisfied: nvidia-nvtx-cu12==12.1.105 in /usr/local/lib/python3.10/dist-packages (from torch==2.2->-r /content/mistral-finetune/requirements.txt (line 9)) (12.1.105)\n", "Requirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (from xformers==0.0.24->-r /content/mistral-finetune/requirements.txt (line 11)) (1.25.2)\n", "Requirement already satisfied: nvidia-nvjitlink-cu12 in /usr/local/lib/python3.10/dist-packages (from nvidia-cusolver-cu12==11.4.5.107->torch==2.2->-r /content/mistral-finetune/requirements.txt (line 9)) (12.5.40)\n", "Requirement already satisfied: six in /usr/local/lib/python3.10/dist-packages (from fire->-r /content/mistral-finetune/requirements.txt (line 1)) (1.16.0)\n", "Requirement already satisfied: termcolor in /usr/local/lib/python3.10/dist-packages (from fire->-r /content/mistral-finetune/requirements.txt (line 1)) (2.4.0)\n", "Requirement already satisfied: docstring-parser~=0.15 in /usr/local/lib/python3.10/dist-packages (from simple-parsing->-r /content/mistral-finetune/requirements.txt (line 2)) (0.16)\n", "Requirement already satisfied: jsonschema==4.21.1 in /usr/local/lib/python3.10/dist-packages (from 
mistral-common>=1.1.0->-r /content/mistral-finetune/requirements.txt (line 4)) (4.21.1)\n", "Requirement already satisfied: pydantic==2.6.1 in /usr/local/lib/python3.10/dist-packages (from mistral-common>=1.1.0->-r /content/mistral-finetune/requirements.txt (line 4)) (2.6.1)\n", "Requirement already satisfied: sentencepiece==0.1.99 in /usr/local/lib/python3.10/dist-packages (from mistral-common>=1.1.0->-r /content/mistral-finetune/requirements.txt (line 4)) (0.1.99)\n", "Requirement already satisfied: attrs>=22.2.0 in /usr/local/lib/python3.10/dist-packages (from jsonschema==4.21.1->mistral-common>=1.1.0->-r /content/mistral-finetune/requirements.txt (line 4)) (23.2.0)\n", "Requirement already satisfied: jsonschema-specifications>=2023.03.6 in /usr/local/lib/python3.10/dist-packages (from jsonschema==4.21.1->mistral-common>=1.1.0->-r /content/mistral-finetune/requirements.txt (line 4)) (2023.12.1)\n", "Requirement already satisfied: referencing>=0.28.4 in /usr/local/lib/python3.10/dist-packages (from jsonschema==4.21.1->mistral-common>=1.1.0->-r /content/mistral-finetune/requirements.txt (line 4)) (0.35.1)\n", "Requirement already satisfied: rpds-py>=0.7.1 in /usr/local/lib/python3.10/dist-packages (from jsonschema==4.21.1->mistral-common>=1.1.0->-r /content/mistral-finetune/requirements.txt (line 4)) (0.18.1)\n", "Requirement already satisfied: annotated-types>=0.4.0 in /usr/local/lib/python3.10/dist-packages (from pydantic==2.6.1->mistral-common>=1.1.0->-r /content/mistral-finetune/requirements.txt (line 4)) (0.7.0)\n", "Requirement already satisfied: pydantic-core==2.16.2 in /usr/local/lib/python3.10/dist-packages (from pydantic==2.6.1->mistral-common>=1.1.0->-r /content/mistral-finetune/requirements.txt (line 4)) (2.16.2)\n", "Requirement already satisfied: absl-py>=0.4 in /usr/local/lib/python3.10/dist-packages (from tensorboard->-r /content/mistral-finetune/requirements.txt (line 6)) (1.4.0)\n", "Requirement already satisfied: grpcio>=1.48.2 in 
/usr/local/lib/python3.10/dist-packages (from tensorboard->-r /content/mistral-finetune/requirements.txt (line 6)) (1.64.0)\n", "Requirement already satisfied: google-auth<3,>=1.6.3 in /usr/local/lib/python3.10/dist-packages (from tensorboard->-r /content/mistral-finetune/requirements.txt (line 6)) (2.27.0)\n", "Requirement already satisfied: google-auth-oauthlib<2,>=0.5 in /usr/local/lib/python3.10/dist-packages (from tensorboard->-r /content/mistral-finetune/requirements.txt (line 6)) (1.2.0)\n", "Requirement already satisfied: markdown>=2.6.8 in /usr/local/lib/python3.10/dist-packages (from tensorboard->-r /content/mistral-finetune/requirements.txt (line 6)) (3.6)\n", "Requirement already satisfied: protobuf!=4.24.0,>=3.19.6 in /usr/local/lib/python3.10/dist-packages (from tensorboard->-r /content/mistral-finetune/requirements.txt (line 6)) (3.20.3)\n", "Requirement already satisfied: requests<3,>=2.21.0 in /usr/local/lib/python3.10/dist-packages (from tensorboard->-r /content/mistral-finetune/requirements.txt (line 6)) (2.31.0)\n", "Requirement already satisfied: setuptools>=41.0.0 in /usr/local/lib/python3.10/dist-packages (from tensorboard->-r /content/mistral-finetune/requirements.txt (line 6)) (67.7.2)\n", "Requirement already satisfied: tensorboard-data-server<0.8.0,>=0.7.0 in /usr/local/lib/python3.10/dist-packages (from tensorboard->-r /content/mistral-finetune/requirements.txt (line 6)) (0.7.2)\n", "Requirement already satisfied: werkzeug>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from tensorboard->-r /content/mistral-finetune/requirements.txt (line 6)) (3.0.3)\n", "Requirement already satisfied: cachetools<6.0,>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from google-auth<3,>=1.6.3->tensorboard->-r /content/mistral-finetune/requirements.txt (line 6)) (5.3.3)\n", "Requirement already satisfied: pyasn1-modules>=0.2.1 in /usr/local/lib/python3.10/dist-packages (from google-auth<3,>=1.6.3->tensorboard->-r 
/content/mistral-finetune/requirements.txt (line 6)) (0.4.0)\n", "Requirement already satisfied: rsa<5,>=3.1.4 in /usr/local/lib/python3.10/dist-packages (from google-auth<3,>=1.6.3->tensorboard->-r /content/mistral-finetune/requirements.txt (line 6)) (4.9)\n", "Requirement already satisfied: requests-oauthlib>=0.7.0 in /usr/local/lib/python3.10/dist-packages (from google-auth-oauthlib<2,>=0.5->tensorboard->-r /content/mistral-finetune/requirements.txt (line 6)) (1.3.1)\n", "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2.21.0->tensorboard->-r /content/mistral-finetune/requirements.txt (line 6)) (3.3.2)\n", "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2.21.0->tensorboard->-r /content/mistral-finetune/requirements.txt (line 6)) (3.7)\n", "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2.21.0->tensorboard->-r /content/mistral-finetune/requirements.txt (line 6)) (2.0.7)\n", "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2.21.0->tensorboard->-r /content/mistral-finetune/requirements.txt (line 6)) (2024.2.2)\n", "Requirement already satisfied: MarkupSafe>=2.1.1 in /usr/local/lib/python3.10/dist-packages (from werkzeug>=1.0.1->tensorboard->-r /content/mistral-finetune/requirements.txt (line 6)) (2.1.5)\n", "Requirement already satisfied: mpmath>=0.19 in /usr/local/lib/python3.10/dist-packages (from sympy->torch==2.2->-r /content/mistral-finetune/requirements.txt (line 9)) (1.3.0)\n", "Requirement already satisfied: pyasn1<0.7.0,>=0.4.6 in /usr/local/lib/python3.10/dist-packages (from pyasn1-modules>=0.2.1->google-auth<3,>=1.6.3->tensorboard->-r /content/mistral-finetune/requirements.txt (line 6)) (0.6.0)\n", "Requirement already satisfied: oauthlib>=3.0.0 in /usr/local/lib/python3.10/dist-packages (from 
requests-oauthlib>=0.7.0->google-auth-oauthlib<2,>=0.5->tensorboard->-r /content/mistral-finetune/requirements.txt (line 6)) (3.2.2)\n" ] } ] }, { "cell_type": "markdown", "source": [ "## Model download" ], "metadata": { "id": "LgdIAi257jLo" } }, { "cell_type": "code", "source": [ "!wget https://models.mistralcdn.com/mistral-7b-v0-3/mistral-7B-v0.3.tar" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "cdl_R5baUyha", "outputId": "8ddcc9d2-5088-47a8-b5f7-d73c89063246" }, "execution_count": 5, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "--2024-05-24 18:50:25-- https://models.mistralcdn.com/mistral-7b-v0-3/mistral-7B-v0.3.tar\n", "Resolving models.mistralcdn.com (models.mistralcdn.com)... 104.26.6.117, 104.26.7.117, 172.67.70.68, ...\n", "Connecting to models.mistralcdn.com (models.mistralcdn.com)|104.26.6.117|:443... connected.\n", "HTTP request sent, awaiting response... 200 OK\n", "Length: 14496675840 (14G) [application/x-tar]\n", "Saving to: ‘mistral-7B-v0.3.tar’\n", "\n", "mistral-7B-v0.3.tar 100%[===================>] 13.50G 40.5MB/s in 6m 3s \n", "\n", "2024-05-24 18:56:29 (38.1 MB/s) - ‘mistral-7B-v0.3.tar’ saved [14496675840/14496675840]\n", "\n" ] } ] }, { "cell_type": "code", "source": [ "!DIR=/content/mistral_models && mkdir -p $DIR && tar -xf mistral-7B-v0.3.tar -C $DIR" ], "metadata": { "id": "IgJWR-fReilz" }, "execution_count": 7, "outputs": [] }, { "cell_type": "code", "source": [ "# Alternatively, you can download the model from Hugging Face\n", "\n", "# !pip install huggingface_hub\n", "# from huggingface_hub import snapshot_download\n", "# from pathlib import Path\n", "\n", "# mistral_models_path = Path.home().joinpath('mistral_models', '7B-v0.3')\n", "# mistral_models_path.mkdir(parents=True, exist_ok=True)\n", "\n", "# snapshot_download(repo_id=\"mistralai/Mistral-7B-v0.3\", allow_patterns=[\"params.json\", \"consolidated.safetensors\", \"tokenizer.model.v3\"], local_dir=mistral_models_path)\n", 
"\n", "#! cp -r /root/mistral_models/7B-v0.3 /content/mistral_models\n", "#! rm -r /root/mistral_models/7B-v0.3" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 163, "referenced_widgets": [ "c06bbba05e10462d993f3e7e6f932cf1", "6f734c35284341d891a44694ddc55b2c", "193be53200ab436a967f1ea4807053e2", "85a2415f14284237875b349b4c414e21", "3ab931b2fcc0493ca71923ebc37127c7", "f49472d0536f4011be17902f9e827807", "2f341cb76f254d0da913faec6a82f762", "24ebf383723f4de494f9808b41222751", "0c17aa90672046c9bd2f293b1a998b46", "af89033247b34da2a5cded73b0beade2", "0ea5f5e6ab26484ab22dcf5576f796d1", "946ce9afeddb4da5a36e81e5ada9d957", "95782b84af1c4014ae04c9e6c9131cbe", "2332101edec848219c3b0c6026c2a722", "a2638b3f10a24de99bb940dcd150ab53", "1c78955b41ba4845931a250f16b753b5", "148a8c3e4fad4cefa16a478a9758fdc5", "92ef027f2cc940b5b09521328de550b0", "1a3d2764c7fc41dcb97489e84c28093e", "c7b6155f0f844c67b3a2b805570fd6f9", "1eec374aa3414838a9b41e5db1fefd50", "6fc8cf7aa81c4043878ac854b289dbe3", "b1aab1a3b5914048962a6d7d63401425", "10fe81122e6442f28608766d90749790", "f7e72f0a87bc421b82a59ae9ad33a4cb", "2188b8f9491b4d3e8861e40e7c4f6a46", "b3bf1880a5844f8c89096ced830fc954", "7e0495ffdeb74675847e5e4c2104cc34", "3e0e828a21f24944b68a66eafb52f62b", "ee07a8e2427c4fc9bd09b27ad11e968a", "d35818e4f9454d26aa475826e08ea4f0", "e3f7c4fea8494af2a473cf61adccf270", "7e18402efdb34d708b7917964ac791de", "1442445cdf89487784d4a39919fec6bf", "2ff8ebe8d132411585a05b852362c406", "ef719bb991714d91a365226c5a2ca9df", "1727f9b019e9477282d010e96b7dd4c3", "f82b841d7e5b45229119bd3195e5b12f", "feb470b16b4249daaa19c1344d036f0a", "2cc30eef6d7b46d283fcfd0e7abca6ea", "83b554bec0fd40dd9bd9e4601f2f98a3", "1c19998de61c4e2dad6647fdc4ca4358", "25df0dd9481a4e0ba21d2f4f4ffdba2e", "cbf620ae5196446c84528feaed64ae6a" ] }, "id": "qgjAADBFHB0S", "outputId": "6dd98910-36fd-4dc1-c5b8-77bb4c104e05" }, "execution_count": null, "outputs": [ { "output_type": "display_data", "data": { "text/plain": [ "Fetching 3 files: 
0%| | 0/3 [00:002024-05-24 22:05:01\n", "2024-05-24 19:00:41 (UTC) - 0:02:30 - train - INFO - step: 000002 - done (%): 2.0 - loss: 0.905 - lr: 1.8e-05 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3624.4 - avg_words_per_second: 1008.4 - ETA: >2024-05-24 20:46:50\n", "2024-05-24 19:00:59 (UTC) - 0:02:47 - train - INFO - step: 000003 - done (%): 3.0 - loss: 0.912 - lr: 5.2e-05 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3706.1 - avg_words_per_second: 1331.5 - ETA: >2024-05-24 20:20:33\n", "2024-05-24 19:01:17 (UTC) - 0:03:05 - train - INFO - step: 000004 - done (%): 4.0 - loss: 0.884 - lr: 8.6e-05 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3591.8 - avg_words_per_second: 1580.0 - ETA: >2024-05-24 20:07:39\n", "2024-05-24 19:01:35 (UTC) - 0:03:24 - train - INFO - step: 000005 - done (%): 5.0 - loss: 0.835 - lr: 1.0e-04 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3567.3 - avg_words_per_second: 1778.2 - ETA: >2024-05-24 19:59:57\n", "2024-05-24 19:01:53 (UTC) - 0:03:42 - train - INFO - step: 000006 - done (%): 6.0 - loss: 0.858 - lr: 1.0e-04 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3710.8 - avg_words_per_second: 1947.2 - ETA: >2024-05-24 19:54:37\n", "2024-05-24 19:02:11 (UTC) - 0:04:00 - train - INFO - step: 000007 - done (%): 7.0 - loss: 0.868 - lr: 1.0e-04 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3606.2 - avg_words_per_second: 2084.1 - ETA: >2024-05-24 19:50:55\n", "2024-05-24 19:02:29 (UTC) - 0:04:18 - train - INFO - step: 000008 - done (%): 8.0 - loss: 0.868 - lr: 1.0e-04 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3622.3 - avg_words_per_second: 2201.0 - ETA: >2024-05-24 19:48:09\n", "2024-05-24 19:02:47 (UTC) - 0:04:36 - train - INFO - step: 000009 - done (%): 9.0 - loss: 0.804 - lr: 1.0e-04 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3690.9 - 
avg_words_per_second: 2304.3 - ETA: >2024-05-24 19:45:55\n", "2024-05-24 19:03:05 (UTC) - 0:04:54 - train - INFO - step: 000010 - done (%): 10.0 - loss: 0.898 - lr: 9.9e-05 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3584.2 - avg_words_per_second: 2389.7 - ETA: >2024-05-24 19:44:13\n", "2024-05-24 19:03:23 (UTC) - 0:05:12 - train - INFO - step: 000011 - done (%): 11.0 - loss: 0.744 - lr: 9.9e-05 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3695.7 - avg_words_per_second: 2469.0 - ETA: >2024-05-24 19:42:45\n", "2024-05-24 19:03:41 (UTC) - 0:05:30 - train - INFO - step: 000012 - done (%): 12.0 - loss: 0.837 - lr: 9.9e-05 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3575.5 - avg_words_per_second: 2534.3 - ETA: >2024-05-24 19:41:37\n", "2024-05-24 19:04:00 (UTC) - 0:05:48 - train - INFO - step: 000013 - done (%): 13.0 - loss: 0.846 - lr: 9.8e-05 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3561.0 - avg_words_per_second: 2591.8 - ETA: >2024-05-24 19:40:40\n", "2024-05-24 19:04:17 (UTC) - 0:06:06 - train - INFO - step: 000014 - done (%): 14.0 - loss: 0.854 - lr: 9.8e-05 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3694.8 - avg_words_per_second: 2648.3 - ETA: >2024-05-24 19:39:46\n", "2024-05-24 19:04:36 (UTC) - 0:06:24 - train - INFO - step: 000015 - done (%): 15.0 - loss: 0.911 - lr: 9.7e-05 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3579.8 - avg_words_per_second: 2695.0 - ETA: >2024-05-24 19:39:03\n", "2024-05-24 19:04:54 (UTC) - 0:06:42 - train - INFO - step: 000016 - done (%): 16.0 - loss: 0.815 - lr: 9.7e-05 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3686.7 - avg_words_per_second: 2741.1 - ETA: >2024-05-24 19:38:22\n", "2024-05-24 19:05:12 (UTC) - 0:07:00 - train - INFO - step: 000017 - done (%): 17.0 - loss: 0.821 - lr: 9.6e-05 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - 
words_per_second: 3605.0 - avg_words_per_second: 2780.3 - ETA: >2024-05-24 19:37:48\n", "2024-05-24 19:05:30 (UTC) - 0:07:19 - train - INFO - step: 000018 - done (%): 18.0 - loss: 0.842 - lr: 9.5e-05 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3601.2 - avg_words_per_second: 2816.0 - ETA: >2024-05-24 19:37:18\n", "2024-05-24 19:05:48 (UTC) - 0:07:36 - train - INFO - step: 000019 - done (%): 19.0 - loss: 0.802 - lr: 9.5e-05 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3700.0 - avg_words_per_second: 2851.8 - ETA: >2024-05-24 19:36:49\n", "2024-05-24 19:06:06 (UTC) - 0:07:54 - train - INFO - step: 000020 - done (%): 20.0 - loss: 0.867 - lr: 9.4e-05 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3599.4 - avg_words_per_second: 2881.8 - ETA: >2024-05-24 19:36:25\n", "2024-05-24 19:06:24 (UTC) - 0:08:13 - train - INFO - step: 000021 - done (%): 21.0 - loss: 0.826 - lr: 9.3e-05 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3602.7 - avg_words_per_second: 2909.5 - ETA: >2024-05-24 19:36:03\n", "2024-05-24 19:06:42 (UTC) - 0:08:30 - train - INFO - step: 000022 - done (%): 22.0 - loss: 0.809 - lr: 9.2e-05 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3696.9 - avg_words_per_second: 2937.9 - ETA: >2024-05-24 19:35:42\n", "2024-05-24 19:07:00 (UTC) - 0:08:49 - train - INFO - step: 000023 - done (%): 23.0 - loss: 0.837 - lr: 9.1e-05 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3608.7 - avg_words_per_second: 2961.9 - ETA: >2024-05-24 19:35:24\n", "2024-05-24 19:07:18 (UTC) - 0:09:06 - train - INFO - step: 000024 - done (%): 24.0 - loss: 0.851 - lr: 9.0e-05 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3690.6 - avg_words_per_second: 2986.4 - ETA: >2024-05-24 19:35:05\n", "2024-05-24 19:07:36 (UTC) - 0:09:24 - train - INFO - step: 000025 - done (%): 25.0 - loss: 0.856 - lr: 8.9e-05 - peak_alloc_mem (GB): 22.2 
- alloc_mem (GB): 17.1 - words_per_second: 3596.4 - avg_words_per_second: 3006.8 - ETA: >2024-05-24 19:34:51\n", "2024-05-24 19:07:54 (UTC) - 0:09:43 - train - INFO - step: 000026 - done (%): 26.0 - loss: 0.781 - lr: 8.8e-05 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3576.7 - avg_words_per_second: 3025.4 - ETA: >2024-05-24 19:34:37\n", "2024-05-24 19:08:12 (UTC) - 0:10:01 - train - INFO - step: 000027 - done (%): 27.0 - loss: 0.845 - lr: 8.7e-05 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3695.0 - avg_words_per_second: 3045.8 - ETA: >2024-05-24 19:34:23\n", "2024-05-24 19:08:30 (UTC) - 0:10:19 - train - INFO - step: 000028 - done (%): 28.0 - loss: 0.831 - lr: 8.6e-05 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3603.0 - avg_words_per_second: 3062.7 - ETA: >2024-05-24 19:34:11\n", "2024-05-24 19:08:48 (UTC) - 0:10:37 - train - INFO - step: 000029 - done (%): 29.0 - loss: 0.806 - lr: 8.5e-05 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3612.2 - avg_words_per_second: 3078.9 - ETA: >2024-05-24 19:34:00\n", "2024-05-24 19:09:06 (UTC) - 0:10:55 - train - INFO - step: 000030 - done (%): 30.0 - loss: 0.898 - lr: 8.4e-05 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3697.6 - avg_words_per_second: 3096.2 - ETA: >2024-05-24 19:33:48\n", "2024-05-24 19:09:24 (UTC) - 0:11:13 - train - INFO - step: 000031 - done (%): 31.0 - loss: 0.817 - lr: 8.3e-05 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3588.0 - avg_words_per_second: 3109.9 - ETA: >2024-05-24 19:33:38\n", "2024-05-24 19:09:42 (UTC) - 0:11:31 - train - INFO - step: 000032 - done (%): 32.0 - loss: 0.825 - lr: 8.1e-05 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3687.5 - avg_words_per_second: 3125.2 - ETA: >2024-05-24 19:33:28\n", "2024-05-24 19:10:00 (UTC) - 0:11:49 - train - INFO - step: 000033 - done (%): 33.0 - loss: 0.845 - lr: 8.0e-05 - 
peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3593.7 - avg_words_per_second: 3137.6 - ETA: >2024-05-24 19:33:20\n", "2024-05-24 19:10:19 (UTC) - 0:12:07 - train - INFO - step: 000034 - done (%): 34.0 - loss: 0.808 - lr: 7.9e-05 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3558.8 - avg_words_per_second: 3148.6 - ETA: >2024-05-24 19:33:12\n", "2024-05-24 19:10:36 (UTC) - 0:12:25 - train - INFO - step: 000035 - done (%): 35.0 - loss: 0.853 - lr: 7.7e-05 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3694.1 - avg_words_per_second: 3161.9 - ETA: >2024-05-24 19:33:04\n", "2024-05-24 19:10:55 (UTC) - 0:12:43 - train - INFO - step: 000036 - done (%): 36.0 - loss: 0.813 - lr: 7.6e-05 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3585.6 - avg_words_per_second: 3172.3 - ETA: >2024-05-24 19:32:57\n", "2024-05-24 19:11:12 (UTC) - 0:13:01 - train - INFO - step: 000037 - done (%): 37.0 - loss: 0.801 - lr: 7.5e-05 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3694.8 - avg_words_per_second: 3184.5 - ETA: >2024-05-24 19:32:49\n", "2024-05-24 19:11:31 (UTC) - 0:13:19 - train - INFO - step: 000038 - done (%): 38.0 - loss: 0.744 - lr: 7.3e-05 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3595.3 - avg_words_per_second: 3194.1 - ETA: >2024-05-24 19:32:43\n", "2024-05-24 19:11:49 (UTC) - 0:13:37 - train - INFO - step: 000039 - done (%): 39.0 - loss: 0.816 - lr: 7.2e-05 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3607.8 - avg_words_per_second: 3203.5 - ETA: >2024-05-24 19:32:37\n", "2024-05-24 19:12:07 (UTC) - 0:13:55 - train - INFO - step: 000040 - done (%): 40.0 - loss: 0.786 - lr: 7.0e-05 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3691.9 - avg_words_per_second: 3214.1 - ETA: >2024-05-24 19:32:30\n", "2024-05-24 19:12:25 (UTC) - 0:14:13 - train - INFO - step: 000041 - done (%): 41.0 - 
loss: 0.804 - lr: 6.9e-05 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3600.7 - avg_words_per_second: 3222.6 - ETA: >2024-05-24 19:32:25\n", "2024-05-24 19:12:43 (UTC) - 0:14:32 - train - INFO - step: 000042 - done (%): 42.0 - loss: 0.845 - lr: 6.7e-05 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3616.1 - avg_words_per_second: 3230.9 - ETA: >2024-05-24 19:32:19\n", "2024-05-24 19:13:01 (UTC) - 0:14:49 - train - INFO - step: 000043 - done (%): 43.0 - loss: 0.864 - lr: 6.5e-05 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3688.4 - avg_words_per_second: 3240.3 - ETA: >2024-05-24 19:32:14\n", "2024-05-24 19:13:19 (UTC) - 0:15:07 - train - INFO - step: 000044 - done (%): 44.0 - loss: 0.862 - lr: 6.4e-05 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3616.8 - avg_words_per_second: 3248.0 - ETA: >2024-05-24 19:32:09\n", "2024-05-24 19:13:37 (UTC) - 0:15:25 - train - INFO - step: 000045 - done (%): 45.0 - loss: 0.862 - lr: 6.2e-05 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3690.6 - avg_words_per_second: 3256.7 - ETA: >2024-05-24 19:32:03\n", "2024-05-24 19:13:55 (UTC) - 0:15:43 - train - INFO - step: 000046 - done (%): 46.0 - loss: 0.829 - lr: 6.1e-05 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3601.5 - avg_words_per_second: 3263.5 - ETA: >2024-05-24 19:31:59\n", "2024-05-24 19:14:13 (UTC) - 0:16:02 - train - INFO - step: 000047 - done (%): 47.0 - loss: 0.812 - lr: 5.9e-05 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3590.5 - avg_words_per_second: 3269.8 - ETA: >2024-05-24 19:31:55\n", "2024-05-24 19:14:31 (UTC) - 0:16:19 - train - INFO - step: 000048 - done (%): 48.0 - loss: 0.818 - lr: 5.7e-05 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3704.5 - avg_words_per_second: 3277.8 - ETA: >2024-05-24 19:31:50\n", "2024-05-24 19:14:49 (UTC) - 0:16:38 - train - INFO - step: 
000049 - done (%): 49.0 - loss: 0.817 - lr: 5.6e-05 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3608.4 - avg_words_per_second: 3283.9 - ETA: >2024-05-24 19:31:47\n", "2024-05-24 19:15:07 (UTC) - 0:16:56 - train - INFO - step: 000050 - done (%): 50.0 - loss: 0.888 - lr: 5.4e-05 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3619.2 - avg_words_per_second: 3290.0 - ETA: >2024-05-24 19:31:43\n", "2024-05-24 19:15:25 (UTC) - 0:17:13 - train - INFO - step: 000051 - done (%): 51.0 - loss: 0.777 - lr: 5.2e-05 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3690.4 - avg_words_per_second: 3297.0 - ETA: >2024-05-24 19:31:39\n", "2024-05-24 19:15:43 (UTC) - 0:17:32 - train - INFO - step: 000052 - done (%): 52.0 - loss: 0.804 - lr: 5.1e-05 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3589.4 - avg_words_per_second: 3302.2 - ETA: >2024-05-24 19:31:36\n", "2024-05-24 19:16:01 (UTC) - 0:17:49 - train - INFO - step: 000053 - done (%): 53.0 - loss: 0.800 - lr: 4.9e-05 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3692.1 - avg_words_per_second: 3308.8 - ETA: >2024-05-24 19:31:32\n", "2024-05-24 19:16:19 (UTC) - 0:18:08 - train - INFO - step: 000054 - done (%): 54.0 - loss: 0.804 - lr: 4.8e-05 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3588.5 - avg_words_per_second: 3313.6 - ETA: >2024-05-24 19:31:29\n", "2024-05-24 19:16:37 (UTC) - 0:18:26 - train - INFO - step: 000055 - done (%): 55.0 - loss: 0.854 - lr: 4.6e-05 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3571.2 - avg_words_per_second: 3317.9 - ETA: >2024-05-24 19:31:26\n", "2024-05-24 19:16:55 (UTC) - 0:18:44 - train - INFO - step: 000056 - done (%): 56.0 - loss: 0.819 - lr: 4.4e-05 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3696.4 - avg_words_per_second: 3324.0 - ETA: >2024-05-24 19:31:23\n", "2024-05-24 19:17:13 (UTC) - 
0:19:02 - train - INFO - step: 000057 - done (%): 57.0 - loss: 0.844 - lr: 4.3e-05 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3606.8 - avg_words_per_second: 3328.6 - ETA: >2024-05-24 19:31:20\n", "2024-05-24 19:17:31 (UTC) - 0:19:20 - train - INFO - step: 000058 - done (%): 58.0 - loss: 0.840 - lr: 4.1e-05 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3706.0 - avg_words_per_second: 3334.5 - ETA: >2024-05-24 19:31:16\n", "2024-05-24 19:17:49 (UTC) - 0:19:38 - train - INFO - step: 000059 - done (%): 59.0 - loss: 0.836 - lr: 3.9e-05 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3616.5 - avg_words_per_second: 3338.9 - ETA: >2024-05-24 19:31:14\n", "2024-05-24 19:18:07 (UTC) - 0:19:56 - train - INFO - step: 000060 - done (%): 60.0 - loss: 0.852 - lr: 3.8e-05 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3610.4 - avg_words_per_second: 3343.1 - ETA: >2024-05-24 19:31:11\n", "2024-05-24 19:18:25 (UTC) - 0:20:14 - train - INFO - step: 000061 - done (%): 61.0 - loss: 0.837 - lr: 3.6e-05 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3704.4 - avg_words_per_second: 3348.4 - ETA: >2024-05-24 19:31:08\n", "2024-05-24 19:18:43 (UTC) - 0:20:32 - train - INFO - step: 000062 - done (%): 62.0 - loss: 0.839 - lr: 3.5e-05 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3607.6 - avg_words_per_second: 3352.3 - ETA: >2024-05-24 19:31:06\n", "2024-05-24 19:19:01 (UTC) - 0:20:50 - train - INFO - step: 000063 - done (%): 63.0 - loss: 0.813 - lr: 3.3e-05 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3604.6 - avg_words_per_second: 3356.0 - ETA: >2024-05-24 19:31:04\n", "2024-05-24 19:19:19 (UTC) - 0:21:08 - train - INFO - step: 000064 - done (%): 64.0 - loss: 0.784 - lr: 3.1e-05 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3689.6 - avg_words_per_second: 3360.8 - ETA: >2024-05-24 19:31:01\n", 
"2024-05-24 19:19:37 (UTC) - 0:21:26 - train - INFO - step: 000065 - done (%): 65.0 - loss: 0.797 - lr: 3.0e-05 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3604.0 - avg_words_per_second: 3364.3 - ETA: >2024-05-24 19:30:59\n", "2024-05-24 19:19:55 (UTC) - 0:21:44 - train - INFO - step: 000066 - done (%): 66.0 - loss: 0.788 - lr: 2.8e-05 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3697.6 - avg_words_per_second: 3368.9 - ETA: >2024-05-24 19:30:56\n", "2024-05-24 19:20:13 (UTC) - 0:22:02 - train - INFO - step: 000067 - done (%): 67.0 - loss: 0.902 - lr: 2.7e-05 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3588.6 - avg_words_per_second: 3372.0 - ETA: >2024-05-24 19:30:55\n", "2024-05-24 19:20:32 (UTC) - 0:22:20 - train - INFO - step: 000068 - done (%): 68.0 - loss: 0.783 - lr: 2.5e-05 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3565.3 - avg_words_per_second: 3374.6 - ETA: >2024-05-24 19:30:53\n", "2024-05-24 19:20:49 (UTC) - 0:22:38 - train - INFO - step: 000069 - done (%): 69.0 - loss: 0.889 - lr: 2.4e-05 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3689.3 - avg_words_per_second: 3378.8 - ETA: >2024-05-24 19:30:51\n", "2024-05-24 19:21:08 (UTC) - 0:22:56 - train - INFO - step: 000070 - done (%): 70.0 - loss: 0.810 - lr: 2.3e-05 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3601.3 - avg_words_per_second: 3381.8 - ETA: >2024-05-24 19:30:49\n", "2024-05-24 19:21:26 (UTC) - 0:23:14 - train - INFO - step: 000071 - done (%): 71.0 - loss: 0.796 - lr: 2.1e-05 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3621.7 - avg_words_per_second: 3385.0 - ETA: >2024-05-24 19:30:47\n", "2024-05-24 19:21:43 (UTC) - 0:23:32 - train - INFO - step: 000072 - done (%): 72.0 - loss: 0.927 - lr: 2.0e-05 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3681.6 - avg_words_per_second: 3388.8 - ETA: 
>2024-05-24 19:30:45\n", "2024-05-24 19:22:02 (UTC) - 0:23:50 - train - INFO - step: 000073 - done (%): 73.0 - loss: 0.811 - lr: 1.9e-05 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3585.4 - avg_words_per_second: 3391.3 - ETA: >2024-05-24 19:30:44\n", "2024-05-24 19:22:20 (UTC) - 0:24:08 - train - INFO - step: 000074 - done (%): 74.0 - loss: 0.828 - lr: 1.7e-05 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3678.2 - avg_words_per_second: 3394.9 - ETA: >2024-05-24 19:30:42\n", "2024-05-24 19:22:38 (UTC) - 0:24:26 - train - INFO - step: 000075 - done (%): 75.0 - loss: 0.908 - lr: 1.6e-05 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3584.0 - avg_words_per_second: 3397.3 - ETA: >2024-05-24 19:30:40\n", "2024-05-24 19:22:56 (UTC) - 0:24:45 - train - INFO - step: 000076 - done (%): 76.0 - loss: 0.881 - lr: 1.5e-05 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3575.5 - avg_words_per_second: 3399.5 - ETA: >2024-05-24 19:30:39\n", "2024-05-24 19:23:14 (UTC) - 0:25:03 - train - INFO - step: 000077 - done (%): 77.0 - loss: 0.819 - lr: 1.4e-05 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3706.5 - avg_words_per_second: 3403.2 - ETA: >2024-05-24 19:30:37\n", "2024-05-24 19:23:32 (UTC) - 0:25:21 - train - INFO - step: 000078 - done (%): 78.0 - loss: 0.867 - lr: 1.3e-05 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3600.9 - avg_words_per_second: 3405.6 - ETA: >2024-05-24 19:30:35\n", "2024-05-24 19:23:50 (UTC) - 0:25:38 - train - INFO - step: 000079 - done (%): 79.0 - loss: 0.913 - lr: 1.2e-05 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3694.4 - avg_words_per_second: 3408.9 - ETA: >2024-05-24 19:30:34\n", "2024-05-24 19:24:08 (UTC) - 0:25:57 - train - INFO - step: 000080 - done (%): 80.0 - loss: 0.826 - lr: 1.1e-05 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3603.8 - 
avg_words_per_second: 3411.2 - ETA: >2024-05-24 19:30:32\n", "2024-05-24 19:24:26 (UTC) - 0:26:15 - train - INFO - step: 000081 - done (%): 81.0 - loss: 0.835 - lr: 9.5e-06 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3611.0 - avg_words_per_second: 3413.6 - ETA: >2024-05-24 19:30:31\n", "2024-05-24 19:24:44 (UTC) - 0:26:33 - train - INFO - step: 000082 - done (%): 82.0 - loss: 0.854 - lr: 8.6e-06 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3686.5 - avg_words_per_second: 3416.7 - ETA: >2024-05-24 19:30:29\n", "2024-05-24 19:25:02 (UTC) - 0:26:51 - train - INFO - step: 000083 - done (%): 83.0 - loss: 0.772 - lr: 7.7e-06 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3596.4 - avg_words_per_second: 3418.7 - ETA: >2024-05-24 19:30:28\n", "2024-05-24 19:25:20 (UTC) - 0:27:09 - train - INFO - step: 000084 - done (%): 84.0 - loss: 0.813 - lr: 6.8e-06 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3607.9 - avg_words_per_second: 3420.9 - ETA: >2024-05-24 19:30:27\n", "2024-05-24 19:25:38 (UTC) - 0:27:27 - train - INFO - step: 000085 - done (%): 85.0 - loss: 0.836 - lr: 6.0e-06 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3690.1 - avg_words_per_second: 3423.8 - ETA: >2024-05-24 19:30:25\n", "2024-05-24 19:25:56 (UTC) - 0:27:45 - train - INFO - step: 000086 - done (%): 86.0 - loss: 0.866 - lr: 5.3e-06 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3607.4 - avg_words_per_second: 3425.8 - ETA: >2024-05-24 19:30:24\n", "2024-05-24 19:26:14 (UTC) - 0:28:03 - train - INFO - step: 000087 - done (%): 87.0 - loss: 0.845 - lr: 4.6e-06 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3706.4 - avg_words_per_second: 3428.8 - ETA: >2024-05-24 19:30:22\n", "2024-05-24 19:26:32 (UTC) - 0:28:21 - train - INFO - step: 000088 - done (%): 88.0 - loss: 0.777 - lr: 3.9e-06 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - 
words_per_second: 3591.4 - avg_words_per_second: 3430.6 - ETA: >2024-05-24 19:30:21\n", "2024-05-24 19:26:51 (UTC) - 0:28:39 - train - INFO - step: 000089 - done (%): 89.0 - loss: 0.813 - lr: 3.3e-06 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3569.4 - avg_words_per_second: 3432.1 - ETA: >2024-05-24 19:30:21\n", "2024-05-24 19:27:08 (UTC) - 0:28:57 - train - INFO - step: 000090 - done (%): 90.0 - loss: 0.841 - lr: 2.7e-06 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3697.1 - avg_words_per_second: 3434.8 - ETA: >2024-05-24 19:30:19\n", "2024-05-24 19:27:26 (UTC) - 0:29:15 - train - INFO - step: 000091 - done (%): 91.0 - loss: 0.807 - lr: 2.2e-06 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3611.5 - avg_words_per_second: 3436.6 - ETA: >2024-05-24 19:30:18\n", "2024-05-24 19:27:45 (UTC) - 0:29:33 - train - INFO - step: 000092 - done (%): 92.0 - loss: 0.807 - lr: 1.7e-06 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3607.2 - avg_words_per_second: 3438.4 - ETA: >2024-05-24 19:30:17\n", "2024-05-24 19:28:02 (UTC) - 0:29:51 - train - INFO - step: 000093 - done (%): 93.0 - loss: 0.827 - lr: 1.3e-06 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3696.3 - avg_words_per_second: 3441.0 - ETA: >2024-05-24 19:30:16\n", "2024-05-24 19:28:21 (UTC) - 0:30:09 - train - INFO - step: 000094 - done (%): 94.0 - loss: 0.816 - lr: 9.8e-07 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3583.1 - avg_words_per_second: 3442.5 - ETA: >2024-05-24 19:30:15\n", "2024-05-24 19:28:38 (UTC) - 0:30:27 - train - INFO - step: 000095 - done (%): 95.0 - loss: 0.825 - lr: 6.8e-07 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3695.6 - avg_words_per_second: 3444.9 - ETA: >2024-05-24 19:30:13\n", "2024-05-24 19:28:57 (UTC) - 0:30:45 - train - INFO - step: 000096 - done (%): 96.0 - loss: 0.794 - lr: 4.4e-07 - peak_alloc_mem (GB): 22.2 
- alloc_mem (GB): 17.1 - words_per_second: 3580.1 - avg_words_per_second: 3446.3 - ETA: >2024-05-24 19:30:13\n", "2024-05-24 19:29:15 (UTC) - 0:31:04 - train - INFO - step: 000097 - done (%): 97.0 - loss: 0.884 - lr: 2.5e-07 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3574.7 - avg_words_per_second: 3447.6 - ETA: >2024-05-24 19:30:12\n", "2024-05-24 19:29:33 (UTC) - 0:31:21 - train - INFO - step: 000098 - done (%): 98.0 - loss: 0.826 - lr: 1.1e-07 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3698.8 - avg_words_per_second: 3450.0 - ETA: >2024-05-24 19:30:11\n", "2024-05-24 19:29:51 (UTC) - 0:31:40 - train - INFO - step: 000099 - done (%): 99.0 - loss: 0.771 - lr: 2.8e-08 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 3604.2 - avg_words_per_second: 3451.5 - ETA: >2024-05-24 19:30:10\n", "2024-05-24 19:30:09 (UTC) - 0:31:57 - eval - INFO - Start eval...\n", "2024-05-24 19:32:04 (UTC) - 0:33:53 - eval - INFO - Eval finished!\n", "2024-05-24 19:32:04 (UTC) - 0:33:53 - train - INFO - step: 000100 - eval_perplexity: 1.779 - eval_loss: 0.831 - train_loss: 0.762\n", "2024-05-24 19:32:04 (UTC) - 0:33:53 - train - INFO - step: 000100 - done (%): 100.0 - loss: 0.762 - lr: 4.0e-10 - peak_alloc_mem (GB): 22.2 - alloc_mem (GB): 17.1 - words_per_second: 492.8 - avg_words_per_second: 3256.0 - ETA: >2024-05-24 19:32:04\n", "2024-05-24 19:32:04 (UTC) - 0:33:53 - checkpointing - INFO - Dumping checkpoint in /content/test_ultra/checkpoints/checkpoint_000100/consolidated using tmp name: tmp.consolidated\n", "2024-05-24 19:32:05 (UTC) - 0:33:53 - checkpointing - INFO - Done dumping checkpoint in /content/test_ultra/checkpoints/checkpoint_000100/consolidated for step: 100\n", "2024-05-24 19:32:05 (UTC) - 0:33:53 - checkpointing - INFO - Done deleting checkpoints \n", "2024-05-24 19:32:05 (UTC) - 0:33:53 - checkpointing - INFO - Done!\n", "2024-05-24 19:32:05 (UTC) - 0:33:53 - train - INFO - done!\n", "2024-05-24 
19:32:05 (UTC) - 0:33:53 - utils - INFO - Closing: eval_logger\n", "2024-05-24 19:32:05 (UTC) - 0:33:53 - utils - INFO - Closed: eval_logger\n", "2024-05-24 19:32:05 (UTC) - 0:33:53 - utils - INFO - Closing: metrics_logger\n", "2024-05-24 19:32:05 (UTC) - 0:33:53 - utils - INFO - Closed: metrics_logger\n", "2024-05-24 19:32:05 (UTC) - 0:33:53 - train - INFO - Closed everything!\n" ] } ] }, { "cell_type": "markdown", "source": [ "## Inference" ], "metadata": { "id": "ruJ29JFn98zE" } }, { "cell_type": "code", "source": [
"# Pin to the release recorded in this cell's output (1.1.0): the inference cell below\n",
"# imports `mistral_inference.model`, and other releases may lay out their modules differently.\n",
"# `%pip` (rather than `!pip`) installs into the environment of the running kernel.\n",
"%pip install mistral_inference==1.1.0"
], "metadata": { "id": "7BWNGKt9-Kxz", "outputId": "61479b03-c608-455b-e99b-32d96ada9ca6", "colab": { "base_uri": "https://localhost:8080/" } }, "execution_count": 24, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Collecting mistral_inference\n", " Downloading mistral_inference-1.1.0-py3-none-any.whl (21 kB)\n", "Requirement already satisfied: fire>=0.6.0 in /usr/local/lib/python3.10/dist-packages (from mistral_inference) (0.6.0)\n", "Requirement already satisfied: mistral_common<2.0.0,>=1.0.0 in /usr/local/lib/python3.10/dist-packages (from mistral_inference) (1.1.0)\n", "Requirement already satisfied: safetensors>=0.4.0 in /usr/local/lib/python3.10/dist-packages (from mistral_inference) (0.4.3)\n", "Requirement already satisfied: simple-parsing>=0.1.5 in /usr/local/lib/python3.10/dist-packages (from mistral_inference) (0.1.5)\n", "Requirement already satisfied: xformers>=0.0.24 in /usr/local/lib/python3.10/dist-packages (from mistral_inference) (0.0.24)\n", "Requirement already satisfied: six in /usr/local/lib/python3.10/dist-packages (from fire>=0.6.0->mistral_inference) (1.16.0)\n", "Requirement already satisfied: termcolor in /usr/local/lib/python3.10/dist-packages (from fire>=0.6.0->mistral_inference) (2.4.0)\n", "Requirement already satisfied: jsonschema==4.21.1 in /usr/local/lib/python3.10/dist-packages (from mistral_common<2.0.0,>=1.0.0->mistral_inference) (4.21.1)\n", "Requirement
already satisfied: pydantic==2.6.1 in /usr/local/lib/python3.10/dist-packages (from mistral_common<2.0.0,>=1.0.0->mistral_inference) (2.6.1)\n", "Requirement already satisfied: sentencepiece==0.1.99 in /usr/local/lib/python3.10/dist-packages (from mistral_common<2.0.0,>=1.0.0->mistral_inference) (0.1.99)\n", "Requirement already satisfied: typing-extensions<5.0.0,>=4.11.0 in /usr/local/lib/python3.10/dist-packages (from mistral_common<2.0.0,>=1.0.0->mistral_inference) (4.11.0)\n", "Requirement already satisfied: attrs>=22.2.0 in /usr/local/lib/python3.10/dist-packages (from jsonschema==4.21.1->mistral_common<2.0.0,>=1.0.0->mistral_inference) (23.2.0)\n", "Requirement already satisfied: jsonschema-specifications>=2023.03.6 in /usr/local/lib/python3.10/dist-packages (from jsonschema==4.21.1->mistral_common<2.0.0,>=1.0.0->mistral_inference) (2023.12.1)\n", "Requirement already satisfied: referencing>=0.28.4 in /usr/local/lib/python3.10/dist-packages (from jsonschema==4.21.1->mistral_common<2.0.0,>=1.0.0->mistral_inference) (0.35.1)\n", "Requirement already satisfied: rpds-py>=0.7.1 in /usr/local/lib/python3.10/dist-packages (from jsonschema==4.21.1->mistral_common<2.0.0,>=1.0.0->mistral_inference) (0.18.1)\n", "Requirement already satisfied: annotated-types>=0.4.0 in /usr/local/lib/python3.10/dist-packages (from pydantic==2.6.1->mistral_common<2.0.0,>=1.0.0->mistral_inference) (0.7.0)\n", "Requirement already satisfied: pydantic-core==2.16.2 in /usr/local/lib/python3.10/dist-packages (from pydantic==2.6.1->mistral_common<2.0.0,>=1.0.0->mistral_inference) (2.16.2)\n", "Requirement already satisfied: docstring-parser~=0.15 in /usr/local/lib/python3.10/dist-packages (from simple-parsing>=0.1.5->mistral_inference) (0.16)\n", "Requirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (from xformers>=0.0.24->mistral_inference) (1.25.2)\n", "Requirement already satisfied: torch==2.2.0 in /usr/local/lib/python3.10/dist-packages (from 
xformers>=0.0.24->mistral_inference) (2.2.0)\n", "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from torch==2.2.0->xformers>=0.0.24->mistral_inference) (3.14.0)\n", "Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from torch==2.2.0->xformers>=0.0.24->mistral_inference) (1.12)\n", "Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch==2.2.0->xformers>=0.0.24->mistral_inference) (3.3)\n", "Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch==2.2.0->xformers>=0.0.24->mistral_inference) (3.1.4)\n", "Requirement already satisfied: fsspec in /usr/local/lib/python3.10/dist-packages (from torch==2.2.0->xformers>=0.0.24->mistral_inference) (2023.6.0)\n", "Requirement already satisfied: nvidia-cuda-nvrtc-cu12==12.1.105 in /usr/local/lib/python3.10/dist-packages (from torch==2.2.0->xformers>=0.0.24->mistral_inference) (12.1.105)\n", "Requirement already satisfied: nvidia-cuda-runtime-cu12==12.1.105 in /usr/local/lib/python3.10/dist-packages (from torch==2.2.0->xformers>=0.0.24->mistral_inference) (12.1.105)\n", "Requirement already satisfied: nvidia-cuda-cupti-cu12==12.1.105 in /usr/local/lib/python3.10/dist-packages (from torch==2.2.0->xformers>=0.0.24->mistral_inference) (12.1.105)\n", "Requirement already satisfied: nvidia-cudnn-cu12==8.9.2.26 in /usr/local/lib/python3.10/dist-packages (from torch==2.2.0->xformers>=0.0.24->mistral_inference) (8.9.2.26)\n", "Requirement already satisfied: nvidia-cublas-cu12==12.1.3.1 in /usr/local/lib/python3.10/dist-packages (from torch==2.2.0->xformers>=0.0.24->mistral_inference) (12.1.3.1)\n", "Requirement already satisfied: nvidia-cufft-cu12==11.0.2.54 in /usr/local/lib/python3.10/dist-packages (from torch==2.2.0->xformers>=0.0.24->mistral_inference) (11.0.2.54)\n", "Requirement already satisfied: nvidia-curand-cu12==10.3.2.106 in /usr/local/lib/python3.10/dist-packages (from 
torch==2.2.0->xformers>=0.0.24->mistral_inference) (10.3.2.106)\n", "Requirement already satisfied: nvidia-cusolver-cu12==11.4.5.107 in /usr/local/lib/python3.10/dist-packages (from torch==2.2.0->xformers>=0.0.24->mistral_inference) (11.4.5.107)\n", "Requirement already satisfied: nvidia-cusparse-cu12==12.1.0.106 in /usr/local/lib/python3.10/dist-packages (from torch==2.2.0->xformers>=0.0.24->mistral_inference) (12.1.0.106)\n", "Requirement already satisfied: nvidia-nccl-cu12==2.19.3 in /usr/local/lib/python3.10/dist-packages (from torch==2.2.0->xformers>=0.0.24->mistral_inference) (2.19.3)\n", "Requirement already satisfied: nvidia-nvtx-cu12==12.1.105 in /usr/local/lib/python3.10/dist-packages (from torch==2.2.0->xformers>=0.0.24->mistral_inference) (12.1.105)\n", "Requirement already satisfied: triton==2.2.0 in /usr/local/lib/python3.10/dist-packages (from torch==2.2.0->xformers>=0.0.24->mistral_inference) (2.2.0)\n", "Requirement already satisfied: nvidia-nvjitlink-cu12 in /usr/local/lib/python3.10/dist-packages (from nvidia-cusolver-cu12==11.4.5.107->torch==2.2.0->xformers>=0.0.24->mistral_inference) (12.5.40)\n", "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch==2.2.0->xformers>=0.0.24->mistral_inference) (2.1.5)\n", "Requirement already satisfied: mpmath>=0.19 in /usr/local/lib/python3.10/dist-packages (from sympy->torch==2.2.0->xformers>=0.0.24->mistral_inference) (1.3.0)\n", "Installing collected packages: mistral_inference\n", "Successfully installed mistral_inference-1.1.0\n" ] } ] }, { "cell_type": "code", "source": [
"from mistral_inference.model import Transformer\n",
"from mistral_inference.generate import generate\n",
"\n",
"from mistral_common.tokens.tokenizers.mistral import MistralTokenizer\n",
"from mistral_common.protocol.instruct.messages import UserMessage\n",
"from mistral_common.protocol.instruct.request import ChatCompletionRequest\n",
"\n",
"# Everything a reader may need to adapt lives in this block.\n",
"MODEL_DIR = \"/content/mistral_models\"  # change to extracted model dir\n",
"TOKENIZER_PATH = f\"{MODEL_DIR}/tokenizer.model.v3\"  # change to extracted tokenizer file\n",
"# LoRA weights written by the fine-tuning run at step 100.\n",
"LORA_PATH = \"/content/test_ultra/checkpoints/checkpoint_000100/consolidated/lora.safetensors\"\n",
"PROMPT = \"Explain Machine Learning to me in a nutshell.\"\n",
"MAX_NEW_TOKENS = 64  # upper bound on generated tokens; longer answers are cut off\n",
"TEMPERATURE = 0.0  # sampling temperature passed to generate()\n",
"\n",
"# Load the base model, then apply the fine-tuned LoRA weights on top of it.\n",
"tokenizer = MistralTokenizer.from_file(TOKENIZER_PATH)\n",
"model = Transformer.from_folder(MODEL_DIR)\n",
"model.load_lora(LORA_PATH)\n",
"\n",
"# Encode a single-turn chat request into prompt tokens.\n",
"completion_request = ChatCompletionRequest(messages=[UserMessage(content=PROMPT)])\n",
"tokens = tokenizer.encode_chat_completion(completion_request).tokens\n",
"\n",
"# The underlying tokenizer provides both the EOS id and the decoder.\n",
"raw_tokenizer = tokenizer.instruct_tokenizer.tokenizer\n",
"out_tokens, _ = generate(\n",
"    [tokens],\n",
"    model,\n",
"    max_tokens=MAX_NEW_TOKENS,\n",
"    temperature=TEMPERATURE,\n",
"    eos_id=raw_tokenizer.eos_id,\n",
")\n",
"result = raw_tokenizer.decode(out_tokens[0])\n",
"\n",
"print(result)"
], "metadata": { "id": "F-xLs2Ot9-il", "outputId": "f0c6f171-b14c-4d0c-d5e9-cb24a7f07653", "colab": { "base_uri": "https://localhost:8080/" } }, "execution_count": 25, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Machine learning is a subset of artificial intelligence that involves the use of algorithms to learn from data and make predictions or decisions without being explicitly programmed. It is a type of computer science that enables machines to learn and improve from experience without being explicitly programmed. Machine learning algorithms can learn from data and make predictions or decisions based\n" ] } ] }, { "cell_type": "code", "source": [], "metadata": { "id": "Vd8A8JP4Fx3C" }, "execution_count": null, "outputs": [] } ] }