diff --git "a/t5_fine_tuning.ipynb" "b/t5_fine_tuning.ipynb" new file mode 100644--- /dev/null +++ "b/t5_fine_tuning.ipynb" @@ -0,0 +1,12035 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "provenance": [], + "collapsed_sections": [ + "HVxGfmEMCKs_", + "RKNr7fgzcKpZ", + "vfhlYUUV2NIh", + "b3C13iabZvwK", + "qdEgCwL7cIyi", + "W4cfw8bMcNdA", + "brPOSAkjNP5t", + "Dhqigmiw2hVh", + "0B4IhzEgO21B", + "cANrUEXhO8QY", + "DEWi6c-pGZV9", + "GwdWdHG0RP5J", + "iq8M8nbTSJlE", + "vZ-YLmJyg64T", + "hOxk-ZoJmamm", + "aVfmE4O3Ku7H", + "AgNV3TMzqSvj" + ], + "machine_shape": "hm" + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "accelerator": "GPU", + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "7d8f60bfc0a248e58028b6e8a477a5f7": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "state": { + "_view_name": "HBoxView", + "_dom_classes": [], + "_model_name": "HBoxModel", + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.5.0", + "box_style": "", + "layout": "IPY_MODEL_72dc1e39b931429883e68c0603797896", + "_model_module": "@jupyter-widgets/controls", + "children": [ + "IPY_MODEL_cde60c5e18f04ba792fff8c2ac33f470", + "IPY_MODEL_c0c0df12695b4a1eacf8fa4ccc0ac62c" + ] + }, + "model_module_version": "1.5.0" + }, + "72dc1e39b931429883e68c0603797896": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + }, + "model_module_version": "1.2.0" + }, + "cde60c5e18f04ba792fff8c2ac33f470": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "state": { + "_view_name": "ProgressView", + "style": "IPY_MODEL_72ea881ce3f445a9983d858b76dd257b", + "_dom_classes": [], + "description": "Downloading: 100%", + "_model_name": "FloatProgressModel", + "bar_style": "success", + "max": 791656, + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "value": 791656, + "_view_count": null, + "_view_module_version": "1.5.0", + "orientation": "horizontal", + "min": 0, + "description_tooltip": null, + "_model_module": "@jupyter-widgets/controls", + "layout": "IPY_MODEL_d0f0c28a14b242f8990a547ed7f87c04" + }, + "model_module_version": "1.5.0" + }, + "c0c0df12695b4a1eacf8fa4ccc0ac62c": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "state": { + "_view_name": "HTMLView", + 
"style": "IPY_MODEL_f97741534b554be3b5cdccd45c73b317", + "_dom_classes": [], + "description": "", + "_model_name": "HTMLModel", + "placeholder": "​", + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "value": " 792k/792k [02:08<00:00, 6.18kB/s]", + "_view_count": null, + "_view_module_version": "1.5.0", + "description_tooltip": null, + "_model_module": "@jupyter-widgets/controls", + "layout": "IPY_MODEL_1e70a3dc7090487fa883e932bff395cb" + }, + "model_module_version": "1.5.0" + }, + "72ea881ce3f445a9983d858b76dd257b": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "state": { + "_view_name": "StyleView", + "_model_name": "ProgressStyleModel", + "description_width": "initial", + "_view_module": "@jupyter-widgets/base", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.2.0", + "bar_color": null, + "_model_module": "@jupyter-widgets/controls" + }, + "model_module_version": "1.5.0" + }, + "d0f0c28a14b242f8990a547ed7f87c04": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + }, + "model_module_version": "1.2.0" + }, + "f97741534b554be3b5cdccd45c73b317": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "state": { + "_view_name": "StyleView", + "_model_name": "DescriptionStyleModel", + "description_width": "", + "_view_module": "@jupyter-widgets/base", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.2.0", + "_model_module": "@jupyter-widgets/controls" + }, + "model_module_version": "1.5.0" + }, + "1e70a3dc7090487fa883e932bff395cb": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": 
null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + }, + "model_module_version": "1.2.0" + }, + "f414bac332054c7f86af89b8e50c7d73": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "state": { + "_view_name": "HBoxView", + "_dom_classes": [], + "_model_name": "HBoxModel", + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.5.0", + "box_style": "", + "layout": "IPY_MODEL_1d9c52a1bb8843b6b0f151571cbf30a4", + "_model_module": "@jupyter-widgets/controls", + "children": [ + "IPY_MODEL_ed039b8125714030b03912fb29a93ca4", + "IPY_MODEL_d9b445b8b3b04569adf22429259b4954" + ] + }, + "model_module_version": "1.5.0" + }, + "1d9c52a1bb8843b6b0f151571cbf30a4": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + }, + "model_module_version": "1.2.0" + }, + "ed039b8125714030b03912fb29a93ca4": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "state": { + "_view_name": "ProgressView", + "style": "IPY_MODEL_6c61b3c76d7045eb825172ba51b3fa63", + "_dom_classes": [], + "description": "Downloading: 100%", + "_model_name": "FloatProgressModel", + "bar_style": "success", + "max": 1199, + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "value": 1199, + "_view_count": null, + "_view_module_version": "1.5.0", + "orientation": "horizontal", + "min": 0, + "description_tooltip": null, + "_model_module": "@jupyter-widgets/controls", + "layout": "IPY_MODEL_d11ffd1efc024c1ca86276430d29fd1e" + }, + "model_module_version": "1.5.0" + }, + "d9b445b8b3b04569adf22429259b4954": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "state": { + "_view_name": "HTMLView", + "style": "IPY_MODEL_22fac35d924f464ca0b33be21a566a86", + "_dom_classes": [], + "description": "", + "_model_name": "HTMLModel", + "placeholder": "​", + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "value": " 1.20k/1.20k [00:20<00:00, 58.3B/s]", + "_view_count": null, + "_view_module_version": "1.5.0", + 
"description_tooltip": null, + "_model_module": "@jupyter-widgets/controls", + "layout": "IPY_MODEL_cfe128b0d2c648c18d2255b3f8506a09" + }, + "model_module_version": "1.5.0" + }, + "6c61b3c76d7045eb825172ba51b3fa63": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "state": { + "_view_name": "StyleView", + "_model_name": "ProgressStyleModel", + "description_width": "initial", + "_view_module": "@jupyter-widgets/base", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.2.0", + "bar_color": null, + "_model_module": "@jupyter-widgets/controls" + }, + "model_module_version": "1.5.0" + }, + "d11ffd1efc024c1ca86276430d29fd1e": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + }, + "model_module_version": "1.2.0" + }, + "22fac35d924f464ca0b33be21a566a86": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "state": { + "_view_name": "StyleView", + "_model_name": "DescriptionStyleModel", + "description_width": "", + "_view_module": "@jupyter-widgets/base", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.2.0", + "_model_module": "@jupyter-widgets/controls" + }, + "model_module_version": "1.5.0" + }, + "cfe128b0d2c648c18d2255b3f8506a09": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + 
"display": null, + "left": null + }, + "model_module_version": "1.2.0" + }, + "c34ac6d2548249819c1eab28956edec4": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "state": { + "_view_name": "HBoxView", + "_dom_classes": [], + "_model_name": "HBoxModel", + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.5.0", + "box_style": "", + "layout": "IPY_MODEL_de2c77b3fb0f4dba99f92062b2db5328", + "_model_module": "@jupyter-widgets/controls", + "children": [ + "IPY_MODEL_6ea23f0979824aac935f3f1ad10a86cd", + "IPY_MODEL_6452bc3b5ad445a8a5e272207fe4504d" + ] + }, + "model_module_version": "1.5.0" + }, + "de2c77b3fb0f4dba99f92062b2db5328": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + }, + "model_module_version": "1.2.0" + }, + "6ea23f0979824aac935f3f1ad10a86cd": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "state": { + "_view_name": "ProgressView", + "style": "IPY_MODEL_d6ef508766c54f8993d1d1f3d7cac040", + "_dom_classes": [], + "description": "Downloading: 100%", + "_model_name": "FloatProgressModel", + "bar_style": "success", + "max": 891691430, + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "value": 891691430, + "_view_count": null, + "_view_module_version": "1.5.0", + "orientation": "horizontal", + "min": 0, + "description_tooltip": null, + "_model_module": "@jupyter-widgets/controls", + "layout": "IPY_MODEL_1b69bbddeb244defab9e21690a45c79e" + }, + "model_module_version": "1.5.0" + }, + "6452bc3b5ad445a8a5e272207fe4504d": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "state": { + "_view_name": "HTMLView", + "style": "IPY_MODEL_4a2b56fd6780470ab1574509fa432183", + "_dom_classes": [], + "description": "", + "_model_name": "HTMLModel", + "placeholder": "​", + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "value": " 892M/892M [00:17<00:00, 51.3MB/s]", + "_view_count": null, + "_view_module_version": "1.5.0", + "description_tooltip": null, + "_model_module": "@jupyter-widgets/controls", + "layout": "IPY_MODEL_3853231cd966465882a93fad9c5dc428" + }, + "model_module_version": "1.5.0" + }, + "d6ef508766c54f8993d1d1f3d7cac040": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "state": { + "_view_name": 
"StyleView", + "_model_name": "ProgressStyleModel", + "description_width": "initial", + "_view_module": "@jupyter-widgets/base", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.2.0", + "bar_color": null, + "_model_module": "@jupyter-widgets/controls" + }, + "model_module_version": "1.5.0" + }, + "1b69bbddeb244defab9e21690a45c79e": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + }, + "model_module_version": "1.2.0" + }, + "4a2b56fd6780470ab1574509fa432183": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "state": { + "_view_name": "StyleView", + "_model_name": "DescriptionStyleModel", + "description_width": "", + "_view_module": "@jupyter-widgets/base", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.2.0", + "_model_module": "@jupyter-widgets/controls" + }, + "model_module_version": "1.5.0" + }, + "3853231cd966465882a93fad9c5dc428": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + }, + "model_module_version": "1.2.0" + }, + "915a0b65612243668570c555a47a6c37": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "state": { + "_view_name": "HBoxView", + "_dom_classes": [], + "_model_name": "HBoxModel", + "_view_module": "@jupyter-widgets/controls", + 
"_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.5.0", + "box_style": "", + "layout": "IPY_MODEL_c85b348624504af294b78de744969493", + "_model_module": "@jupyter-widgets/controls", + "children": [ + "IPY_MODEL_d56a6918840e4f6588af5da5f8f54015", + "IPY_MODEL_41db48cf488a4522b1f04b33c2261262" + ] + }, + "model_module_version": "1.5.0" + }, + "c85b348624504af294b78de744969493": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": "row wrap", + "width": "100%", + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": "inline-flex", + "left": null + }, + "model_module_version": "1.2.0" + }, + "d56a6918840e4f6588af5da5f8f54015": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "state": { + "_view_name": "ProgressView", + "style": "IPY_MODEL_8c2d9ac8c22f486299949f4cbed16437", + "_dom_classes": [], + "description": "Validation sanity check: 100%", + "_model_name": "FloatProgressModel", + "bar_style": "info", + "max": 1, + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "value": 1, + "_view_count": null, + "_view_module_version": "1.5.0", + "orientation": "horizontal", + "min": 0, + "description_tooltip": null, + "_model_module": "@jupyter-widgets/controls", + "layout": "IPY_MODEL_222974dba69145e7b171360bec239ba5" + }, + "model_module_version": "1.5.0" + }, + "41db48cf488a4522b1f04b33c2261262": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "state": { + "_view_name": "HTMLView", + "style": "IPY_MODEL_9e95200811bb497ab0ac0229f5e0ddaa", + "_dom_classes": [], + "description": "", + "_model_name": "HTMLModel", + "placeholder": "​", + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "value": " 5/5 [00:01<00:00, 3.24it/s]", + "_view_count": null, + "_view_module_version": "1.5.0", + "description_tooltip": null, + "_model_module": "@jupyter-widgets/controls", + "layout": "IPY_MODEL_3773b14f23974ad3a5bbb7ff947e68ca" + }, + "model_module_version": "1.5.0" + }, + "8c2d9ac8c22f486299949f4cbed16437": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "state": { + "_view_name": "StyleView", + "_model_name": "ProgressStyleModel", + "description_width": "initial", + "_view_module": "@jupyter-widgets/base", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.2.0", + "bar_color": null, + "_model_module": "@jupyter-widgets/controls" + }, + "model_module_version": "1.5.0" + }, + 
"222974dba69145e7b171360bec239ba5": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": "2", + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + }, + "model_module_version": "1.2.0" + }, + "9e95200811bb497ab0ac0229f5e0ddaa": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "state": { + "_view_name": "StyleView", + "_model_name": "DescriptionStyleModel", + "description_width": "", + "_view_module": "@jupyter-widgets/base", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.2.0", + "_model_module": "@jupyter-widgets/controls" + }, + "model_module_version": "1.5.0" + }, + "3773b14f23974ad3a5bbb7ff947e68ca": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + }, + "model_module_version": "1.2.0" + }, + "3ec26f803d124dd0877e1ce0e3517f68": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "state": { + "_view_name": "HBoxView", + "_dom_classes": [], + "_model_name": "HBoxModel", + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.5.0", + "box_style": "", + "layout": "IPY_MODEL_aabb0b2f2ae64684a80f1ea39c9a7d1b", + "_model_module": "@jupyter-widgets/controls", + "children": [ + "IPY_MODEL_885696e0606c4353a5d21feec03aebc7", + "IPY_MODEL_659dd7302f3a40038834c4f1d8e59250" + ] + }, + 
"model_module_version": "1.5.0" + }, + "aabb0b2f2ae64684a80f1ea39c9a7d1b": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": "row wrap", + "width": "100%", + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": "inline-flex", + "left": null + }, + "model_module_version": "1.2.0" + }, + "885696e0606c4353a5d21feec03aebc7": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "state": { + "_view_name": "ProgressView", + "style": "IPY_MODEL_6f3859c80aa945e4b4ae2aa957755b7c", + "_dom_classes": [], + "description": "Epoch 2: 100%", + "_model_name": "FloatProgressModel", + "bar_style": "success", + "max": 3125, + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "value": 3125, + "_view_count": null, + "_view_module_version": "1.5.0", + "orientation": "horizontal", + "min": 0, + "description_tooltip": null, + "_model_module": "@jupyter-widgets/controls", + "layout": "IPY_MODEL_a840a738d20b4f43baf18453db53fdf0" + }, + "model_module_version": "1.5.0" + }, + "659dd7302f3a40038834c4f1d8e59250": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "state": { + "_view_name": "HTMLView", + "style": "IPY_MODEL_f7139c4e04374ffbafe6a849500c6369", + "_dom_classes": [], + "description": "", + "_model_name": "HTMLModel", + "placeholder": "​", + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "value": " 3125/3125 [54:28<00:00, 1.05s/it, loss=0.003, v_num=0, val_loss=0.0874]", + "_view_count": null, + "_view_module_version": "1.5.0", + "description_tooltip": null, + "_model_module": "@jupyter-widgets/controls", + "layout": "IPY_MODEL_ef8f0b7c9b0c4f829e3ad59e83cbdd67" + }, + "model_module_version": "1.5.0" + }, + "6f3859c80aa945e4b4ae2aa957755b7c": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "state": { + "_view_name": "StyleView", + "_model_name": "ProgressStyleModel", + "description_width": "initial", + "_view_module": "@jupyter-widgets/base", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.2.0", + "bar_color": null, + "_model_module": "@jupyter-widgets/controls" + }, + "model_module_version": "1.5.0" + }, + "a840a738d20b4f43baf18453db53fdf0": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + 
"_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": "2", + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + }, + "model_module_version": "1.2.0" + }, + "f7139c4e04374ffbafe6a849500c6369": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "state": { + "_view_name": "StyleView", + "_model_name": "DescriptionStyleModel", + "description_width": "", + "_view_module": "@jupyter-widgets/base", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.2.0", + "_model_module": "@jupyter-widgets/controls" + }, + "model_module_version": "1.5.0" + }, + "ef8f0b7c9b0c4f829e3ad59e83cbdd67": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + }, + "model_module_version": "1.2.0" + }, + "dbe7a4854b8f420faaea8de4583fb1f0": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "state": { + "_view_name": "HBoxView", + "_dom_classes": [], + "_model_name": "HBoxModel", + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.5.0", + "box_style": "", + "layout": "IPY_MODEL_4d1f674483d44e559ae1de553dd1d726", + "_model_module": "@jupyter-widgets/controls", + "children": [ + "IPY_MODEL_ce506c0137914e4db93b9db35154c62a", + "IPY_MODEL_e92a181ff64d4e0290236a91cbdb8d67" + ] + }, + "model_module_version": "1.5.0" + }, + "4d1f674483d44e559ae1de553dd1d726": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": 
"@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": "row wrap", + "width": "100%", + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": "inline-flex", + "left": null + }, + "model_module_version": "1.2.0" + }, + "ce506c0137914e4db93b9db35154c62a": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "state": { + "_view_name": "ProgressView", + "style": "IPY_MODEL_e8f7179c238e4d2d91d456b2c07e1b3e", + "_dom_classes": [], + "description": "Validating: 100%", + "_model_name": "FloatProgressModel", + "bar_style": "info", + "max": 1, + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "value": 1, + "_view_count": null, + "_view_module_version": "1.5.0", + "orientation": "horizontal", + "min": 0, + "description_tooltip": null, + "_model_module": "@jupyter-widgets/controls", + "layout": "IPY_MODEL_e67100d71b5047158ab48ef0fd36cb99" + }, + "model_module_version": "1.5.0" + }, + "e92a181ff64d4e0290236a91cbdb8d67": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "state": { + "_view_name": "HTMLView", + "style": "IPY_MODEL_17f7e321de81404dabaa3e84fadce2cf", + "_dom_classes": [], + "description": "", + "_model_name": "HTMLModel", + "placeholder": "​", + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "value": " 250/250 [00:52<00:00, 4.79it/s]", + "_view_count": null, + "_view_module_version": "1.5.0", + "description_tooltip": null, + "_model_module": "@jupyter-widgets/controls", + "layout": "IPY_MODEL_a15e2fcc467242cb9fad5b2082a70c39" + }, + "model_module_version": "1.5.0" + }, + "e8f7179c238e4d2d91d456b2c07e1b3e": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "state": { + "_view_name": "StyleView", + "_model_name": "ProgressStyleModel", + "description_width": "initial", + "_view_module": "@jupyter-widgets/base", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.2.0", + "bar_color": null, + "_model_module": "@jupyter-widgets/controls" + }, + "model_module_version": "1.5.0" + }, + "e67100d71b5047158ab48ef0fd36cb99": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": 
null, + "grid_template_columns": null, + "flex": "2", + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + }, + "model_module_version": "1.2.0" + }, + "17f7e321de81404dabaa3e84fadce2cf": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "state": { + "_view_name": "StyleView", + "_model_name": "DescriptionStyleModel", + "description_width": "", + "_view_module": "@jupyter-widgets/base", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.2.0", + "_model_module": "@jupyter-widgets/controls" + }, + "model_module_version": "1.5.0" + }, + "a15e2fcc467242cb9fad5b2082a70c39": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + }, + "model_module_version": "1.2.0" + }, + "f40c9bf16c9a473ba758a6439dce2652": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "state": { + "_view_name": "HBoxView", + "_dom_classes": [], + "_model_name": "HBoxModel", + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.5.0", + "box_style": "", + "layout": "IPY_MODEL_8d17a251bf1440d4aa8513ad5f15ba1d", + "_model_module": "@jupyter-widgets/controls", + "children": [ + "IPY_MODEL_165319529b364183ae344a9a14f5bc52", + "IPY_MODEL_3d0c08f3abbe421d83f2b35583221291" + ] + }, + "model_module_version": "1.5.0" + }, + "8d17a251bf1440d4aa8513ad5f15ba1d": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": "row wrap", + "width": "100%", + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, 
+ "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": "inline-flex", + "left": null + }, + "model_module_version": "1.2.0" + }, + "165319529b364183ae344a9a14f5bc52": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "state": { + "_view_name": "ProgressView", + "style": "IPY_MODEL_6e851577f682494c894b9afdd07b1201", + "_dom_classes": [], + "description": "Validating: 100%", + "_model_name": "FloatProgressModel", + "bar_style": "info", + "max": 1, + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "value": 1, + "_view_count": null, + "_view_module_version": "1.5.0", + "orientation": "horizontal", + "min": 0, + "description_tooltip": null, + "_model_module": "@jupyter-widgets/controls", + "layout": "IPY_MODEL_e67e9e945a9c430f9844946cd81aae3a" + }, + "model_module_version": "1.5.0" + }, + "3d0c08f3abbe421d83f2b35583221291": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "state": { + "_view_name": "HTMLView", + "style": "IPY_MODEL_34fbc6e29df046faaedd9fe3230559cb", + "_dom_classes": [], + "description": "", + "_model_name": "HTMLModel", + "placeholder": "​", + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "value": " 250/250 [00:53<00:00, 4.71it/s]", + "_view_count": null, + "_view_module_version": "1.5.0", + "description_tooltip": null, + "_model_module": "@jupyter-widgets/controls", + "layout": "IPY_MODEL_bbbdd81a2e8f4d68b33d698f45ccc9ae" + }, + "model_module_version": "1.5.0" + }, + "6e851577f682494c894b9afdd07b1201": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "state": { + "_view_name": "StyleView", + "_model_name": "ProgressStyleModel", + "description_width": "initial", + "_view_module": "@jupyter-widgets/base", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.2.0", + "bar_color": null, + "_model_module": "@jupyter-widgets/controls" + }, + "model_module_version": "1.5.0" + }, + "e67e9e945a9c430f9844946cd81aae3a": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": "2", + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + 
"order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + }, + "model_module_version": "1.2.0" + }, + "34fbc6e29df046faaedd9fe3230559cb": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "state": { + "_view_name": "StyleView", + "_model_name": "DescriptionStyleModel", + "description_width": "", + "_view_module": "@jupyter-widgets/base", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.2.0", + "_model_module": "@jupyter-widgets/controls" + }, + "model_module_version": "1.5.0" + }, + "bbbdd81a2e8f4d68b33d698f45ccc9ae": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + }, + "model_module_version": "1.2.0" + }, + "6aaf51cb9ad44c94b6a174a8768904f7": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "state": { + "_view_name": "HBoxView", + "_dom_classes": [], + "_model_name": "HBoxModel", + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.5.0", + "box_style": "", + "layout": "IPY_MODEL_51d23e1199274477a69557c74609afb2", + "_model_module": "@jupyter-widgets/controls", + "children": [ + "IPY_MODEL_029f74818c6842d7a28af62032418880", + "IPY_MODEL_8db144e9144141779a1088c4bc000a99" + ] + }, + "model_module_version": "1.5.0" + }, + "51d23e1199274477a69557c74609afb2": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + 
"grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + }, + "model_module_version": "1.2.0" + }, + "029f74818c6842d7a28af62032418880": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "state": { + "_view_name": "ProgressView", + "style": "IPY_MODEL_210517aede4f4cfab9120fdeb3d8361a", + "_dom_classes": [], + "description": "100%", + "_model_name": "FloatProgressModel", + "bar_style": "success", + "max": 782, + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "value": 782, + "_view_count": null, + "_view_module_version": "1.5.0", + "orientation": "horizontal", + "min": 0, + "description_tooltip": null, + "_model_module": "@jupyter-widgets/controls", + "layout": "IPY_MODEL_df9bc2dc2b3c4fee98affdd7f5ca1ef6" + }, + "model_module_version": "1.5.0" + }, + "8db144e9144141779a1088c4bc000a99": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "state": { + "_view_name": "HTMLView", + "style": "IPY_MODEL_b684a47485af4cb1934d57cbb03a4f57", + "_dom_classes": [], + "description": "", + "_model_name": "HTMLModel", + "placeholder": "​", + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "value": " 782/782 [10:38<00:00, 1.22it/s]", + "_view_count": null, + "_view_module_version": "1.5.0", + "description_tooltip": null, + "_model_module": "@jupyter-widgets/controls", + "layout": "IPY_MODEL_942d20b134964d1d895af69938918464" + }, + "model_module_version": "1.5.0" + }, + "210517aede4f4cfab9120fdeb3d8361a": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "state": { + "_view_name": "StyleView", + "_model_name": "ProgressStyleModel", + "description_width": "initial", + "_view_module": "@jupyter-widgets/base", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.2.0", + "bar_color": null, + "_model_module": "@jupyter-widgets/controls" + }, + "model_module_version": "1.5.0" + }, + "df9bc2dc2b3c4fee98affdd7f5ca1ef6": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + }, + "model_module_version": "1.2.0" + }, + "b684a47485af4cb1934d57cbb03a4f57": { + "model_module": "@jupyter-widgets/controls", + "model_name": 
"DescriptionStyleModel", + "state": { + "_view_name": "StyleView", + "_model_name": "DescriptionStyleModel", + "description_width": "", + "_view_module": "@jupyter-widgets/base", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.2.0", + "_model_module": "@jupyter-widgets/controls" + }, + "model_module_version": "1.5.0" + }, + "942d20b134964d1d895af69938918464": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + }, + "model_module_version": "1.2.0" + }, + "0037bb8409bb4d65ac4ebd956fd1e631": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "state": { + "_view_name": "HBoxView", + "_dom_classes": [], + "_model_name": "HBoxModel", + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.5.0", + "box_style": "", + "layout": "IPY_MODEL_db528e3117024014b4d281b650901cbd", + "_model_module": "@jupyter-widgets/controls", + "children": [ + "IPY_MODEL_350fc08aa59849fc9fd3f3e454583a6c", + "IPY_MODEL_be936dd408314d0d90a22f627ca517ca" + ] + }, + "model_module_version": "1.5.0" + }, + "db528e3117024014b4d281b650901cbd": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + }, + "model_module_version": "1.2.0" + }, + "350fc08aa59849fc9fd3f3e454583a6c": { + "model_module": 
"@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "state": { + "_view_name": "ProgressView", + "style": "IPY_MODEL_99f56e1a8fdb4b2282fa6e17819d044e", + "_dom_classes": [], + "description": "Downloading: 100%", + "_model_name": "FloatProgressModel", + "bar_style": "success", + "max": 791656, + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "value": 791656, + "_view_count": null, + "_view_module_version": "1.5.0", + "orientation": "horizontal", + "min": 0, + "description_tooltip": null, + "_model_module": "@jupyter-widgets/controls", + "layout": "IPY_MODEL_462bd815ddbc4687bcf7695f59919f0c" + }, + "model_module_version": "1.5.0" + }, + "be936dd408314d0d90a22f627ca517ca": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "state": { + "_view_name": "HTMLView", + "style": "IPY_MODEL_40edb7d92c1145ee9e3bb823e4688e16", + "_dom_classes": [], + "description": "", + "_model_name": "HTMLModel", + "placeholder": "​", + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "value": " 792k/792k [00:06<00:00, 131kB/s]", + "_view_count": null, + "_view_module_version": "1.5.0", + "description_tooltip": null, + "_model_module": "@jupyter-widgets/controls", + "layout": "IPY_MODEL_f827cd8a6bf846c590913c5ea40e6737" + }, + "model_module_version": "1.5.0" + }, + "99f56e1a8fdb4b2282fa6e17819d044e": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "state": { + "_view_name": "StyleView", + "_model_name": "ProgressStyleModel", + "description_width": "initial", + "_view_module": "@jupyter-widgets/base", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.2.0", + "bar_color": null, + "_model_module": "@jupyter-widgets/controls" + }, + "model_module_version": "1.5.0" + }, + "462bd815ddbc4687bcf7695f59919f0c": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + }, + "model_module_version": "1.2.0" + }, + "40edb7d92c1145ee9e3bb823e4688e16": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "state": { + "_view_name": "StyleView", + "_model_name": "DescriptionStyleModel", + "description_width": "", + "_view_module": "@jupyter-widgets/base", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.2.0", + "_model_module": "@jupyter-widgets/controls" + }, + 
"model_module_version": "1.5.0" + }, + "f827cd8a6bf846c590913c5ea40e6737": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + }, + "model_module_version": "1.2.0" + }, + "61d58772a6a64c5c8ad30dab2563a56f": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "state": { + "_view_name": "HBoxView", + "_dom_classes": [], + "_model_name": "HBoxModel", + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.5.0", + "box_style": "", + "layout": "IPY_MODEL_4000e73e6d804763986dc9a9c74456aa", + "_model_module": "@jupyter-widgets/controls", + "children": [ + "IPY_MODEL_0dd99276ab294c939d83320f4674d5c2", + "IPY_MODEL_d306f7ff1ec94561aeed9ff59ba9b54b" + ] + }, + "model_module_version": "1.5.0" + }, + "4000e73e6d804763986dc9a9c74456aa": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": "row wrap", + "width": "100%", + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": "inline-flex", + "left": null + }, + "model_module_version": "1.2.0" + }, + "0dd99276ab294c939d83320f4674d5c2": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "state": { + "_view_name": "ProgressView", + "style": "IPY_MODEL_0893a9730450433fa76a74b008a6f482", + "_dom_classes": [], + "description": "Validation sanity check: 100%", + "_model_name": "FloatProgressModel", + "bar_style": "info", + "max": 1, + 
"_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "value": 1, + "_view_count": null, + "_view_module_version": "1.5.0", + "orientation": "horizontal", + "min": 0, + "description_tooltip": null, + "_model_module": "@jupyter-widgets/controls", + "layout": "IPY_MODEL_f8873c7201e1410cb0ec52cb7e34c3c9" + }, + "model_module_version": "1.5.0" + }, + "d306f7ff1ec94561aeed9ff59ba9b54b": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "state": { + "_view_name": "HTMLView", + "style": "IPY_MODEL_234eb8b041c44358b2f993c2853162f7", + "_dom_classes": [], + "description": "", + "_model_name": "HTMLModel", + "placeholder": "​", + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "value": " 5/5 [00:01<00:00, 3.74it/s]", + "_view_count": null, + "_view_module_version": "1.5.0", + "description_tooltip": null, + "_model_module": "@jupyter-widgets/controls", + "layout": "IPY_MODEL_8f73da698e85474fbecfd91bb7770c56" + }, + "model_module_version": "1.5.0" + }, + "0893a9730450433fa76a74b008a6f482": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "state": { + "_view_name": "StyleView", + "_model_name": "ProgressStyleModel", + "description_width": "initial", + "_view_module": "@jupyter-widgets/base", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.2.0", + "bar_color": null, + "_model_module": "@jupyter-widgets/controls" + }, + "model_module_version": "1.5.0" + }, + "f8873c7201e1410cb0ec52cb7e34c3c9": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": "2", + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + }, + "model_module_version": "1.2.0" + }, + "234eb8b041c44358b2f993c2853162f7": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "state": { + "_view_name": "StyleView", + "_model_name": "DescriptionStyleModel", + "description_width": "", + "_view_module": "@jupyter-widgets/base", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.2.0", + "_model_module": "@jupyter-widgets/controls" + }, + "model_module_version": "1.5.0" + }, + "8f73da698e85474fbecfd91bb7770c56": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + 
"_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + }, + "model_module_version": "1.2.0" + }, + "26a0cb124049417aa9dbdd010e3af03a": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "state": { + "_view_name": "HBoxView", + "_dom_classes": [], + "_model_name": "HBoxModel", + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.5.0", + "box_style": "", + "layout": "IPY_MODEL_8a14bd8f2a424b15b48426fd5e320678", + "_model_module": "@jupyter-widgets/controls", + "children": [ + "IPY_MODEL_09ed6242c5ef4a4791a1074ff7e4616e", + "IPY_MODEL_487a6ea92fe0463ebbcb63094fde5136" + ] + }, + "model_module_version": "1.5.0" + }, + "8a14bd8f2a424b15b48426fd5e320678": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": "row wrap", + "width": "100%", + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": "inline-flex", + "left": null + }, + "model_module_version": "1.2.0" + }, + "09ed6242c5ef4a4791a1074ff7e4616e": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "state": { + "_view_name": "ProgressView", + "style": "IPY_MODEL_c050be8414044acdb1a496495d148302", + "_dom_classes": [], + "description": "Epoch 2: 100%", + "_model_name": "FloatProgressModel", + "bar_style": "success", + "max": 2250, + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "value": 2250, + "_view_count": null, + "_view_module_version": "1.5.0", + "orientation": "horizontal", + "min": 0, + "description_tooltip": null, + "_model_module": "@jupyter-widgets/controls", + "layout": "IPY_MODEL_56a67d534f284df0bc1121f1e264f5e2" + }, + 
"model_module_version": "1.5.0" + }, + "487a6ea92fe0463ebbcb63094fde5136": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "state": { + "_view_name": "HTMLView", + "style": "IPY_MODEL_f168c4ae2d014e89bacc58e43427302e", + "_dom_classes": [], + "description": "", + "_model_name": "HTMLModel", + "placeholder": "​", + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "value": " 2250/2250 [20:19<00:00, 1.84it/s, loss=0.005, v_num=1, val_loss=0.0696]", + "_view_count": null, + "_view_module_version": "1.5.0", + "description_tooltip": null, + "_model_module": "@jupyter-widgets/controls", + "layout": "IPY_MODEL_5cabe7d5ed6b46be882c558d28a29ca2" + }, + "model_module_version": "1.5.0" + }, + "c050be8414044acdb1a496495d148302": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "state": { + "_view_name": "StyleView", + "_model_name": "ProgressStyleModel", + "description_width": "initial", + "_view_module": "@jupyter-widgets/base", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.2.0", + "bar_color": null, + "_model_module": "@jupyter-widgets/controls" + }, + "model_module_version": "1.5.0" + }, + "56a67d534f284df0bc1121f1e264f5e2": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": "2", + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + }, + "model_module_version": "1.2.0" + }, + "f168c4ae2d014e89bacc58e43427302e": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "state": { + "_view_name": "StyleView", + "_model_name": "DescriptionStyleModel", + "description_width": "", + "_view_module": "@jupyter-widgets/base", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.2.0", + "_model_module": "@jupyter-widgets/controls" + }, + "model_module_version": "1.5.0" + }, + "5cabe7d5ed6b46be882c558d28a29ca2": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + 
"grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + }, + "model_module_version": "1.2.0" + }, + "1681a9ce7f9340caa50c4204777a6f9e": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "state": { + "_view_name": "HBoxView", + "_dom_classes": [], + "_model_name": "HBoxModel", + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.5.0", + "box_style": "", + "layout": "IPY_MODEL_a9f0c66f958e493286155c8d2631d255", + "_model_module": "@jupyter-widgets/controls", + "children": [ + "IPY_MODEL_e04d6312d5d4425ab726588c485e668c", + "IPY_MODEL_fab8ee7d5d3940819eb9131efbbad791" + ] + }, + "model_module_version": "1.5.0" + }, + "a9f0c66f958e493286155c8d2631d255": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": "row wrap", + "width": "100%", + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": "inline-flex", + "left": null + }, + "model_module_version": "1.2.0" + }, + "e04d6312d5d4425ab726588c485e668c": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "state": { + "_view_name": "ProgressView", + "style": "IPY_MODEL_6dd2781f88eb4549b4203dfec9c1a98e", + "_dom_classes": [], + "description": "Validating: 100%", + "_model_name": "FloatProgressModel", + "bar_style": "info", + "max": 1, + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "value": 1, + "_view_count": null, + "_view_module_version": "1.5.0", + "orientation": "horizontal", + "min": 0, + "description_tooltip": null, + "_model_module": "@jupyter-widgets/controls", + "layout": "IPY_MODEL_893ba880ac6545baa6eb4a532ecc5753" + }, + "model_module_version": "1.5.0" + }, + "fab8ee7d5d3940819eb9131efbbad791": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "state": { + "_view_name": "HTMLView", + "style": "IPY_MODEL_d4fc7ae628c94a758ce694318bc620ba", + "_dom_classes": [], + "description": "", + "_model_name": 
"HTMLModel", + "placeholder": "​", + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "value": " 250/250 [00:48<00:00, 5.24it/s]", + "_view_count": null, + "_view_module_version": "1.5.0", + "description_tooltip": null, + "_model_module": "@jupyter-widgets/controls", + "layout": "IPY_MODEL_4c33ca548b5e4738abdac09575e2a325" + }, + "model_module_version": "1.5.0" + }, + "6dd2781f88eb4549b4203dfec9c1a98e": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "state": { + "_view_name": "StyleView", + "_model_name": "ProgressStyleModel", + "description_width": "initial", + "_view_module": "@jupyter-widgets/base", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.2.0", + "bar_color": null, + "_model_module": "@jupyter-widgets/controls" + }, + "model_module_version": "1.5.0" + }, + "893ba880ac6545baa6eb4a532ecc5753": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": "2", + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + }, + "model_module_version": "1.2.0" + }, + "d4fc7ae628c94a758ce694318bc620ba": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "state": { + "_view_name": "StyleView", + "_model_name": "DescriptionStyleModel", + "description_width": "", + "_view_module": "@jupyter-widgets/base", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.2.0", + "_model_module": "@jupyter-widgets/controls" + }, + "model_module_version": "1.5.0" + }, + "4c33ca548b5e4738abdac09575e2a325": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + 
"grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + }, + "model_module_version": "1.2.0" + }, + "ff475d6cdc074c14aa7b2cfede771b07": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "state": { + "_view_name": "HBoxView", + "_dom_classes": [], + "_model_name": "HBoxModel", + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.5.0", + "box_style": "", + "layout": "IPY_MODEL_d77faf8b9ea6480abe594114823ca52f", + "_model_module": "@jupyter-widgets/controls", + "children": [ + "IPY_MODEL_ee4f41b591fe41a5a2d915c343b16c1d", + "IPY_MODEL_d8946214acc44c4cb97688538daaa33f" + ] + }, + "model_module_version": "1.5.0" + }, + "d77faf8b9ea6480abe594114823ca52f": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": "row wrap", + "width": "100%", + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": "inline-flex", + "left": null + }, + "model_module_version": "1.2.0" + }, + "ee4f41b591fe41a5a2d915c343b16c1d": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "state": { + "_view_name": "ProgressView", + "style": "IPY_MODEL_9b9306452732495cbb1acd3e2fcf3b69", + "_dom_classes": [], + "description": "Validating: 100%", + "_model_name": "FloatProgressModel", + "bar_style": "info", + "max": 1, + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "value": 1, + "_view_count": null, + "_view_module_version": "1.5.0", + "orientation": "horizontal", + "min": 0, + "description_tooltip": null, + "_model_module": "@jupyter-widgets/controls", + "layout": "IPY_MODEL_f42e9e596ad0485b842fee92d1884750" + }, + "model_module_version": "1.5.0" + }, + "d8946214acc44c4cb97688538daaa33f": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "state": { + "_view_name": "HTMLView", + "style": "IPY_MODEL_1d9f8718ba4d4b60997757ea7f1db72b", + "_dom_classes": [], + "description": "", + "_model_name": "HTMLModel", + "placeholder": "​", + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "value": " 250/250 [00:48<00:00, 5.22it/s]", + "_view_count": null, + "_view_module_version": "1.5.0", + "description_tooltip": null, + "_model_module": "@jupyter-widgets/controls", + "layout": 
"IPY_MODEL_63db466ae63b42a5a79d051ef5af653e" + }, + "model_module_version": "1.5.0" + }, + "9b9306452732495cbb1acd3e2fcf3b69": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "state": { + "_view_name": "StyleView", + "_model_name": "ProgressStyleModel", + "description_width": "initial", + "_view_module": "@jupyter-widgets/base", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.2.0", + "bar_color": null, + "_model_module": "@jupyter-widgets/controls" + }, + "model_module_version": "1.5.0" + }, + "f42e9e596ad0485b842fee92d1884750": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": "2", + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + }, + "model_module_version": "1.2.0" + }, + "1d9f8718ba4d4b60997757ea7f1db72b": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "state": { + "_view_name": "StyleView", + "_model_name": "DescriptionStyleModel", + "description_width": "", + "_view_module": "@jupyter-widgets/base", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.2.0", + "_model_module": "@jupyter-widgets/controls" + }, + "model_module_version": "1.5.0" + }, + "63db466ae63b42a5a79d051ef5af653e": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + }, + "model_module_version": "1.2.0" + }, + 
"8933ab7f935e4776970ddfe35f5da135": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "state": { + "_view_name": "HBoxView", + "_dom_classes": [], + "_model_name": "HBoxModel", + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.5.0", + "box_style": "", + "layout": "IPY_MODEL_84eb2bf17a9048fc94b6f47867d1b0ba", + "_model_module": "@jupyter-widgets/controls", + "children": [ + "IPY_MODEL_cdd7554792cf4c73922e2f050d1fcaaf", + "IPY_MODEL_a32aa193a82f478387c14f384c2c689e" + ] + }, + "model_module_version": "1.5.0" + }, + "84eb2bf17a9048fc94b6f47867d1b0ba": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + }, + "model_module_version": "1.2.0" + }, + "cdd7554792cf4c73922e2f050d1fcaaf": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "state": { + "_view_name": "ProgressView", + "style": "IPY_MODEL_e4cbd76c110541cbbf1386e299c4d9d6", + "_dom_classes": [], + "description": "100%", + "_model_name": "FloatProgressModel", + "bar_style": "success", + "max": 63, + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "value": 63, + "_view_count": null, + "_view_module_version": "1.5.0", + "orientation": "horizontal", + "min": 0, + "description_tooltip": null, + "_model_module": "@jupyter-widgets/controls", + "layout": "IPY_MODEL_da67548f1abc4727965f72b8cb367681" + }, + "model_module_version": "1.5.0" + }, + "a32aa193a82f478387c14f384c2c689e": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "state": { + "_view_name": "HTMLView", + "style": "IPY_MODEL_63b11aa7ee0c4271aedb87ad3e7d23c3", + "_dom_classes": [], + "description": "", + "_model_name": "HTMLModel", + "placeholder": "​", + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "value": " 63/63 [53:03<00:00, 50.53s/it]", + "_view_count": null, + "_view_module_version": "1.5.0", + "description_tooltip": null, + "_model_module": "@jupyter-widgets/controls", + "layout": "IPY_MODEL_720b90b3f86c4e5da15447777806e9a7" + }, + "model_module_version": "1.5.0" + }, + "e4cbd76c110541cbbf1386e299c4d9d6": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "state": { + "_view_name": "StyleView", + "_model_name": "ProgressStyleModel", + "description_width": "initial", + "_view_module": 
"@jupyter-widgets/base", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.2.0", + "bar_color": null, + "_model_module": "@jupyter-widgets/controls" + }, + "model_module_version": "1.5.0" + }, + "da67548f1abc4727965f72b8cb367681": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + }, + "model_module_version": "1.2.0" + }, + "63b11aa7ee0c4271aedb87ad3e7d23c3": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "state": { + "_view_name": "StyleView", + "_model_name": "DescriptionStyleModel", + "description_width": "", + "_view_module": "@jupyter-widgets/base", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.2.0", + "_model_module": "@jupyter-widgets/controls" + }, + "model_module_version": "1.5.0" + }, + "720b90b3f86c4e5da15447777806e9a7": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + }, + "model_module_version": "1.2.0" + }, + "78b1b91a08214461b74fb1e143247d1e": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "state": { + "_view_name": "HBoxView", + "_dom_classes": [], + "_model_name": "HBoxModel", + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.5.0", + "box_style": "", 
+ "layout": "IPY_MODEL_902a509471004d2691d807c4990fccd2", + "_model_module": "@jupyter-widgets/controls", + "children": [ + "IPY_MODEL_74ec15497e1743a4af6be12e3bc1487d", + "IPY_MODEL_a70b457d9379403f9fac247de68bb8e3" + ] + }, + "model_module_version": "1.5.0" + }, + "902a509471004d2691d807c4990fccd2": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + }, + "model_module_version": "1.2.0" + }, + "74ec15497e1743a4af6be12e3bc1487d": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "state": { + "_view_name": "ProgressView", + "style": "IPY_MODEL_28f9d9aa0ece4831b0f9e412d8a88f8d", + "_dom_classes": [], + "description": "Downloading: 100%", + "_model_name": "FloatProgressModel", + "bar_style": "success", + "max": 791656, + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "value": 791656, + "_view_count": null, + "_view_module_version": "1.5.0", + "orientation": "horizontal", + "min": 0, + "description_tooltip": null, + "_model_module": "@jupyter-widgets/controls", + "layout": "IPY_MODEL_7640680e1006492da75d873726567fed" + }, + "model_module_version": "1.5.0" + }, + "a70b457d9379403f9fac247de68bb8e3": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "state": { + "_view_name": "HTMLView", + "style": "IPY_MODEL_1090e3e017564a2281c60fb53a901c75", + "_dom_classes": [], + "description": "", + "_model_name": "HTMLModel", + "placeholder": "​", + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "value": " 792k/792k [00:04<00:00, 191kB/s]", + "_view_count": null, + "_view_module_version": "1.5.0", + "description_tooltip": null, + "_model_module": "@jupyter-widgets/controls", + "layout": "IPY_MODEL_9df2679ba627444e9b76bd2ff0ddc657" + }, + "model_module_version": "1.5.0" + }, + "28f9d9aa0ece4831b0f9e412d8a88f8d": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "state": { + "_view_name": "StyleView", + "_model_name": "ProgressStyleModel", + "description_width": "initial", + "_view_module": "@jupyter-widgets/base", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.2.0", + "bar_color": null, + "_model_module": "@jupyter-widgets/controls" + }, + "model_module_version": "1.5.0" + }, + "7640680e1006492da75d873726567fed": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "state": { 
+ "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + }, + "model_module_version": "1.2.0" + }, + "1090e3e017564a2281c60fb53a901c75": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "state": { + "_view_name": "StyleView", + "_model_name": "DescriptionStyleModel", + "description_width": "", + "_view_module": "@jupyter-widgets/base", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.2.0", + "_model_module": "@jupyter-widgets/controls" + }, + "model_module_version": "1.5.0" + }, + "9df2679ba627444e9b76bd2ff0ddc657": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + }, + "model_module_version": "1.2.0" + }, + "5c7427d7db844b9691d30cf2de1efc17": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "state": { + "_view_name": "HBoxView", + "_dom_classes": [], + "_model_name": "HBoxModel", + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.5.0", + "box_style": "", + "layout": "IPY_MODEL_bb0df1833ee3489da5c2a9c7b1306cc6", + "_model_module": "@jupyter-widgets/controls", + "children": [ + "IPY_MODEL_3d2817812b6f475a8c838fd14646469a", + "IPY_MODEL_9d0f0c946790477fb8bc8bac64dfd7de" + ] + }, + "model_module_version": "1.5.0" + }, + "bb0df1833ee3489da5c2a9c7b1306cc6": { + "model_module": "@jupyter-widgets/base", + 
"model_name": "LayoutModel", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + }, + "model_module_version": "1.2.0" + }, + "3d2817812b6f475a8c838fd14646469a": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "state": { + "_view_name": "ProgressView", + "style": "IPY_MODEL_8254b8062d5e4280bea46f8bc444c5db", + "_dom_classes": [], + "description": "Downloading: 100%", + "_model_name": "FloatProgressModel", + "bar_style": "success", + "max": 1199, + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "value": 1199, + "_view_count": null, + "_view_module_version": "1.5.0", + "orientation": "horizontal", + "min": 0, + "description_tooltip": null, + "_model_module": "@jupyter-widgets/controls", + "layout": "IPY_MODEL_ab5f07ab5c574148a0062eb7f1ce5bcd" + }, + "model_module_version": "1.5.0" + }, + "9d0f0c946790477fb8bc8bac64dfd7de": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "state": { + "_view_name": "HTMLView", + "style": "IPY_MODEL_47fdc2009efc443392ecd182996fcca9", + "_dom_classes": [], + "description": "", + "_model_name": "HTMLModel", + "placeholder": "​", + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "value": " 1.20k/1.20k [00:42<00:00, 28.4B/s]", + "_view_count": null, + "_view_module_version": "1.5.0", + "description_tooltip": null, + "_model_module": "@jupyter-widgets/controls", + "layout": "IPY_MODEL_9b705e83fea84cbf912e33d6342be721" + }, + "model_module_version": "1.5.0" + }, + "8254b8062d5e4280bea46f8bc444c5db": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "state": { + "_view_name": "StyleView", + "_model_name": "ProgressStyleModel", + "description_width": "initial", + "_view_module": "@jupyter-widgets/base", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.2.0", + "bar_color": null, + "_model_module": "@jupyter-widgets/controls" + }, + "model_module_version": "1.5.0" + }, + "ab5f07ab5c574148a0062eb7f1ce5bcd": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + 
"_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + }, + "model_module_version": "1.2.0" + }, + "47fdc2009efc443392ecd182996fcca9": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "state": { + "_view_name": "StyleView", + "_model_name": "DescriptionStyleModel", + "description_width": "", + "_view_module": "@jupyter-widgets/base", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.2.0", + "_model_module": "@jupyter-widgets/controls" + }, + "model_module_version": "1.5.0" + }, + "9b705e83fea84cbf912e33d6342be721": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + }, + "model_module_version": "1.2.0" + }, + "e8e8ea6199df43019930ac7b557c46a5": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "state": { + "_view_name": "HBoxView", + "_dom_classes": [], + "_model_name": "HBoxModel", + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.5.0", + "box_style": "", + "layout": "IPY_MODEL_0566f29b017f47f399d7579d7929e046", + "_model_module": "@jupyter-widgets/controls", + "children": [ + "IPY_MODEL_932309f0a40b46659c0cac7cc37fdc05", + "IPY_MODEL_da3665141bd44a24a5b5c9f36d4a9c52" + ] + }, + "model_module_version": "1.5.0" + }, + "0566f29b017f47f399d7579d7929e046": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + 
"align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + }, + "model_module_version": "1.2.0" + }, + "932309f0a40b46659c0cac7cc37fdc05": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "state": { + "_view_name": "ProgressView", + "style": "IPY_MODEL_5c98e3a5b6a6403a936a725f4c30cdd3", + "_dom_classes": [], + "description": "Downloading: 100%", + "_model_name": "FloatProgressModel", + "bar_style": "success", + "max": 891691430, + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "value": 891691430, + "_view_count": null, + "_view_module_version": "1.5.0", + "orientation": "horizontal", + "min": 0, + "description_tooltip": null, + "_model_module": "@jupyter-widgets/controls", + "layout": "IPY_MODEL_8da2b560fa9348098a2a7f09967d5f5f" + }, + "model_module_version": "1.5.0" + }, + "da3665141bd44a24a5b5c9f36d4a9c52": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "state": { + "_view_name": "HTMLView", + "style": "IPY_MODEL_7e37cac227014717987922341f8099fe", + "_dom_classes": [], + "description": "", + "_model_name": "HTMLModel", + "placeholder": "​", + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "value": " 892M/892M [00:38<00:00, 23.2MB/s]", + "_view_count": null, + "_view_module_version": "1.5.0", + "description_tooltip": null, + "_model_module": "@jupyter-widgets/controls", + "layout": "IPY_MODEL_b95f98f98a76434591f90d41b43e39ba" + }, + "model_module_version": "1.5.0" + }, + "5c98e3a5b6a6403a936a725f4c30cdd3": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "state": { + "_view_name": "StyleView", + "_model_name": "ProgressStyleModel", + "description_width": "initial", + "_view_module": "@jupyter-widgets/base", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.2.0", + "bar_color": null, + "_model_module": "@jupyter-widgets/controls" + }, + "model_module_version": "1.5.0" + }, + "8da2b560fa9348098a2a7f09967d5f5f": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, 
+ "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + }, + "model_module_version": "1.2.0" + }, + "7e37cac227014717987922341f8099fe": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "state": { + "_view_name": "StyleView", + "_model_name": "DescriptionStyleModel", + "description_width": "", + "_view_module": "@jupyter-widgets/base", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.2.0", + "_model_module": "@jupyter-widgets/controls" + }, + "model_module_version": "1.5.0" + }, + "b95f98f98a76434591f90d41b43e39ba": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + }, + "model_module_version": "1.2.0" + }, + "8e79d03deee94b299431330441bd64c8": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "state": { + "_view_name": "HBoxView", + "_dom_classes": [], + "_model_name": "HBoxModel", + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.5.0", + "box_style": "", + "layout": "IPY_MODEL_510043ffee634f86b89ec3fc060a74ea", + "_model_module": "@jupyter-widgets/controls", + "children": [ + "IPY_MODEL_e86c5fbd48ce4215a0df353122183982", + "IPY_MODEL_bfc3a5a3cf2e49868053db6f1ef7785d" + ] + }, + "model_module_version": "1.5.0" + }, + "510043ffee634f86b89ec3fc060a74ea": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": "row wrap", + "width": "100%", + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + 
"max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": "inline-flex", + "left": null + }, + "model_module_version": "1.2.0" + }, + "e86c5fbd48ce4215a0df353122183982": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "state": { + "_view_name": "ProgressView", + "style": "IPY_MODEL_361a2f79ed89495894d0b09a709f8f32", + "_dom_classes": [], + "description": "Validation sanity check: 100%", + "_model_name": "FloatProgressModel", + "bar_style": "info", + "max": 1, + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "value": 1, + "_view_count": null, + "_view_module_version": "1.5.0", + "orientation": "horizontal", + "min": 0, + "description_tooltip": null, + "_model_module": "@jupyter-widgets/controls", + "layout": "IPY_MODEL_f7e53d55f0234627a3b9f2c90eb8682f" + }, + "model_module_version": "1.5.0" + }, + "bfc3a5a3cf2e49868053db6f1ef7785d": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "state": { + "_view_name": "HTMLView", + "style": "IPY_MODEL_3584c01b0c5e47dfa373bae29461e94a", + "_dom_classes": [], + "description": "", + "_model_name": "HTMLModel", + "placeholder": "​", + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "value": " 5/5 [00:01<00:00, 3.50it/s]", + "_view_count": null, + "_view_module_version": "1.5.0", + "description_tooltip": null, + "_model_module": "@jupyter-widgets/controls", + "layout": "IPY_MODEL_cfd9db6f31474a8189e741bf8fdad6a9" + }, + "model_module_version": "1.5.0" + }, + "361a2f79ed89495894d0b09a709f8f32": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "state": { + "_view_name": "StyleView", + "_model_name": "ProgressStyleModel", + "description_width": "initial", + "_view_module": "@jupyter-widgets/base", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.2.0", + "bar_color": null, + "_model_module": "@jupyter-widgets/controls" + }, + "model_module_version": "1.5.0" + }, + "f7e53d55f0234627a3b9f2c90eb8682f": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": "2", + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": 
null, + "display": null, + "left": null + }, + "model_module_version": "1.2.0" + }, + "3584c01b0c5e47dfa373bae29461e94a": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "state": { + "_view_name": "StyleView", + "_model_name": "DescriptionStyleModel", + "description_width": "", + "_view_module": "@jupyter-widgets/base", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.2.0", + "_model_module": "@jupyter-widgets/controls" + }, + "model_module_version": "1.5.0" + }, + "cfd9db6f31474a8189e741bf8fdad6a9": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + }, + "model_module_version": "1.2.0" + }, + "68705cee3df5458fb5145046337d925c": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "state": { + "_view_name": "HBoxView", + "_dom_classes": [], + "_model_name": "HBoxModel", + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.5.0", + "box_style": "", + "layout": "IPY_MODEL_4cf1613d58bd450780ac95c994686985", + "_model_module": "@jupyter-widgets/controls", + "children": [ + "IPY_MODEL_3ee5f7cf56394175900ebb14ae0b5f9e", + "IPY_MODEL_9f054dcf926c45459b7aa728493571a0" + ] + }, + "model_module_version": "1.5.0" + }, + "4cf1613d58bd450780ac95c994686985": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": "row wrap", + "width": "100%", + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": 
null, + "grid_auto_columns": null, + "margin": null, + "display": "inline-flex", + "left": null + }, + "model_module_version": "1.2.0" + }, + "3ee5f7cf56394175900ebb14ae0b5f9e": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "state": { + "_view_name": "ProgressView", + "style": "IPY_MODEL_b52599dda9d94c83891d1c42c5f557e0", + "_dom_classes": [], + "description": "Epoch 3: 3%", + "_model_name": "FloatProgressModel", + "bar_style": "danger", + "max": 11694, + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "value": 396, + "_view_count": null, + "_view_module_version": "1.5.0", + "orientation": "horizontal", + "min": 0, + "description_tooltip": null, + "_model_module": "@jupyter-widgets/controls", + "layout": "IPY_MODEL_a1cf907a3bcc4177b1d5dd9edbf30c20" + }, + "model_module_version": "1.5.0" + }, + "9f054dcf926c45459b7aa728493571a0": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "state": { + "_view_name": "HTMLView", + "style": "IPY_MODEL_82b29ceeb21c417782e9e29a81eb47ea", + "_dom_classes": [], + "description": "", + "_model_name": "HTMLModel", + "placeholder": "​", + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "value": " 396/11694 [03:40<1:44:57, 1.79it/s, loss=0.017, v_num=0, val_loss=0.327]", + "_view_count": null, + "_view_module_version": "1.5.0", + "description_tooltip": null, + "_model_module": "@jupyter-widgets/controls", + "layout": "IPY_MODEL_886260804ffd4e11bc93fb6e098111ab" + }, + "model_module_version": "1.5.0" + }, + "b52599dda9d94c83891d1c42c5f557e0": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "state": { + "_view_name": "StyleView", + "_model_name": "ProgressStyleModel", + "description_width": "initial", + "_view_module": "@jupyter-widgets/base", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.2.0", + "bar_color": null, + "_model_module": "@jupyter-widgets/controls" + }, + "model_module_version": "1.5.0" + }, + "a1cf907a3bcc4177b1d5dd9edbf30c20": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": "2", + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + }, + "model_module_version": "1.2.0" + }, + "82b29ceeb21c417782e9e29a81eb47ea": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "state": { + "_view_name": "StyleView", + "_model_name": "DescriptionStyleModel", + 
"description_width": "", + "_view_module": "@jupyter-widgets/base", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.2.0", + "_model_module": "@jupyter-widgets/controls" + }, + "model_module_version": "1.5.0" + }, + "886260804ffd4e11bc93fb6e098111ab": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + }, + "model_module_version": "1.2.0" + }, + "69f6eb1cb0434128961b5d83529813c5": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "state": { + "_view_name": "HBoxView", + "_dom_classes": [], + "_model_name": "HBoxModel", + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.5.0", + "box_style": "", + "layout": "IPY_MODEL_6723d50588a248d0ad7bb118de8c3fd5", + "_model_module": "@jupyter-widgets/controls", + "children": [ + "IPY_MODEL_86d71b8233c14252a897ffa29ea6d9df", + "IPY_MODEL_d01c708e22ab423896271fa79860e7c3" + ] + }, + "model_module_version": "1.5.0" + }, + "6723d50588a248d0ad7bb118de8c3fd5": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": "row wrap", + "width": "100%", + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": "inline-flex", + "left": null + }, + "model_module_version": "1.2.0" + }, + "86d71b8233c14252a897ffa29ea6d9df": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "state": { + "_view_name": 
"ProgressView", + "style": "IPY_MODEL_0e8da5995754472fac5fba1f8b30d107", + "_dom_classes": [], + "description": "Validating: 100%", + "_model_name": "FloatProgressModel", + "bar_style": "info", + "max": 1, + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "value": 1, + "_view_count": null, + "_view_module_version": "1.5.0", + "orientation": "horizontal", + "min": 0, + "description_tooltip": null, + "_model_module": "@jupyter-widgets/controls", + "layout": "IPY_MODEL_3dbee77f299f4e14a1698b60d609b8a1" + }, + "model_module_version": "1.5.0" + }, + "d01c708e22ab423896271fa79860e7c3": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "state": { + "_view_name": "HTMLView", + "style": "IPY_MODEL_8c4c9025aaae44148591ae6f8bb37347", + "_dom_classes": [], + "description": "", + "_model_name": "HTMLModel", + "placeholder": "​", + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "value": " 2501/2501 [07:28<00:00, 5.90it/s]", + "_view_count": null, + "_view_module_version": "1.5.0", + "description_tooltip": null, + "_model_module": "@jupyter-widgets/controls", + "layout": "IPY_MODEL_29e2f2f0914e4dea8117844675b42be5" + }, + "model_module_version": "1.5.0" + }, + "0e8da5995754472fac5fba1f8b30d107": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "state": { + "_view_name": "StyleView", + "_model_name": "ProgressStyleModel", + "description_width": "initial", + "_view_module": "@jupyter-widgets/base", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.2.0", + "bar_color": null, + "_model_module": "@jupyter-widgets/controls" + }, + "model_module_version": "1.5.0" + }, + "3dbee77f299f4e14a1698b60d609b8a1": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": "2", + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + }, + "model_module_version": "1.2.0" + }, + "8c4c9025aaae44148591ae6f8bb37347": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "state": { + "_view_name": "StyleView", + "_model_name": "DescriptionStyleModel", + "description_width": "", + "_view_module": "@jupyter-widgets/base", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.2.0", + "_model_module": "@jupyter-widgets/controls" + }, + "model_module_version": "1.5.0" + }, + "29e2f2f0914e4dea8117844675b42be5": { + "model_module": "@jupyter-widgets/base", + 
"model_name": "LayoutModel", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + }, + "model_module_version": "1.2.0" + }, + "0cfc8fa73f164b4fa5ddcbc3f115ef9b": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "state": { + "_view_name": "HBoxView", + "_dom_classes": [], + "_model_name": "HBoxModel", + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.5.0", + "box_style": "", + "layout": "IPY_MODEL_4559bd35b33f4804b968debaaf316463", + "_model_module": "@jupyter-widgets/controls", + "children": [ + "IPY_MODEL_e403cc7718bf48f1b95150482e083f02", + "IPY_MODEL_f6248a9db7f2466a9ab3a4fbd214f265" + ] + }, + "model_module_version": "1.5.0" + }, + "4559bd35b33f4804b968debaaf316463": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": "row wrap", + "width": "100%", + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": "inline-flex", + "left": null + }, + "model_module_version": "1.2.0" + }, + "e403cc7718bf48f1b95150482e083f02": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "state": { + "_view_name": "ProgressView", + "style": "IPY_MODEL_475e5353d31147d3ab156c0e7835684c", + "_dom_classes": [], + "description": "Validating: 100%", + "_model_name": "FloatProgressModel", + "bar_style": "info", + "max": 1, + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "value": 1, + "_view_count": null, + 
"_view_module_version": "1.5.0", + "orientation": "horizontal", + "min": 0, + "description_tooltip": null, + "_model_module": "@jupyter-widgets/controls", + "layout": "IPY_MODEL_c3f65d683c6e4fe18e31ecc305f8d455" + }, + "model_module_version": "1.5.0" + }, + "f6248a9db7f2466a9ab3a4fbd214f265": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "state": { + "_view_name": "HTMLView", + "style": "IPY_MODEL_9b50abad66b44022aa389bc3f312db6b", + "_dom_classes": [], + "description": "", + "_model_name": "HTMLModel", + "placeholder": "​", + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "value": " 2501/2501 [07:25<00:00, 5.90it/s]", + "_view_count": null, + "_view_module_version": "1.5.0", + "description_tooltip": null, + "_model_module": "@jupyter-widgets/controls", + "layout": "IPY_MODEL_762b2941ff3e47d89b6e6ce4350bc058" + }, + "model_module_version": "1.5.0" + }, + "475e5353d31147d3ab156c0e7835684c": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "state": { + "_view_name": "StyleView", + "_model_name": "ProgressStyleModel", + "description_width": "initial", + "_view_module": "@jupyter-widgets/base", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.2.0", + "bar_color": null, + "_model_module": "@jupyter-widgets/controls" + }, + "model_module_version": "1.5.0" + }, + "c3f65d683c6e4fe18e31ecc305f8d455": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": "2", + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + }, + "model_module_version": "1.2.0" + }, + "9b50abad66b44022aa389bc3f312db6b": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "state": { + "_view_name": "StyleView", + "_model_name": "DescriptionStyleModel", + "description_width": "", + "_view_module": "@jupyter-widgets/base", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.2.0", + "_model_module": "@jupyter-widgets/controls" + }, + "model_module_version": "1.5.0" + }, + "762b2941ff3e47d89b6e6ce4350bc058": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + 
"border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + }, + "model_module_version": "1.2.0" + }, + "1597779d89464892885045be715890a8": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "state": { + "_view_name": "HBoxView", + "_dom_classes": [], + "_model_name": "HBoxModel", + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.5.0", + "box_style": "", + "layout": "IPY_MODEL_8a42468ed6b945e8bfce1803f3ea4452", + "_model_module": "@jupyter-widgets/controls", + "children": [ + "IPY_MODEL_f87eae824cf1492b9555b78648a9f261", + "IPY_MODEL_6cd0d574b5fd43588b8d492674125218" + ] + }, + "model_module_version": "1.5.0" + }, + "8a42468ed6b945e8bfce1803f3ea4452": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + }, + "model_module_version": "1.2.0" + }, + "f87eae824cf1492b9555b78648a9f261": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "state": { + "_view_name": "ProgressView", + "style": "IPY_MODEL_17b25142ac744ba882e2bbd1f42c1db2", + "_dom_classes": [], + "description": "100%", + "_model_name": "FloatProgressModel", + "bar_style": "success", + "max": 626, + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "value": 626, + "_view_count": null, + "_view_module_version": "1.5.0", + "orientation": "horizontal", + "min": 0, + "description_tooltip": null, + "_model_module": "@jupyter-widgets/controls", + "layout": "IPY_MODEL_09185d325ef84c1fad7b07fbd9eeed31" + }, + "model_module_version": "1.5.0" + }, + "6cd0d574b5fd43588b8d492674125218": { + "model_module": "@jupyter-widgets/controls", + "model_name": 
"HTMLModel", + "state": { + "_view_name": "HTMLView", + "style": "IPY_MODEL_ba31765789dc46229493674dab21921d", + "_dom_classes": [], + "description": "", + "_model_name": "HTMLModel", + "placeholder": "​", + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "value": " 626/626 [06:35<00:00, 1.58it/s]", + "_view_count": null, + "_view_module_version": "1.5.0", + "description_tooltip": null, + "_model_module": "@jupyter-widgets/controls", + "layout": "IPY_MODEL_a9dd88fb73374e108482b80993b998eb" + }, + "model_module_version": "1.5.0" + }, + "17b25142ac744ba882e2bbd1f42c1db2": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "state": { + "_view_name": "StyleView", + "_model_name": "ProgressStyleModel", + "description_width": "initial", + "_view_module": "@jupyter-widgets/base", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.2.0", + "bar_color": null, + "_model_module": "@jupyter-widgets/controls" + }, + "model_module_version": "1.5.0" + }, + "09185d325ef84c1fad7b07fbd9eeed31": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + }, + "model_module_version": "1.2.0" + }, + "ba31765789dc46229493674dab21921d": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "state": { + "_view_name": "StyleView", + "_model_name": "DescriptionStyleModel", + "description_width": "", + "_view_module": "@jupyter-widgets/base", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.2.0", + "_model_module": "@jupyter-widgets/controls" + }, + "model_module_version": "1.5.0" + }, + "a9dd88fb73374e108482b80993b998eb": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + 
"max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + }, + "model_module_version": "1.2.0" + } + } + } + }, + "cells": [ + { + "cell_type": "code", + "metadata": { + "id": "PJX4vkjj6wYz", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 121 + }, + "outputId": "83a8a420-48cd-4d49-bc60-2693268481c6" + }, + "source": [ + "from google.colab import drive\n", + "drive.mount('/content/drive')" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly\n", + "\n", + "Enter your authorization code:\n", + "··········\n", + "Mounted at /content/drive\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "1V5cInhu42Wk", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 302 + }, + "outputId": "5501a5f1-fc49-4df7-f7a0-31cc37647337" + }, + "source": [ + "!nvidia-smi" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Sat May 9 15:05:25 2020 \n", + "+-----------------------------------------------------------------------------+\n", + "| NVIDIA-SMI 440.82 Driver Version: 418.67 CUDA Version: 10.1 |\n", + "|-------------------------------+----------------------+----------------------+\n", + "| GPU Name Persistence-M| Bus-Id Disp.A | Volatile Uncorr. ECC |\n", + "| Fan Temp Perf Pwr:Usage/Cap| Memory-Usage | GPU-Util Compute M. |\n", + "|===============================+======================+======================|\n", + "| 0 Tesla P100-PCIE... Off | 00000000:00:04.0 Off | 0 |\n", + "| N/A 34C P0 25W / 250W | 0MiB / 16280MiB | 0% Default |\n", + "+-------------------------------+----------------------+----------------------+\n", + " \n", + "+-----------------------------------------------------------------------------+\n", + "| Processes: GPU Memory |\n", + "| GPU PID Type Process name Usage |\n", + "|=============================================================================|\n", + "| No running processes found |\n", + "+-----------------------------------------------------------------------------+\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "epWcPHhJ7v7j" + }, + "source": [ + "Instal apex if you want to do 16 bit training. 
You'll probably need to restart the notebook after installing apex" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "k1Xy7ZG-7gHt" + }, + "source": [ + "# !export CUDA_HOME=/usr/local/cuda-10.1\n", + "# !git clone https://github.com/NVIDIA/apex\n", + "# !pip install -v --no-cache-dir --global-option=\"--cpp_ext\" --global-option=\"--cuda_ext\" ./apex" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "SDVQ04fGRb1v", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000 + }, + "outputId": "11689986-ca27-4ab0-f14d-5ee4f0eba40d" + }, + "source": [ + "!pip install transformers\n", + "!pip install pytorch_lightning" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Collecting transformers\n", + "\u001b[?25l Downloading https://files.pythonhosted.org/packages/cd/38/c9527aa055241c66c4d785381eaf6f80a28c224cae97daa1f8b183b5fabb/transformers-2.9.0-py3-none-any.whl (635kB)\n", + "\r\u001b[K |▌ | 10kB 20.5MB/s eta 0:00:01\r\u001b[K |█ | 20kB 1.7MB/s eta 0:00:01\r\u001b[K |█▌ | 30kB 2.3MB/s eta 0:00:01\r\u001b[K |██ | 40kB 2.6MB/s eta 0:00:01\r\u001b[K |██▋ | 51kB 2.0MB/s eta 0:00:01\r\u001b[K |███ | 61kB 2.3MB/s eta 0:00:01\r\u001b[K |███▋ | 71kB 2.5MB/s eta 0:00:01\r\u001b[K |████▏ | 81kB 2.7MB/s eta 0:00:01\r\u001b[K |████▋ | 92kB 3.0MB/s eta 0:00:01\r\u001b[K |█████▏ | 102kB 2.8MB/s eta 0:00:01\r\u001b[K |█████▊ | 112kB 2.8MB/s eta 0:00:01\r\u001b[K |██████▏ | 122kB 2.8MB/s eta 0:00:01\r\u001b[K |██████▊ | 133kB 2.8MB/s eta 0:00:01\r\u001b[K |███████▏ | 143kB 2.8MB/s eta 0:00:01\r\u001b[K |███████▊ | 153kB 2.8MB/s eta 0:00:01\r\u001b[K |████████▎ | 163kB 2.8MB/s eta 0:00:01\r\u001b[K |████████▊ | 174kB 2.8MB/s eta 0:00:01\r\u001b[K |█████████▎ | 184kB 2.8MB/s eta 0:00:01\r\u001b[K |█████████▉ | 194kB 2.8MB/s eta 0:00:01\r\u001b[K |██████████▎ | 204kB 2.8MB/s eta 0:00:01\r\u001b[K |██████████▉ | 215kB 2.8MB/s eta 0:00:01\r\u001b[K |███████████▍ | 225kB 2.8MB/s eta 0:00:01\r\u001b[K |███████████▉ | 235kB 2.8MB/s eta 0:00:01\r\u001b[K |████████████▍ | 245kB 2.8MB/s eta 0:00:01\r\u001b[K |████████████▉ | 256kB 2.8MB/s eta 0:00:01\r\u001b[K |█████████████▍ | 266kB 2.8MB/s eta 0:00:01\r\u001b[K |██████████████ | 276kB 2.8MB/s eta 0:00:01\r\u001b[K |██████████████▍ | 286kB 2.8MB/s eta 0:00:01\r\u001b[K |███████████████ | 296kB 2.8MB/s eta 0:00:01\r\u001b[K |███████████████▌ | 307kB 2.8MB/s eta 0:00:01\r\u001b[K |████████████████ | 317kB 2.8MB/s eta 0:00:01\r\u001b[K |████████████████▌ | 327kB 2.8MB/s eta 0:00:01\r\u001b[K |█████████████████ | 337kB 2.8MB/s eta 0:00:01\r\u001b[K |█████████████████▌ | 348kB 2.8MB/s eta 0:00:01\r\u001b[K |██████████████████ | 358kB 2.8MB/s eta 0:00:01\r\u001b[K |██████████████████▋ | 368kB 2.8MB/s eta 0:00:01\r\u001b[K |███████████████████ | 378kB 2.8MB/s eta 0:00:01\r\u001b[K |███████████████████▋ | 389kB 2.8MB/s eta 0:00:01\r\u001b[K |████████████████████ | 399kB 2.8MB/s eta 0:00:01\r\u001b[K |████████████████████▋ | 409kB 2.8MB/s eta 0:00:01\r\u001b[K |█████████████████████▏ | 419kB 2.8MB/s eta 0:00:01\r\u001b[K |█████████████████████▋ | 430kB 2.8MB/s eta 0:00:01\r\u001b[K |██████████████████████▏ | 440kB 2.8MB/s eta 0:00:01\r\u001b[K |██████████████████████▊ | 450kB 2.8MB/s eta 0:00:01\r\u001b[K |███████████████████████▏ | 460kB 2.8MB/s eta 0:00:01\r\u001b[K |███████████████████████▊ | 471kB 2.8MB/s eta 0:00:01\r\u001b[K |████████████████████████▎ | 481kB 2.8MB/s eta 0:00:01\r\u001b[K |████████████████████████▊ | 491kB 2.8MB/s eta 
0:00:01\r\u001b[K |█████████████████████████▎ | 501kB 2.8MB/s eta 0:00:01\r\u001b[K |█████████████████████████▊ | 512kB 2.8MB/s eta 0:00:01\r\u001b[K |██████████████████████████▎ | 522kB 2.8MB/s eta 0:00:01\r\u001b[K |██████████████████████████▉ | 532kB 2.8MB/s eta 0:00:01\r\u001b[K |███████████████████████████▎ | 542kB 2.8MB/s eta 0:00:01\r\u001b[K |███████████████████████████▉ | 552kB 2.8MB/s eta 0:00:01\r\u001b[K |████████████████████████████▍ | 563kB 2.8MB/s eta 0:00:01\r\u001b[K |████████████████████████████▉ | 573kB 2.8MB/s eta 0:00:01\r\u001b[K |█████████████████████████████▍ | 583kB 2.8MB/s eta 0:00:01\r\u001b[K |██████████████████████████████ | 593kB 2.8MB/s eta 0:00:01\r\u001b[K |██████████████████████████████▍ | 604kB 2.8MB/s eta 0:00:01\r\u001b[K |███████████████████████████████ | 614kB 2.8MB/s eta 0:00:01\r\u001b[K |███████████████████████████████▍| 624kB 2.8MB/s eta 0:00:01\r\u001b[K |████████████████████████████████| 634kB 2.8MB/s eta 0:00:01\r\u001b[K |████████████████████████████████| 645kB 2.8MB/s \n", + "\u001b[?25hRequirement already satisfied: requests in /usr/local/lib/python3.6/dist-packages (from transformers) (2.23.0)\n", + "Requirement already satisfied: dataclasses; python_version < \"3.7\" in /usr/local/lib/python3.6/dist-packages (from transformers) (0.7)\n", + "Requirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.6/dist-packages (from transformers) (4.41.1)\n", + "Collecting tokenizers==0.7.0\n", + "\u001b[?25l Downloading https://files.pythonhosted.org/packages/14/e5/a26eb4716523808bb0a799fcfdceb6ebf77a18169d9591b2f46a9adb87d9/tokenizers-0.7.0-cp36-cp36m-manylinux1_x86_64.whl (3.8MB)\n", + "\u001b[K |████████████████████████████████| 3.8MB 12.8MB/s \n", + "\u001b[?25hRequirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.6/dist-packages (from transformers) (2019.12.20)\n", + "Collecting sacremoses\n", + "\u001b[?25l Downloading https://files.pythonhosted.org/packages/7d/34/09d19aff26edcc8eb2a01bed8e98f13a1537005d31e95233fd48216eed10/sacremoses-0.0.43.tar.gz (883kB)\n", + "\u001b[K |████████████████████████████████| 890kB 28.2MB/s \n", + "\u001b[?25hCollecting sentencepiece\n", + "\u001b[?25l Downloading https://files.pythonhosted.org/packages/98/2c/8df20f3ac6c22ac224fff307ebc102818206c53fc454ecd37d8ac2060df5/sentencepiece-0.1.86-cp36-cp36m-manylinux1_x86_64.whl (1.0MB)\n", + "\u001b[K |████████████████████████████████| 1.0MB 42.6MB/s \n", + "\u001b[?25hRequirement already satisfied: numpy in /usr/local/lib/python3.6/dist-packages (from transformers) (1.18.4)\n", + "Requirement already satisfied: filelock in /usr/local/lib/python3.6/dist-packages (from transformers) (3.0.12)\n", + "Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.6/dist-packages (from requests->transformers) (3.0.4)\n", + "Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.6/dist-packages (from requests->transformers) (1.24.3)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.6/dist-packages (from requests->transformers) (2020.4.5.1)\n", + "Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.6/dist-packages (from requests->transformers) (2.9)\n", + "Requirement already satisfied: six in /usr/local/lib/python3.6/dist-packages (from sacremoses->transformers) (1.12.0)\n", + "Requirement already satisfied: click in /usr/local/lib/python3.6/dist-packages (from sacremoses->transformers) (7.1.2)\n", + "Requirement already satisfied: joblib in 
/usr/local/lib/python3.6/dist-packages (from sacremoses->transformers) (0.14.1)\n", + "Building wheels for collected packages: sacremoses\n", + " Building wheel for sacremoses (setup.py) ... \u001b[?25l\u001b[?25hdone\n", + " Created wheel for sacremoses: filename=sacremoses-0.0.43-cp36-none-any.whl size=893260 sha256=1d6422ddbf7526c5762d09193b36548e7b07bf9cc526057f833254f31a68c87c\n", + " Stored in directory: /root/.cache/pip/wheels/29/3c/fd/7ce5c3f0666dab31a50123635e6fb5e19ceb42ce38d4e58f45\n", + "Successfully built sacremoses\n", + "Installing collected packages: tokenizers, sacremoses, sentencepiece, transformers\n", + "Successfully installed sacremoses-0.0.43 sentencepiece-0.1.86 tokenizers-0.7.0 transformers-2.9.0\n", + "Collecting pytorch_lightning\n", + "\u001b[?25l Downloading https://files.pythonhosted.org/packages/75/ac/ac03f1f3fa950d96ca52f07d33fdbf5add05f164c1ac4eae179231dfa93d/pytorch_lightning-0.7.5-py3-none-any.whl (233kB)\n", + "\u001b[K |████████████████████████████████| 235kB 2.8MB/s \n", + "\u001b[?25hRequirement already satisfied: numpy>=1.16.4 in /usr/local/lib/python3.6/dist-packages (from pytorch_lightning) (1.18.4)\n", + "Requirement already satisfied: tqdm>=4.41.0 in /usr/local/lib/python3.6/dist-packages (from pytorch_lightning) (4.41.1)\n", + "Collecting future>=0.17.1\n", + "\u001b[?25l Downloading https://files.pythonhosted.org/packages/45/0b/38b06fd9b92dc2b68d58b75f900e97884c45bedd2ff83203d933cf5851c9/future-0.18.2.tar.gz (829kB)\n", + "\u001b[K |████████████████████████████████| 829kB 8.4MB/s \n", + "\u001b[?25hRequirement already satisfied: tensorboard>=1.14 in /usr/local/lib/python3.6/dist-packages (from pytorch_lightning) (2.2.1)\n", + "Requirement already satisfied: torch>=1.1 in /usr/local/lib/python3.6/dist-packages (from pytorch_lightning) (1.5.0+cu101)\n", + "Requirement already satisfied: six>=1.10.0 in /usr/local/lib/python3.6/dist-packages (from tensorboard>=1.14->pytorch_lightning) (1.12.0)\n", + "Requirement already satisfied: tensorboard-plugin-wit>=1.6.0 in /usr/local/lib/python3.6/dist-packages (from tensorboard>=1.14->pytorch_lightning) (1.6.0.post3)\n", + "Requirement already satisfied: google-auth<2,>=1.6.3 in /usr/local/lib/python3.6/dist-packages (from tensorboard>=1.14->pytorch_lightning) (1.7.2)\n", + "Requirement already satisfied: markdown>=2.6.8 in /usr/local/lib/python3.6/dist-packages (from tensorboard>=1.14->pytorch_lightning) (3.2.1)\n", + "Requirement already satisfied: wheel>=0.26; python_version >= \"3\" in /usr/local/lib/python3.6/dist-packages (from tensorboard>=1.14->pytorch_lightning) (0.34.2)\n", + "Requirement already satisfied: google-auth-oauthlib<0.5,>=0.4.1 in /usr/local/lib/python3.6/dist-packages (from tensorboard>=1.14->pytorch_lightning) (0.4.1)\n", + "Requirement already satisfied: grpcio>=1.24.3 in /usr/local/lib/python3.6/dist-packages (from tensorboard>=1.14->pytorch_lightning) (1.28.1)\n", + "Requirement already satisfied: protobuf>=3.6.0 in /usr/local/lib/python3.6/dist-packages (from tensorboard>=1.14->pytorch_lightning) (3.10.0)\n", + "Requirement already satisfied: absl-py>=0.4 in /usr/local/lib/python3.6/dist-packages (from tensorboard>=1.14->pytorch_lightning) (0.9.0)\n", + "Requirement already satisfied: requests<3,>=2.21.0 in /usr/local/lib/python3.6/dist-packages (from tensorboard>=1.14->pytorch_lightning) (2.23.0)\n", + "Requirement already satisfied: setuptools>=41.0.0 in /usr/local/lib/python3.6/dist-packages (from tensorboard>=1.14->pytorch_lightning) (46.1.3)\n", + "Requirement already 
satisfied: werkzeug>=0.11.15 in /usr/local/lib/python3.6/dist-packages (from tensorboard>=1.14->pytorch_lightning) (1.0.1)\n", + "Requirement already satisfied: pyasn1-modules>=0.2.1 in /usr/local/lib/python3.6/dist-packages (from google-auth<2,>=1.6.3->tensorboard>=1.14->pytorch_lightning) (0.2.8)\n", + "Requirement already satisfied: cachetools<3.2,>=2.0.0 in /usr/local/lib/python3.6/dist-packages (from google-auth<2,>=1.6.3->tensorboard>=1.14->pytorch_lightning) (3.1.1)\n", + "Requirement already satisfied: rsa<4.1,>=3.1.4 in /usr/local/lib/python3.6/dist-packages (from google-auth<2,>=1.6.3->tensorboard>=1.14->pytorch_lightning) (4.0)\n", + "Requirement already satisfied: requests-oauthlib>=0.7.0 in /usr/local/lib/python3.6/dist-packages (from google-auth-oauthlib<0.5,>=0.4.1->tensorboard>=1.14->pytorch_lightning) (1.3.0)\n", + "Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.6/dist-packages (from requests<3,>=2.21.0->tensorboard>=1.14->pytorch_lightning) (2.9)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.6/dist-packages (from requests<3,>=2.21.0->tensorboard>=1.14->pytorch_lightning) (2020.4.5.1)\n", + "Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.6/dist-packages (from requests<3,>=2.21.0->tensorboard>=1.14->pytorch_lightning) (1.24.3)\n", + "Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.6/dist-packages (from requests<3,>=2.21.0->tensorboard>=1.14->pytorch_lightning) (3.0.4)\n", + "Requirement already satisfied: pyasn1<0.5.0,>=0.4.6 in /usr/local/lib/python3.6/dist-packages (from pyasn1-modules>=0.2.1->google-auth<2,>=1.6.3->tensorboard>=1.14->pytorch_lightning) (0.4.8)\n", + "Requirement already satisfied: oauthlib>=3.0.0 in /usr/local/lib/python3.6/dist-packages (from requests-oauthlib>=0.7.0->google-auth-oauthlib<0.5,>=0.4.1->tensorboard>=1.14->pytorch_lightning) (3.1.0)\n", + "Building wheels for collected packages: future\n", + " Building wheel for future (setup.py) ... \u001b[?25l\u001b[?25hdone\n", + " Created wheel for future: filename=future-0.18.2-cp36-none-any.whl size=491057 sha256=2748e4b7f9acd3e1e87b8118cdcb4cb5a4bf5ed682d99f3866e26265ab336042\n", + " Stored in directory: /root/.cache/pip/wheels/8b/99/a0/81daf51dcd359a9377b110a8a886b3895921802d2fc1b2397e\n", + "Successfully built future\n", + "Installing collected packages: future, pytorch-lightning\n", + " Found existing installation: future 0.16.0\n", + " Uninstalling future-0.16.0:\n", + " Successfully uninstalled future-0.16.0\n", + "Successfully installed future-0.18.2 pytorch-lightning-0.7.5\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "HVxGfmEMCKs_" + }, + "source": [ + "## T5 fine-tuning\n", + "\n", + "This notebook is to showcase how to fine-tune [T5 model](https://arxiv.org/abs/1910.10683) with Huggigface's [Transformers](https://github.com/huggingface/transformers/) to solve different NLP tasks using text-2-text approach proposed in the T5 paper. 
For the demo I chose three problems that are not naturally text-2-text, to reiterate the point from the paper that the text-2-text framework is widely applicable and can be used for different tasks without changing the model at all.\n", + "\n", + "This is a rough draft, so if you find any issues with this notebook or have any questions, reach out to me via [Twitter](https://twitter.com/psuraj28).\n", + "\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "HS8mNXq6bdxq", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 84 + }, + "outputId": "b0a32f10-f2ef-4d49-b433-266e8206040b" + }, + "source": [ + "import argparse\n", + "import glob\n", + "import os\n", + "import json\n", + "import time\n", + "import logging\n", + "import random\n", + "import re\n", + "from itertools import chain\n", + "from string import punctuation\n", + "\n", + "import nltk\n", + "nltk.download('punkt')\n", + "from nltk.tokenize import sent_tokenize\n", + "\n", + "import pandas as pd\n", + "import numpy as np\n", + "import torch\n", + "from torch.utils.data import Dataset, DataLoader\n", + "import pytorch_lightning as pl\n", + "\n", + "\n", + "from transformers import (\n", + " AdamW,\n", + " T5ForConditionalGeneration,\n", + " T5Tokenizer,\n", + " get_linear_schedule_with_warmup\n", + ")" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "text": [ + "[nltk_data] Downloading package punkt to /root/nltk_data...\n", + "[nltk_data] Unzipping tokenizers/punkt.zip.\n" + ], + "name": "stdout" + }, + { + "output_type": "stream", + "text": [ + "INFO:transformers.file_utils:PyTorch version 1.5.0+cu101 available.\n", + "INFO:transformers.file_utils:TensorFlow version 2.2.0-rc4 available.\n" + ], + "name": "stderr" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "IswYuhWaz7QJ" + }, + "source": [ + "def set_seed(seed):\n", + " random.seed(seed)\n", + " np.random.seed(seed)\n", + " torch.manual_seed(seed)\n", + " if torch.cuda.is_available():\n", + " torch.cuda.manual_seed_all(seed)\n", + "\n", + "set_seed(42)" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "RKNr7fgzcKpZ" + }, + "source": [ + "## Model\n", + "\n", + "We'll be using the awesome [pytorch-lightning](https://github.com/PytorchLightning/pytorch-lightning) library for training. Most of the code below is adapted from https://github.com/huggingface/transformers/blob/master/examples/lightning_base.py\n", + "\n", + "The trainer is generic and can be used for any text-2-text task. You'll just need to change the dataset; the rest of the code stays unchanged for all the tasks.\n", + "\n", + "This is the most interesting and powerful thing about the text-2-text format. You can fine-tune the model on a variety of NLP tasks just by formulating the problem in a text-2-text setting. There is no need to change the hyperparameters, learning rate, optimizer or loss function. Just plug in your dataset and you are ready to go!"
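To make that concrete, here is a minimal sketch of what "just plug in your dataset" means in practice: every task is reduced to plain (input text, target text) string pairs, and only the dataset that produces those pairs changes between tasks. The strings and prefixes below are illustrative assumptions, not the exact formats this notebook uses later.

```python
# Minimal sketch: three different NLP tasks expressed as (input text, target text) pairs.
# These pairs are made-up illustrations; the trainer code never changes between them.
examples = [
    # sentiment classification: the label word itself is the target text
    ("I went to see this movie with my husband, and we both thought the acting was terrible!",
     "negative"),
    # summarization-style task: the summary is the target text
    ("summarize: The committee met on Tuesday and approved the new budget after a long debate.",
     "committee approves new budget"),
    # question answering: question and context packed into a single input string
    ("question: Who proposed the text-to-text framework? context: The T5 paper proposed it.",
     "the T5 paper"),
]

for source, target in examples:
    print(f"input : {source}")
    print(f"target: {target}")
    print()
```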
+ ] + }, + { + "cell_type": "code", + "metadata": { + "id": "B7uVNBtXST5X" + }, + "source": [ + "class T5FineTuner(pl.LightningModule):\n", + " def __init__(self, hparams):\n", + " super(T5FineTuner, self).__init__()\n", + " self.hparams = hparams\n", + "\n", + " self.model = T5ForConditionalGeneration.from_pretrained(hparams.model_name_or_path)\n", + " self.tokenizer = T5Tokenizer.from_pretrained(hparams.tokenizer_name_or_path)\n", + "\n", + " def is_logger(self):\n", + " return self.trainer.proc_rank <= 0\n", + "\n", + " def forward(\n", + " self, input_ids, attention_mask=None, decoder_input_ids=None, decoder_attention_mask=None, lm_labels=None\n", + " ):\n", + " return self.model(\n", + " input_ids,\n", + " attention_mask=attention_mask,\n", + " decoder_input_ids=decoder_input_ids,\n", + " decoder_attention_mask=decoder_attention_mask,\n", + " lm_labels=lm_labels,\n", + " )\n", + "\n", + " def _step(self, batch):\n", + " lm_labels = batch[\"target_ids\"]\n", + " lm_labels[lm_labels[:, :] == self.tokenizer.pad_token_id] = -100\n", + "\n", + " outputs = self(\n", + " input_ids=batch[\"source_ids\"],\n", + " attention_mask=batch[\"source_mask\"],\n", + " lm_labels=lm_labels,\n", + " decoder_attention_mask=batch['target_mask']\n", + " )\n", + "\n", + " loss = outputs[0]\n", + "\n", + " return loss\n", + "\n", + " def training_step(self, batch, batch_idx):\n", + " loss = self._step(batch)\n", + "\n", + " tensorboard_logs = {\"train_loss\": loss}\n", + " return {\"loss\": loss, \"log\": tensorboard_logs}\n", + "\n", + " def training_epoch_end(self, outputs):\n", + " avg_train_loss = torch.stack([x[\"loss\"] for x in outputs]).mean()\n", + " tensorboard_logs = {\"avg_train_loss\": avg_train_loss}\n", + " return {\"avg_train_loss\": avg_train_loss, \"log\": tensorboard_logs, 'progress_bar': tensorboard_logs}\n", + "\n", + " def validation_step(self, batch, batch_idx):\n", + " loss = self._step(batch)\n", + " return {\"val_loss\": loss}\n", + "\n", + " def validation_epoch_end(self, outputs):\n", + " avg_loss = torch.stack([x[\"val_loss\"] for x in outputs]).mean()\n", + " tensorboard_logs = {\"val_loss\": avg_loss}\n", + " return {\"avg_val_loss\": avg_loss, \"log\": tensorboard_logs, 'progress_bar': tensorboard_logs}\n", + "\n", + " def configure_optimizers(self):\n", + " \"Prepare optimizer and schedule (linear warmup and decay)\"\n", + "\n", + " model = self.model\n", + " no_decay = [\"bias\", \"LayerNorm.weight\"]\n", + " optimizer_grouped_parameters = [\n", + " {\n", + " \"params\": [p for n, p in model.named_parameters() if not any(nd in n for nd in no_decay)],\n", + " \"weight_decay\": self.hparams.weight_decay,\n", + " },\n", + " {\n", + " \"params\": [p for n, p in model.named_parameters() if any(nd in n for nd in no_decay)],\n", + " \"weight_decay\": 0.0,\n", + " },\n", + " ]\n", + " optimizer = AdamW(optimizer_grouped_parameters, lr=self.hparams.learning_rate, eps=self.hparams.adam_epsilon)\n", + " self.opt = optimizer\n", + " return [optimizer]\n", + "\n", + " def optimizer_step(self, epoch, batch_idx, optimizer, optimizer_idx, second_order_closure=None):\n", + " if self.trainer.use_tpu:\n", + " xm.optimizer_step(optimizer)\n", + " else:\n", + " optimizer.step()\n", + " optimizer.zero_grad()\n", + " self.lr_scheduler.step()\n", + "\n", + " def get_tqdm_dict(self):\n", + " tqdm_dict = {\"loss\": \"{:.3f}\".format(self.trainer.avg_loss), \"lr\": self.lr_scheduler.get_last_lr()[-1]}\n", + "\n", + " return tqdm_dict\n", + "\n", + " def train_dataloader(self):\n", + " train_dataset = 
get_dataset(tokenizer=self.tokenizer, type_path=\"train\", args=self.hparams)\n", + " dataloader = DataLoader(train_dataset, batch_size=self.hparams.train_batch_size, drop_last=True, shuffle=True, num_workers=4)\n", + " t_total = (\n", + " (len(dataloader.dataset) // (self.hparams.train_batch_size * max(1, self.hparams.n_gpu)))\n", + " // self.hparams.gradient_accumulation_steps\n", + " * float(self.hparams.num_train_epochs)\n", + " )\n", + " scheduler = get_linear_schedule_with_warmup(\n", + " self.opt, num_warmup_steps=self.hparams.warmup_steps, num_training_steps=t_total\n", + " )\n", + " self.lr_scheduler = scheduler\n", + " return dataloader\n", + "\n", + " def val_dataloader(self):\n", + " val_dataset = get_dataset(tokenizer=self.tokenizer, type_path=\"val\", args=self.hparams)\n", + " return DataLoader(val_dataset, batch_size=self.hparams.eval_batch_size, num_workers=4)" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "oh1R5C-GwMqx" + }, + "source": [ + "logger = logging.getLogger(__name__)\n", + "\n", + "class LoggingCallback(pl.Callback):\n", + " def on_validation_end(self, trainer, pl_module):\n", + " logger.info(\"***** Validation results *****\")\n", + " if pl_module.is_logger():\n", + " metrics = trainer.callback_metrics\n", + " # Log results\n", + " for key in sorted(metrics):\n", + " if key not in [\"log\", \"progress_bar\"]:\n", + " logger.info(\"{} = {}\\n\".format(key, str(metrics[key])))\n", + "\n", + " def on_test_end(self, trainer, pl_module):\n", + " logger.info(\"***** Test results *****\")\n", + "\n", + " if pl_module.is_logger():\n", + " metrics = trainer.callback_metrics\n", + "\n", + " # Log and save results to file\n", + " output_test_results_file = os.path.join(pl_module.hparams.output_dir, \"test_results.txt\")\n", + " with open(output_test_results_file, \"w\") as writer:\n", + " for key in sorted(metrics):\n", + " if key not in [\"log\", \"progress_bar\"]:\n", + " logger.info(\"{} = {}\\n\".format(key, str(metrics[key])))\n", + " writer.write(\"{} = {}\\n\".format(key, str(metrics[key])))" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "a4hjvsBJ5Zk5" + }, + "source": [ + "Let's define the hyperparameters and other arguments. You can overide this `dict` for specific task as needed. 
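For example, once the base `args_dict` defined in the next cell is in place, a task-specific override could look like the sketch below. The concrete values (`aclImdb`, `t5_imdb_sentiment`, 2 epochs) are illustrative assumptions for the IMDB task, not necessarily the exact settings used later in this notebook.

```python
import argparse  # already imported above; repeated so the sketch reads standalone

# Hypothetical per-task override of the shared defaults (illustrative values only).
args_dict.update(dict(
    data_dir="aclImdb",              # where this task's train/val files live
    output_dir="t5_imdb_sentiment",  # where checkpoints should be written
    num_train_epochs=2,
))
args = argparse.Namespace(**args_dict)

# Sanity check: train_batch_size * gradient_accumulation_steps = 8 * 16 = 128,
# which is the effective batch size mentioned below.
print(args.train_batch_size * args.gradient_accumulation_steps)
```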
While in most of cases you'll only need to change the `data_dir`and `output_dir`.\n", + "\n", + "Here the batch size is 8 and gradient_accumulation_steps are 16 so the effective batch size is 128" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "urduopvizqTq" + }, + "source": [ + "args_dict = dict(\n", + " data_dir=\"\", # path for data files\n", + " output_dir=\"\", # path to save the checkpoints\n", + " model_name_or_path='t5-base',\n", + " tokenizer_name_or_path='t5-base',\n", + " max_seq_length=512,\n", + " learning_rate=1e-4,\n", + " weight_decay=0.0,\n", + " adam_epsilon=1e-8,\n", + " warmup_steps=0,\n", + " train_batch_size=8,\n", + " eval_batch_size=8,\n", + " num_train_epochs=2,\n", + " gradient_accumulation_steps=16,\n", + " n_gpu=1,\n", + " early_stop_callback=False,\n", + " fp_16=False, # if you want to enable 16-bit training then install apex and set this to true\n", + " opt_level='O1', # you can find out more on optimisation levels here https://nvidia.github.io/apex/amp.html#opt-levels-and-properties\n", + " max_grad_norm=1.0, # if you enable 16-bit training then set this to a sensible value, 0.5 is a good default\n", + " seed=42,\n", + ")" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "vfhlYUUV2NIh" + }, + "source": [ + "## IMDB review classification" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "b3C13iabZvwK" + }, + "source": [ + "### Download IMDB Data" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "7R0QdcgXuIWW" + }, + "source": [ + "!wget https://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz\n", + "!tar -xvf aclImdb_v1.tar.gz" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "ni1cAK7EvXSB" + }, + "source": [ + "train_pos_files = glob.glob('aclImdb/train/pos/*.txt')\n", + "train_neg_files = glob.glob('aclImdb/train/neg/*.txt')" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "jEsRn5pa0v8d", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 34 + }, + "outputId": "6977ce56-d0b4-4d9f-8548-22003bb07eaf" + }, + "source": [ + "len(train_pos_files), len(train_neg_files)" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "(12500, 12500)" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 10 + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "5zgS8KhlaPiA" + }, + "source": [ + "We will use 2000 samples from the train set for validation. 
Let's choose 1000 postive reviews and 1000 negative reviews for validation and save them in the val directory" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "hLvBHcXwzXrk" + }, + "source": [ + "!mkdir aclImdb/val aclImdb/val/pos aclImdb/val/neg" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "IXZmLZ1pzjiY" + }, + "source": [ + "random.shuffle(train_pos_files)\n", + "random.shuffle(train_neg_files)\n", + "\n", + "val_pos_files = train_pos_files[:1000]\n", + "val_neg_files = train_neg_files[:1000]" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "5yTS2Jx40UNu" + }, + "source": [ + "import shutil" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "hJnJpkdb0ZKY" + }, + "source": [ + "for f in val_pos_files:\n", + " shutil.move(f, 'aclImdb/val/pos')\n", + "for f in val_neg_files:\n", + " shutil.move(f, 'aclImdb/val/neg')" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "qdEgCwL7cIyi" + }, + "source": [ + "### Prepare Dataset" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "McQC1FotigqA", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 186, + "referenced_widgets": [ + "7d8f60bfc0a248e58028b6e8a477a5f7", + "72dc1e39b931429883e68c0603797896", + "cde60c5e18f04ba792fff8c2ac33f470", + "c0c0df12695b4a1eacf8fa4ccc0ac62c", + "72ea881ce3f445a9983d858b76dd257b", + "d0f0c28a14b242f8990a547ed7f87c04", + "f97741534b554be3b5cdccd45c73b317", + "1e70a3dc7090487fa883e932bff395cb" + ] + }, + "outputId": "f60dbf68-32cf-44e1-9a2f-f9dba38cbbac" + }, + "source": [ + "tokenizer = T5Tokenizer.from_pretrained('t5-base')" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "text": [ + "INFO:filelock:Lock 139780871368544 acquired on /root/.cache/torch/transformers/68f1b8dbca4350743bb54b8c4169fd38cbabaad564f85a9239337a8d0342af9f.9995af32582a1a7062cb3173c118cb7b4636fa03feb967340f20fc37406f021f.lock\n", + "INFO:transformers.file_utils:https://s3.amazonaws.com/models.huggingface.co/bert/t5-spiece.model not found in cache or force_download set to True, downloading to /root/.cache/torch/transformers/tmpgy9lk1eo\n" + ], + "name": "stderr" + }, + { + "output_type": "display_data", + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "7d8f60bfc0a248e58028b6e8a477a5f7", + "version_minor": 0, + "version_major": 2 + }, + "text/plain": [ + "HBox(children=(FloatProgress(value=0.0, description='Downloading', max=791656.0, style=ProgressStyle(descripti…" + ] + }, + "metadata": { + "tags": [] + } + }, + { + "output_type": "stream", + "text": [ + "INFO:transformers.file_utils:storing https://s3.amazonaws.com/models.huggingface.co/bert/t5-spiece.model in cache at /root/.cache/torch/transformers/68f1b8dbca4350743bb54b8c4169fd38cbabaad564f85a9239337a8d0342af9f.9995af32582a1a7062cb3173c118cb7b4636fa03feb967340f20fc37406f021f\n", + "INFO:transformers.file_utils:creating metadata file for /root/.cache/torch/transformers/68f1b8dbca4350743bb54b8c4169fd38cbabaad564f85a9239337a8d0342af9f.9995af32582a1a7062cb3173c118cb7b4636fa03feb967340f20fc37406f021f\n", + "INFO:filelock:Lock 139780871368544 released on /root/.cache/torch/transformers/68f1b8dbca4350743bb54b8c4169fd38cbabaad564f85a9239337a8d0342af9f.9995af32582a1a7062cb3173c118cb7b4636fa03feb967340f20fc37406f021f.lock\n", + "INFO:transformers.tokenization_utils:loading file 
https://s3.amazonaws.com/models.huggingface.co/bert/t5-spiece.model from cache at /root/.cache/torch/transformers/68f1b8dbca4350743bb54b8c4169fd38cbabaad564f85a9239337a8d0342af9f.9995af32582a1a7062cb3173c118cb7b4636fa03feb967340f20fc37406f021f\n" + ], + "name": "stderr" + }, + { + "output_type": "stream", + "text": [ + "\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "wthd9SM74RG8", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 34 + }, + "outputId": "52deb6bd-19c4-4071-8bcb-254925d8e4cc" + }, + "source": [ + "ids_neg = tokenizer.encode('negative </s>')\n", + "ids_pos = tokenizer.encode('positive </s>')\n", + "len(ids_neg), len(ids_pos)" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "(2, 2)" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 21 + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "k5sJkyI3a723" + }, + "source": [ + "All the examples are converted to the text-to-text format as shown in the paper. However, I didn't use any task prefix here. The examples are encoded as follows:\n", + "if the review is positive then the target is 'positive', else 'negative'\n", + "\n", + "**input**: I went to see this\n", + "movie with my husband, and we both\n", + "thought the acting was terrible!\n", + "\n", + "**target**: negative\n", + "\n", + "**input**: Despite what others say,\n", + "I thought this movie was funny.\n", + "\n", + "**target**: positive" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "VEYmYHKGcxEq" + }, + "source": [ + "The dataset below takes care of reading the review files and processing the examples into the text-to-text format.\n", + "\n", + "It cleans the review text by removing the HTML tags. It also appends the eos token `</s>` at the end of both input and target, as required by the T5 model.\n", + "\n", + "For T5 the max input length is 512, and we can choose the max length for the target sequence depending upon our dataset. The `T5Tokenizer` encodes both 'positive' and 'negative' as single ids, so I chose a max target length of 2: one id for the sentiment and one extra for the `</s>` token." + ] + },
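+ { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "To make the encoding concrete, here is a minimal sketch (added for illustration, not part of the original notebook) that encodes one made-up review and its sentiment target the same way the dataset class below does, reusing the `tokenizer` loaded above. The variable names are illustrative only." + ] + },
+ { + "cell_type": "code", + "metadata": {}, + "source": [ + "# Illustrative sketch only: the review text and variable names are made up,\n", + "# but the tokenizer calls mirror the ImdbDataset class defined below.\n", + "sample_review = \"Despite what others say I thought this movie was funny\"\n", + "sample_input = sample_review + ' </s>' # input text + eos token\n", + "sample_target = 'positive </s>' # target text + eos token\n", + "\n", + "enc_input = tokenizer.batch_encode_plus([sample_input], max_length=512, pad_to_max_length=True, return_tensors='pt')\n", + "enc_target = tokenizer.batch_encode_plus([sample_target], max_length=2, pad_to_max_length=True, return_tensors='pt')\n", + "enc_input['input_ids'].shape, enc_target['input_ids']" + ], + "execution_count": null, + "outputs": [] + },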
+ { + "cell_type": "code", + "metadata": { + "id": "IIY0GenSb72m" + }, + "source": [ + "class ImdbDataset(Dataset):\n", + " def __init__(self, tokenizer, data_dir, type_path, max_len=512):\n", + " self.pos_file_path = os.path.join(data_dir, type_path, 'pos')\n", + " self.neg_file_path = os.path.join(data_dir, type_path, 'neg')\n", + "\n", + " self.pos_files = glob.glob(\"%s/*.txt\" % self.pos_file_path)\n", + " self.neg_files = glob.glob(\"%s/*.txt\" % self.neg_file_path)\n", + "\n", + " self.max_len = max_len\n", + " self.tokenizer = tokenizer\n", + " self.inputs = []\n", + " self.targets = []\n", + "\n", + " self._build()\n", + "\n", + " def __len__(self):\n", + " return len(self.inputs)\n", + "\n", + " def __getitem__(self, index):\n", + " source_ids = self.inputs[index][\"input_ids\"].squeeze()\n", + " target_ids = self.targets[index][\"input_ids\"].squeeze()\n", + "\n", + " src_mask = self.inputs[index][\"attention_mask\"].squeeze() # might need to squeeze\n", + " target_mask = self.targets[index][\"attention_mask\"].squeeze() # might need to squeeze\n", + "\n", + " return {\"source_ids\": source_ids, \"source_mask\": src_mask, \"target_ids\": target_ids, \"target_mask\": target_mask}\n", + "\n", + " def _build(self):\n", + " self._build_examples_from_files(self.pos_files, 'positive')\n", + " self._build_examples_from_files(self.neg_files, 'negative')\n", + "\n", + " def _build_examples_from_files(self, files, sentiment):\n", + " REPLACE_NO_SPACE = re.compile(\"[.;:!\\'?,\\\"()\\[\\]]\")\n", + " REPLACE_WITH_SPACE = re.compile(\"(<br\\s*/><br\\s*/>)|(\\-)|(\\/)\")\n", + "\n", + " for path in files:\n", + " with open(path, 'r') as f:\n", + " text = f.read()\n", + "\n", + " line = text.strip()\n", + " line = REPLACE_NO_SPACE.sub(\"\", line)\n", + " line = REPLACE_WITH_SPACE.sub(\"\", line)\n", + " # append the eos token required by T5 to both input and target\n", + " line = line + ' </s>'\n", + "\n", + " target = sentiment + \" </s>\"\n", + "\n", + " # tokenize inputs\n", + " tokenized_inputs = self.tokenizer.batch_encode_plus(\n", + " [line], max_length=self.max_len, pad_to_max_length=True, return_tensors=\"pt\"\n", + " )\n", + " # tokenize targets\n", + " tokenized_targets = self.tokenizer.batch_encode_plus(\n", + " [target], max_length=2, pad_to_max_length=True, return_tensors=\"pt\"\n", + " )\n", + "\n", + " self.inputs.append(tokenized_inputs)\n", + " self.targets.append(tokenized_targets)" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "gsnsKY6jemsr", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 34 + }, + "outputId": "98885b84-7f65-4d79-b470-619def772505" + }, + "source": [ + "dataset = ImdbDataset(tokenizer, 'aclImdb', 'val', max_len=512)\n", + "len(dataset)" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "2000" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 23 + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "7g1gz05ccAzg", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 70 + }, + "outputId": "b3a263f1-8b22-46bf-9a33-f58c996d684a" + }, + "source": [ + "data = dataset[28]\n", + "print(tokenizer.decode(data['source_ids']))\n", + "print(tokenizer.decode(data['target_ids']))" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "text": [ + "To quote Flik that was my reaction exactly Wowyoure perfect This is the best 
movie I think I can even say its become my favorite movie ever even Wow I tell you what wow\n", + "positive\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "W4cfw8bMcNdA" + }, + "source": [ + "### Train" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "aTvkv4rzhPjy" + }, + "source": [ + "!mkdir -p t5_imdb_sentiment" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "r5ngAP4OXFqZ" + }, + "source": [ + "args_dict.update({'data_dir': 'aclImdb', 'output_dir': 't5_imdb_sentiment', 'num_train_epochs':2})\n", + "args = argparse.Namespace(**args_dict)\n", + "\n", + "checkpoint_callback = pl.callbacks.ModelCheckpoint(\n", + " filepath=args.output_dir, prefix=\"checkpoint\", monitor=\"val_loss\", mode=\"min\", save_top_k=5\n", + ")\n", + "\n", + "train_params = dict(\n", + " accumulate_grad_batches=args.gradient_accumulation_steps,\n", + " gpus=args.n_gpu,\n", + " max_epochs=args.num_train_epochs,\n", + " early_stop_callback=False,\n", + " precision= 16 if args.fp_16 else 32,\n", + " amp_level=args.opt_level,\n", + " gradient_clip_val=args.max_grad_norm,\n", + " checkpoint_callback=checkpoint_callback,\n", + " callbacks=[LoggingCallback()],\n", + ")" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "RJt_VqzEAMUg" + }, + "source": [ + "Define the `get_dataset` function to return the dataset. The model calls this function to get the train and val datasets. We are defining a dataset function so that we won't need to modify the model code at all. Redefine the function to return different dataset according to the problem. While this is not the best solution for now this works" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "2h2aGPgp0vOf" + }, + "source": [ + "def get_dataset(tokenizer, type_path, args):\n", + " return ImdbDataset(tokenizer=tokenizer, data_dir=args.data_dir, type_path=type_path, max_len=args.max_seq_length)" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "4IOQpawZA9XC" + }, + "source": [ + "**Initialize model**" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "kJsz3a4SilAF", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000, + "referenced_widgets": [ + "f414bac332054c7f86af89b8e50c7d73", + "1d9c52a1bb8843b6b0f151571cbf30a4", + "ed039b8125714030b03912fb29a93ca4", + "d9b445b8b3b04569adf22429259b4954", + "6c61b3c76d7045eb825172ba51b3fa63", + "d11ffd1efc024c1ca86276430d29fd1e", + "22fac35d924f464ca0b33be21a566a86", + "cfe128b0d2c648c18d2255b3f8506a09", + "c34ac6d2548249819c1eab28956edec4", + "de2c77b3fb0f4dba99f92062b2db5328", + "6ea23f0979824aac935f3f1ad10a86cd", + "6452bc3b5ad445a8a5e272207fe4504d", + "d6ef508766c54f8993d1d1f3d7cac040", + "1b69bbddeb244defab9e21690a45c79e", + "4a2b56fd6780470ab1574509fa432183", + "3853231cd966465882a93fad9c5dc428" + ] + }, + "outputId": "d711c5a7-4c7d-4392-8cf5-3df1cbcf2859" + }, + "source": [ + "model = T5FineTuner(args)" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "text": [ + "INFO:filelock:Lock 139780702227256 acquired on /root/.cache/torch/transformers/40578967d1f029acb6162b36db9d8b4307063e885990ccd297c2c5be1cf1b3d7.2995d650f5eba18c8baa4146e210d32d56165e90d374281741fc78b872cd6c9b.lock\n", + "INFO:transformers.file_utils:https://s3.amazonaws.com/models.huggingface.co/bert/t5-base-config.json not found in cache or force_download set to True, 
downloading to /root/.cache/torch/transformers/tmp5_6vo8c2\n" + ], + "name": "stderr" + }, + { + "output_type": "display_data", + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "f414bac332054c7f86af89b8e50c7d73", + "version_minor": 0, + "version_major": 2 + }, + "text/plain": [ + "HBox(children=(FloatProgress(value=0.0, description='Downloading', max=1199.0, style=ProgressStyle(description…" + ] + }, + "metadata": { + "tags": [] + } + }, + { + "output_type": "stream", + "text": [ + "INFO:transformers.file_utils:storing https://s3.amazonaws.com/models.huggingface.co/bert/t5-base-config.json in cache at /root/.cache/torch/transformers/40578967d1f029acb6162b36db9d8b4307063e885990ccd297c2c5be1cf1b3d7.2995d650f5eba18c8baa4146e210d32d56165e90d374281741fc78b872cd6c9b\n", + "INFO:transformers.file_utils:creating metadata file for /root/.cache/torch/transformers/40578967d1f029acb6162b36db9d8b4307063e885990ccd297c2c5be1cf1b3d7.2995d650f5eba18c8baa4146e210d32d56165e90d374281741fc78b872cd6c9b\n", + "INFO:filelock:Lock 139780702227256 released on /root/.cache/torch/transformers/40578967d1f029acb6162b36db9d8b4307063e885990ccd297c2c5be1cf1b3d7.2995d650f5eba18c8baa4146e210d32d56165e90d374281741fc78b872cd6c9b.lock\n", + "INFO:transformers.configuration_utils:loading configuration file https://s3.amazonaws.com/models.huggingface.co/bert/t5-base-config.json from cache at /root/.cache/torch/transformers/40578967d1f029acb6162b36db9d8b4307063e885990ccd297c2c5be1cf1b3d7.2995d650f5eba18c8baa4146e210d32d56165e90d374281741fc78b872cd6c9b\n", + "INFO:transformers.configuration_utils:Model config T5Config {\n", + " \"architectures\": [\n", + " \"T5WithLMHeadModel\"\n", + " ],\n", + " \"d_ff\": 3072,\n", + " \"d_kv\": 64,\n", + " \"d_model\": 768,\n", + " \"decoder_start_token_id\": 0,\n", + " \"dropout_rate\": 0.1,\n", + " \"eos_token_id\": 1,\n", + " \"initializer_factor\": 1.0,\n", + " \"is_encoder_decoder\": true,\n", + " \"layer_norm_epsilon\": 1e-06,\n", + " \"model_type\": \"t5\",\n", + " \"n_positions\": 512,\n", + " \"num_heads\": 12,\n", + " \"num_layers\": 12,\n", + " \"output_past\": true,\n", + " \"pad_token_id\": 0,\n", + " \"relative_attention_num_buckets\": 32,\n", + " \"task_specific_params\": {\n", + " \"summarization\": {\n", + " \"early_stopping\": true,\n", + " \"length_penalty\": 2.0,\n", + " \"max_length\": 200,\n", + " \"min_length\": 30,\n", + " \"no_repeat_ngram_size\": 3,\n", + " \"num_beams\": 4,\n", + " \"prefix\": \"summarize: \"\n", + " },\n", + " \"translation_en_to_de\": {\n", + " \"early_stopping\": true,\n", + " \"max_length\": 300,\n", + " \"num_beams\": 4,\n", + " \"prefix\": \"translate English to German: \"\n", + " },\n", + " \"translation_en_to_fr\": {\n", + " \"early_stopping\": true,\n", + " \"max_length\": 300,\n", + " \"num_beams\": 4,\n", + " \"prefix\": \"translate English to French: \"\n", + " },\n", + " \"translation_en_to_ro\": {\n", + " \"early_stopping\": true,\n", + " \"max_length\": 300,\n", + " \"num_beams\": 4,\n", + " \"prefix\": \"translate English to Romanian: \"\n", + " }\n", + " },\n", + " \"vocab_size\": 32128\n", + "}\n", + "\n" + ], + "name": "stderr" + }, + { + "output_type": "stream", + "text": [ + "\n" + ], + "name": "stdout" + }, + { + "output_type": "stream", + "text": [ + "INFO:filelock:Lock 139780702189776 acquired on /root/.cache/torch/transformers/f6f2fde9fa7611f4eff74620de9cbe734e7a717b5b143bd283cae4c2d6022990.54f906ff53bd09195cfc183a29cadc81b7705f07fcdb796d24163cb632b6bdfa.lock\n", + 
"INFO:transformers.file_utils:https://cdn.huggingface.co/t5-base-pytorch_model.bin not found in cache or force_download set to True, downloading to /root/.cache/torch/transformers/tmps92w5ati\n" + ], + "name": "stderr" + }, + { + "output_type": "display_data", + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "c34ac6d2548249819c1eab28956edec4", + "version_minor": 0, + "version_major": 2 + }, + "text/plain": [ + "HBox(children=(FloatProgress(value=0.0, description='Downloading', max=891691430.0, style=ProgressStyle(descri…" + ] + }, + "metadata": { + "tags": [] + } + }, + { + "output_type": "stream", + "text": [ + "INFO:transformers.file_utils:storing https://cdn.huggingface.co/t5-base-pytorch_model.bin in cache at /root/.cache/torch/transformers/f6f2fde9fa7611f4eff74620de9cbe734e7a717b5b143bd283cae4c2d6022990.54f906ff53bd09195cfc183a29cadc81b7705f07fcdb796d24163cb632b6bdfa\n", + "INFO:transformers.file_utils:creating metadata file for /root/.cache/torch/transformers/f6f2fde9fa7611f4eff74620de9cbe734e7a717b5b143bd283cae4c2d6022990.54f906ff53bd09195cfc183a29cadc81b7705f07fcdb796d24163cb632b6bdfa\n", + "INFO:filelock:Lock 139780702189776 released on /root/.cache/torch/transformers/f6f2fde9fa7611f4eff74620de9cbe734e7a717b5b143bd283cae4c2d6022990.54f906ff53bd09195cfc183a29cadc81b7705f07fcdb796d24163cb632b6bdfa.lock\n", + "INFO:transformers.modeling_utils:loading weights file https://cdn.huggingface.co/t5-base-pytorch_model.bin from cache at /root/.cache/torch/transformers/f6f2fde9fa7611f4eff74620de9cbe734e7a717b5b143bd283cae4c2d6022990.54f906ff53bd09195cfc183a29cadc81b7705f07fcdb796d24163cb632b6bdfa\n" + ], + "name": "stderr" + }, + { + "output_type": "stream", + "text": [ + "\n" + ], + "name": "stdout" + }, + { + "output_type": "stream", + "text": [ + "INFO:transformers.modeling_utils:Weights of T5ForConditionalGeneration not initialized from pretrained model: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight', 'lm_head.weight']\n", + "INFO:transformers.tokenization_utils:loading file https://s3.amazonaws.com/models.huggingface.co/bert/t5-spiece.model from cache at /root/.cache/torch/transformers/68f1b8dbca4350743bb54b8c4169fd38cbabaad564f85a9239337a8d0342af9f.9995af32582a1a7062cb3173c118cb7b4636fa03feb967340f20fc37406f021f\n" + ], + "name": "stderr" + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "RSJytKv1BFyc" + }, + "source": [ + "**Initialize trainer**" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "PxO8OTA3irbw", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 50 + }, + "outputId": "6ebd7f3f-09fe-4363-9869-24d39183d2ff" + }, + "source": [ + "trainer = pl.Trainer(**train_params)" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "text": [ + "INFO:lightning:GPU available: True, used: True\n", + "INFO:lightning:CUDA_VISIBLE_DEVICES: [0]\n" + ], + "name": "stderr" + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Wo7cSSvFGEhe" + }, + "source": [ + "**start fine-tuning**" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "hVGd6imfizLP", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000, + "referenced_widgets": [ + "915a0b65612243668570c555a47a6c37", + "c85b348624504af294b78de744969493", + "d56a6918840e4f6588af5da5f8f54015", + "41db48cf488a4522b1f04b33c2261262", + "8c2d9ac8c22f486299949f4cbed16437", + "222974dba69145e7b171360bec239ba5", + "9e95200811bb497ab0ac0229f5e0ddaa", + "3773b14f23974ad3a5bbb7ff947e68ca", + 
"3ec26f803d124dd0877e1ce0e3517f68", + "aabb0b2f2ae64684a80f1ea39c9a7d1b", + "885696e0606c4353a5d21feec03aebc7", + "659dd7302f3a40038834c4f1d8e59250", + "6f3859c80aa945e4b4ae2aa957755b7c", + "a840a738d20b4f43baf18453db53fdf0", + "f7139c4e04374ffbafe6a849500c6369", + "ef8f0b7c9b0c4f829e3ad59e83cbdd67", + "dbe7a4854b8f420faaea8de4583fb1f0", + "4d1f674483d44e559ae1de553dd1d726", + "ce506c0137914e4db93b9db35154c62a", + "e92a181ff64d4e0290236a91cbdb8d67", + "e8f7179c238e4d2d91d456b2c07e1b3e", + "e67100d71b5047158ab48ef0fd36cb99", + "17f7e321de81404dabaa3e84fadce2cf", + "a15e2fcc467242cb9fad5b2082a70c39", + "f40c9bf16c9a473ba758a6439dce2652", + "8d17a251bf1440d4aa8513ad5f15ba1d", + "165319529b364183ae344a9a14f5bc52", + "3d0c08f3abbe421d83f2b35583221291", + "6e851577f682494c894b9afdd07b1201", + "e67e9e945a9c430f9844946cd81aae3a", + "34fbc6e29df046faaedd9fe3230559cb", + "bbbdd81a2e8f4d68b33d698f45ccc9ae" + ] + }, + "outputId": "cca18a5f-7900-4f58-ed74-6684b72a54e1" + }, + "source": [ + "trainer.fit(model)" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "text": [ + "INFO:lightning:\n", + " | Name | Type | Params\n", + "-----------------------------------------------------------------------------------------------------------------\n", + "0 | model | T5ForConditionalGeneration | 222 M \n", + "1 | model.shared | Embedding | 24 M \n", + "2 | model.encoder | T5Stack | 109 M \n", + "3 | model.encoder.block | ModuleList | 84 M \n", + "4 | model.encoder.block.0 | T5Block | 7 M \n", + "5 | model.encoder.block.0.layer | ModuleList | 7 M \n", + "6 | model.encoder.block.0.layer.0 | T5LayerSelfAttention | 2 M \n", + "7 | model.encoder.block.0.layer.0.SelfAttention | T5Attention | 2 M \n", + "8 | model.encoder.block.0.layer.0.SelfAttention.q | Linear | 589 K \n", + "9 | model.encoder.block.0.layer.0.SelfAttention.k | Linear | 589 K \n", + "10 | model.encoder.block.0.layer.0.SelfAttention.v | Linear | 589 K \n", + "11 | model.encoder.block.0.layer.0.SelfAttention.o | Linear | 589 K \n", + "12 | model.encoder.block.0.layer.0.SelfAttention.relative_attention_bias | Embedding | 384 \n", + "13 | model.encoder.block.0.layer.0.layer_norm | T5LayerNorm | 768 \n", + "14 | model.encoder.block.0.layer.0.dropout | Dropout | 0 \n", + "15 | model.encoder.block.0.layer.1 | T5LayerFF | 4 M \n", + "16 | model.encoder.block.0.layer.1.DenseReluDense | T5DenseReluDense | 4 M \n", + "17 | model.encoder.block.0.layer.1.DenseReluDense.wi | Linear | 2 M \n", + "18 | model.encoder.block.0.layer.1.DenseReluDense.wo | Linear | 2 M \n", + "19 | model.encoder.block.0.layer.1.DenseReluDense.dropout | Dropout | 0 \n", + "20 | model.encoder.block.0.layer.1.layer_norm | T5LayerNorm | 768 \n", + "21 | model.encoder.block.0.layer.1.dropout | Dropout | 0 \n", + "22 | model.encoder.block.1 | T5Block | 7 M \n", + "23 | model.encoder.block.1.layer | ModuleList | 7 M \n", + "24 | model.encoder.block.1.layer.0 | T5LayerSelfAttention | 2 M \n", + "25 | model.encoder.block.1.layer.0.SelfAttention | T5Attention | 2 M \n", + "26 | model.encoder.block.1.layer.0.SelfAttention.q | Linear | 589 K \n", + "27 | model.encoder.block.1.layer.0.SelfAttention.k | Linear | 589 K \n", + "28 | model.encoder.block.1.layer.0.SelfAttention.v | Linear | 589 K \n", + "29 | model.encoder.block.1.layer.0.SelfAttention.o | Linear | 589 K \n", + "30 | model.encoder.block.1.layer.0.layer_norm | T5LayerNorm | 768 \n", + "31 | model.encoder.block.1.layer.0.dropout | Dropout | 0 \n", + "32 | model.encoder.block.1.layer.1 | T5LayerFF | 4 M \n", + "33 
| model.encoder.block.1.layer.1.DenseReluDense | T5DenseReluDense | 4 M \n", + "34 | model.encoder.block.1.layer.1.DenseReluDense.wi | Linear | 2 M \n", + "35 | model.encoder.block.1.layer.1.DenseReluDense.wo | Linear | 2 M \n", + "36 | model.encoder.block.1.layer.1.DenseReluDense.dropout | Dropout | 0 \n", + "37 | model.encoder.block.1.layer.1.layer_norm | T5LayerNorm | 768 \n", + "38 | model.encoder.block.1.layer.1.dropout | Dropout | 0 \n", + "39 | model.encoder.block.2 | T5Block | 7 M \n", + "40 | model.encoder.block.2.layer | ModuleList | 7 M \n", + "41 | model.encoder.block.2.layer.0 | T5LayerSelfAttention | 2 M \n", + "42 | model.encoder.block.2.layer.0.SelfAttention | T5Attention | 2 M \n", + "43 | model.encoder.block.2.layer.0.SelfAttention.q | Linear | 589 K \n", + "44 | model.encoder.block.2.layer.0.SelfAttention.k | Linear | 589 K \n", + "45 | model.encoder.block.2.layer.0.SelfAttention.v | Linear | 589 K \n", + "46 | model.encoder.block.2.layer.0.SelfAttention.o | Linear | 589 K \n", + "47 | model.encoder.block.2.layer.0.layer_norm | T5LayerNorm | 768 \n", + "48 | model.encoder.block.2.layer.0.dropout | Dropout | 0 \n", + "49 | model.encoder.block.2.layer.1 | T5LayerFF | 4 M \n", + "50 | model.encoder.block.2.layer.1.DenseReluDense | T5DenseReluDense | 4 M \n", + "51 | model.encoder.block.2.layer.1.DenseReluDense.wi | Linear | 2 M \n", + "52 | model.encoder.block.2.layer.1.DenseReluDense.wo | Linear | 2 M \n", + "53 | model.encoder.block.2.layer.1.DenseReluDense.dropout | Dropout | 0 \n", + "54 | model.encoder.block.2.layer.1.layer_norm | T5LayerNorm | 768 \n", + "55 | model.encoder.block.2.layer.1.dropout | Dropout | 0 \n", + "56 | model.encoder.block.3 | T5Block | 7 M \n", + "57 | model.encoder.block.3.layer | ModuleList | 7 M \n", + "58 | model.encoder.block.3.layer.0 | T5LayerSelfAttention | 2 M \n", + "59 | model.encoder.block.3.layer.0.SelfAttention | T5Attention | 2 M \n", + "60 | model.encoder.block.3.layer.0.SelfAttention.q | Linear | 589 K \n", + "61 | model.encoder.block.3.layer.0.SelfAttention.k | Linear | 589 K \n", + "62 | model.encoder.block.3.layer.0.SelfAttention.v | Linear | 589 K \n", + "63 | model.encoder.block.3.layer.0.SelfAttention.o | Linear | 589 K \n", + "64 | model.encoder.block.3.layer.0.layer_norm | T5LayerNorm | 768 \n", + "65 | model.encoder.block.3.layer.0.dropout | Dropout | 0 \n", + "66 | model.encoder.block.3.layer.1 | T5LayerFF | 4 M \n", + "67 | model.encoder.block.3.layer.1.DenseReluDense | T5DenseReluDense | 4 M \n", + "68 | model.encoder.block.3.layer.1.DenseReluDense.wi | Linear | 2 M \n", + "69 | model.encoder.block.3.layer.1.DenseReluDense.wo | Linear | 2 M \n", + "70 | model.encoder.block.3.layer.1.DenseReluDense.dropout | Dropout | 0 \n", + "71 | model.encoder.block.3.layer.1.layer_norm | T5LayerNorm | 768 \n", + "72 | model.encoder.block.3.layer.1.dropout | Dropout | 0 \n", + "73 | model.encoder.block.4 | T5Block | 7 M \n", + "74 | model.encoder.block.4.layer | ModuleList | 7 M \n", + "75 | model.encoder.block.4.layer.0 | T5LayerSelfAttention | 2 M \n", + "76 | model.encoder.block.4.layer.0.SelfAttention | T5Attention | 2 M \n", + "77 | model.encoder.block.4.layer.0.SelfAttention.q | Linear | 589 K \n", + "78 | model.encoder.block.4.layer.0.SelfAttention.k | Linear | 589 K \n", + "79 | model.encoder.block.4.layer.0.SelfAttention.v | Linear | 589 K \n", + "80 | model.encoder.block.4.layer.0.SelfAttention.o | Linear | 589 K \n", + "81 | model.encoder.block.4.layer.0.layer_norm | T5LayerNorm | 768 \n", + "82 | 
model.encoder.block.4.layer.0.dropout | Dropout | 0 \n", + "83 | model.encoder.block.4.layer.1 | T5LayerFF | 4 M \n", + "84 | model.encoder.block.4.layer.1.DenseReluDense | T5DenseReluDense | 4 M \n", + "85 | model.encoder.block.4.layer.1.DenseReluDense.wi | Linear | 2 M \n", + "86 | model.encoder.block.4.layer.1.DenseReluDense.wo | Linear | 2 M \n", + "87 | model.encoder.block.4.layer.1.DenseReluDense.dropout | Dropout | 0 \n", + "88 | model.encoder.block.4.layer.1.layer_norm | T5LayerNorm | 768 \n", + "89 | model.encoder.block.4.layer.1.dropout | Dropout | 0 \n", + "90 | model.encoder.block.5 | T5Block | 7 M \n", + "91 | model.encoder.block.5.layer | ModuleList | 7 M \n", + "92 | model.encoder.block.5.layer.0 | T5LayerSelfAttention | 2 M \n", + "93 | model.encoder.block.5.layer.0.SelfAttention | T5Attention | 2 M \n", + "94 | model.encoder.block.5.layer.0.SelfAttention.q | Linear | 589 K \n", + "95 | model.encoder.block.5.layer.0.SelfAttention.k | Linear | 589 K \n", + "96 | model.encoder.block.5.layer.0.SelfAttention.v | Linear | 589 K \n", + "97 | model.encoder.block.5.layer.0.SelfAttention.o | Linear | 589 K \n", + "98 | model.encoder.block.5.layer.0.layer_norm | T5LayerNorm | 768 \n", + "99 | model.encoder.block.5.layer.0.dropout | Dropout | 0 \n", + "100 | model.encoder.block.5.layer.1 | T5LayerFF | 4 M \n", + "101 | model.encoder.block.5.layer.1.DenseReluDense | T5DenseReluDense | 4 M \n", + "102 | model.encoder.block.5.layer.1.DenseReluDense.wi | Linear | 2 M \n", + "103 | model.encoder.block.5.layer.1.DenseReluDense.wo | Linear | 2 M \n", + "104 | model.encoder.block.5.layer.1.DenseReluDense.dropout | Dropout | 0 \n", + "105 | model.encoder.block.5.layer.1.layer_norm | T5LayerNorm | 768 \n", + "106 | model.encoder.block.5.layer.1.dropout | Dropout | 0 \n", + "107 | model.encoder.block.6 | T5Block | 7 M \n", + "108 | model.encoder.block.6.layer | ModuleList | 7 M \n", + "109 | model.encoder.block.6.layer.0 | T5LayerSelfAttention | 2 M \n", + "110 | model.encoder.block.6.layer.0.SelfAttention | T5Attention | 2 M \n", + "111 | model.encoder.block.6.layer.0.SelfAttention.q | Linear | 589 K \n", + "112 | model.encoder.block.6.layer.0.SelfAttention.k | Linear | 589 K \n", + "113 | model.encoder.block.6.layer.0.SelfAttention.v | Linear | 589 K \n", + "114 | model.encoder.block.6.layer.0.SelfAttention.o | Linear | 589 K \n", + "115 | model.encoder.block.6.layer.0.layer_norm | T5LayerNorm | 768 \n", + "116 | model.encoder.block.6.layer.0.dropout | Dropout | 0 \n", + "117 | model.encoder.block.6.layer.1 | T5LayerFF | 4 M \n", + "118 | model.encoder.block.6.layer.1.DenseReluDense | T5DenseReluDense | 4 M \n", + "119 | model.encoder.block.6.layer.1.DenseReluDense.wi | Linear | 2 M \n", + "120 | model.encoder.block.6.layer.1.DenseReluDense.wo | Linear | 2 M \n", + "121 | model.encoder.block.6.layer.1.DenseReluDense.dropout | Dropout | 0 \n", + "122 | model.encoder.block.6.layer.1.layer_norm | T5LayerNorm | 768 \n", + "123 | model.encoder.block.6.layer.1.dropout | Dropout | 0 \n", + "124 | model.encoder.block.7 | T5Block | 7 M \n", + "125 | model.encoder.block.7.layer | ModuleList | 7 M \n", + "126 | model.encoder.block.7.layer.0 | T5LayerSelfAttention | 2 M \n", + "127 | model.encoder.block.7.layer.0.SelfAttention | T5Attention | 2 M \n", + "128 | model.encoder.block.7.layer.0.SelfAttention.q | Linear | 589 K \n", + "129 | model.encoder.block.7.layer.0.SelfAttention.k | Linear | 589 K \n", + "130 | model.encoder.block.7.layer.0.SelfAttention.v | Linear | 589 K \n", + "131 | 
model.encoder.block.7.layer.0.SelfAttention.o | Linear | 589 K \n", + "132 | model.encoder.block.7.layer.0.layer_norm | T5LayerNorm | 768 \n", + "133 | model.encoder.block.7.layer.0.dropout | Dropout | 0 \n", + "134 | model.encoder.block.7.layer.1 | T5LayerFF | 4 M \n", + "135 | model.encoder.block.7.layer.1.DenseReluDense | T5DenseReluDense | 4 M \n", + "136 | model.encoder.block.7.layer.1.DenseReluDense.wi | Linear | 2 M \n", + "137 | model.encoder.block.7.layer.1.DenseReluDense.wo | Linear | 2 M \n", + "138 | model.encoder.block.7.layer.1.DenseReluDense.dropout | Dropout | 0 \n", + "139 | model.encoder.block.7.layer.1.layer_norm | T5LayerNorm | 768 \n", + "140 | model.encoder.block.7.layer.1.dropout | Dropout | 0 \n", + "141 | model.encoder.block.8 | T5Block | 7 M \n", + "142 | model.encoder.block.8.layer | ModuleList | 7 M \n", + "143 | model.encoder.block.8.layer.0 | T5LayerSelfAttention | 2 M \n", + "144 | model.encoder.block.8.layer.0.SelfAttention | T5Attention | 2 M \n", + "145 | model.encoder.block.8.layer.0.SelfAttention.q | Linear | 589 K \n", + "146 | model.encoder.block.8.layer.0.SelfAttention.k | Linear | 589 K \n", + "147 | model.encoder.block.8.layer.0.SelfAttention.v | Linear | 589 K \n", + "148 | model.encoder.block.8.layer.0.SelfAttention.o | Linear | 589 K \n", + "149 | model.encoder.block.8.layer.0.layer_norm | T5LayerNorm | 768 \n", + "150 | model.encoder.block.8.layer.0.dropout | Dropout | 0 \n", + "151 | model.encoder.block.8.layer.1 | T5LayerFF | 4 M \n", + "152 | model.encoder.block.8.layer.1.DenseReluDense | T5DenseReluDense | 4 M \n", + "153 | model.encoder.block.8.layer.1.DenseReluDense.wi | Linear | 2 M \n", + "154 | model.encoder.block.8.layer.1.DenseReluDense.wo | Linear | 2 M \n", + "155 | model.encoder.block.8.layer.1.DenseReluDense.dropout | Dropout | 0 \n", + "156 | model.encoder.block.8.layer.1.layer_norm | T5LayerNorm | 768 \n", + "157 | model.encoder.block.8.layer.1.dropout | Dropout | 0 \n", + "158 | model.encoder.block.9 | T5Block | 7 M \n", + "159 | model.encoder.block.9.layer | ModuleList | 7 M \n", + "160 | model.encoder.block.9.layer.0 | T5LayerSelfAttention | 2 M \n", + "161 | model.encoder.block.9.layer.0.SelfAttention | T5Attention | 2 M \n", + "162 | model.encoder.block.9.layer.0.SelfAttention.q | Linear | 589 K \n", + "163 | model.encoder.block.9.layer.0.SelfAttention.k | Linear | 589 K \n", + "164 | model.encoder.block.9.layer.0.SelfAttention.v | Linear | 589 K \n", + "165 | model.encoder.block.9.layer.0.SelfAttention.o | Linear | 589 K \n", + "166 | model.encoder.block.9.layer.0.layer_norm | T5LayerNorm | 768 \n", + "167 | model.encoder.block.9.layer.0.dropout | Dropout | 0 \n", + "168 | model.encoder.block.9.layer.1 | T5LayerFF | 4 M \n", + "169 | model.encoder.block.9.layer.1.DenseReluDense | T5DenseReluDense | 4 M \n", + "170 | model.encoder.block.9.layer.1.DenseReluDense.wi | Linear | 2 M \n", + "171 | model.encoder.block.9.layer.1.DenseReluDense.wo | Linear | 2 M \n", + "172 | model.encoder.block.9.layer.1.DenseReluDense.dropout | Dropout | 0 \n", + "173 | model.encoder.block.9.layer.1.layer_norm | T5LayerNorm | 768 \n", + "174 | model.encoder.block.9.layer.1.dropout | Dropout | 0 \n", + "175 | model.encoder.block.10 | T5Block | 7 M \n", + "176 | model.encoder.block.10.layer | ModuleList | 7 M \n", + "177 | model.encoder.block.10.layer.0 | T5LayerSelfAttention | 2 M \n", + "178 | model.encoder.block.10.layer.0.SelfAttention | T5Attention | 2 M \n", + "179 | model.encoder.block.10.layer.0.SelfAttention.q | Linear | 589 K \n", + "180 
| model.encoder.block.10.layer.0.SelfAttention.k | Linear | 589 K \n", + "181 | model.encoder.block.10.layer.0.SelfAttention.v | Linear | 589 K \n", + "182 | model.encoder.block.10.layer.0.SelfAttention.o | Linear | 589 K \n", + "183 | model.encoder.block.10.layer.0.layer_norm | T5LayerNorm | 768 \n", + "184 | model.encoder.block.10.layer.0.dropout | Dropout | 0 \n", + "185 | model.encoder.block.10.layer.1 | T5LayerFF | 4 M \n", + "186 | model.encoder.block.10.layer.1.DenseReluDense | T5DenseReluDense | 4 M \n", + "187 | model.encoder.block.10.layer.1.DenseReluDense.wi | Linear | 2 M \n", + "188 | model.encoder.block.10.layer.1.DenseReluDense.wo | Linear | 2 M \n", + "189 | model.encoder.block.10.layer.1.DenseReluDense.dropout | Dropout | 0 \n", + "190 | model.encoder.block.10.layer.1.layer_norm | T5LayerNorm | 768 \n", + "191 | model.encoder.block.10.layer.1.dropout | Dropout | 0 \n", + "192 | model.encoder.block.11 | T5Block | 7 M \n", + "193 | model.encoder.block.11.layer | ModuleList | 7 M \n", + "194 | model.encoder.block.11.layer.0 | T5LayerSelfAttention | 2 M \n", + "195 | model.encoder.block.11.layer.0.SelfAttention | T5Attention | 2 M \n", + "196 | model.encoder.block.11.layer.0.SelfAttention.q | Linear | 589 K \n", + "197 | model.encoder.block.11.layer.0.SelfAttention.k | Linear | 589 K \n", + "198 | model.encoder.block.11.layer.0.SelfAttention.v | Linear | 589 K \n", + "199 | model.encoder.block.11.layer.0.SelfAttention.o | Linear | 589 K \n", + "200 | model.encoder.block.11.layer.0.layer_norm | T5LayerNorm | 768 \n", + "201 | model.encoder.block.11.layer.0.dropout | Dropout | 0 \n", + "202 | model.encoder.block.11.layer.1 | T5LayerFF | 4 M \n", + "203 | model.encoder.block.11.layer.1.DenseReluDense | T5DenseReluDense | 4 M \n", + "204 | model.encoder.block.11.layer.1.DenseReluDense.wi | Linear | 2 M \n", + "205 | model.encoder.block.11.layer.1.DenseReluDense.wo | Linear | 2 M \n", + "206 | model.encoder.block.11.layer.1.DenseReluDense.dropout | Dropout | 0 \n", + "207 | model.encoder.block.11.layer.1.layer_norm | T5LayerNorm | 768 \n", + "208 | model.encoder.block.11.layer.1.dropout | Dropout | 0 \n", + "209 | model.encoder.final_layer_norm | T5LayerNorm | 768 \n", + "210 | model.encoder.dropout | Dropout | 0 \n", + "211 | model.decoder | T5Stack | 137 M \n", + "212 | model.decoder.block | ModuleList | 113 M \n", + "213 | model.decoder.block.0 | T5Block | 9 M \n", + "214 | model.decoder.block.0.layer | ModuleList | 9 M \n", + "215 | model.decoder.block.0.layer.0 | T5LayerSelfAttention | 2 M \n", + "216 | model.decoder.block.0.layer.0.SelfAttention | T5Attention | 2 M \n", + "217 | model.decoder.block.0.layer.0.SelfAttention.q | Linear | 589 K \n", + "218 | model.decoder.block.0.layer.0.SelfAttention.k | Linear | 589 K \n", + "219 | model.decoder.block.0.layer.0.SelfAttention.v | Linear | 589 K \n", + "220 | model.decoder.block.0.layer.0.SelfAttention.o | Linear | 589 K \n", + "221 | model.decoder.block.0.layer.0.SelfAttention.relative_attention_bias | Embedding | 384 \n", + "222 | model.decoder.block.0.layer.0.layer_norm | T5LayerNorm | 768 \n", + "223 | model.decoder.block.0.layer.0.dropout | Dropout | 0 \n", + "224 | model.decoder.block.0.layer.1 | T5LayerCrossAttention | 2 M \n", + "225 | model.decoder.block.0.layer.1.EncDecAttention | T5Attention | 2 M \n", + "226 | model.decoder.block.0.layer.1.EncDecAttention.q | Linear | 589 K \n", + "227 | model.decoder.block.0.layer.1.EncDecAttention.k | Linear | 589 K \n", + "228 | model.decoder.block.0.layer.1.EncDecAttention.v | 
Linear | 589 K \n", + "229 | model.decoder.block.0.layer.1.EncDecAttention.o | Linear | 589 K \n", + "230 | model.decoder.block.0.layer.1.EncDecAttention.relative_attention_bias | Embedding | 384 \n", + "231 | model.decoder.block.0.layer.1.layer_norm | T5LayerNorm | 768 \n", + "232 | model.decoder.block.0.layer.1.dropout | Dropout | 0 \n", + "233 | model.decoder.block.0.layer.2 | T5LayerFF | 4 M \n", + "234 | model.decoder.block.0.layer.2.DenseReluDense | T5DenseReluDense | 4 M \n", + "235 | model.decoder.block.0.layer.2.DenseReluDense.wi | Linear | 2 M \n", + "236 | model.decoder.block.0.layer.2.DenseReluDense.wo | Linear | 2 M \n", + "237 | model.decoder.block.0.layer.2.DenseReluDense.dropout | Dropout | 0 \n", + "238 | model.decoder.block.0.layer.2.layer_norm | T5LayerNorm | 768 \n", + "239 | model.decoder.block.0.layer.2.dropout | Dropout | 0 \n", + "240 | model.decoder.block.1 | T5Block | 9 M \n", + "241 | model.decoder.block.1.layer | ModuleList | 9 M \n", + "242 | model.decoder.block.1.layer.0 | T5LayerSelfAttention | 2 M \n", + "243 | model.decoder.block.1.layer.0.SelfAttention | T5Attention | 2 M \n", + "244 | model.decoder.block.1.layer.0.SelfAttention.q | Linear | 589 K \n", + "245 | model.decoder.block.1.layer.0.SelfAttention.k | Linear | 589 K \n", + "246 | model.decoder.block.1.layer.0.SelfAttention.v | Linear | 589 K \n", + "247 | model.decoder.block.1.layer.0.SelfAttention.o | Linear | 589 K \n", + "248 | model.decoder.block.1.layer.0.layer_norm | T5LayerNorm | 768 \n", + "249 | model.decoder.block.1.layer.0.dropout | Dropout | 0 \n", + "250 | model.decoder.block.1.layer.1 | T5LayerCrossAttention | 2 M \n", + "251 | model.decoder.block.1.layer.1.EncDecAttention | T5Attention | 2 M \n", + "252 | model.decoder.block.1.layer.1.EncDecAttention.q | Linear | 589 K \n", + "253 | model.decoder.block.1.layer.1.EncDecAttention.k | Linear | 589 K \n", + "254 | model.decoder.block.1.layer.1.EncDecAttention.v | Linear | 589 K \n", + "255 | model.decoder.block.1.layer.1.EncDecAttention.o | Linear | 589 K \n", + "256 | model.decoder.block.1.layer.1.layer_norm | T5LayerNorm | 768 \n", + "257 | model.decoder.block.1.layer.1.dropout | Dropout | 0 \n", + "258 | model.decoder.block.1.layer.2 | T5LayerFF | 4 M \n", + "259 | model.decoder.block.1.layer.2.DenseReluDense | T5DenseReluDense | 4 M \n", + "260 | model.decoder.block.1.layer.2.DenseReluDense.wi | Linear | 2 M \n", + "261 | model.decoder.block.1.layer.2.DenseReluDense.wo | Linear | 2 M \n", + "262 | model.decoder.block.1.layer.2.DenseReluDense.dropout | Dropout | 0 \n", + "263 | model.decoder.block.1.layer.2.layer_norm | T5LayerNorm | 768 \n", + "264 | model.decoder.block.1.layer.2.dropout | Dropout | 0 \n", + "265 | model.decoder.block.2 | T5Block | 9 M \n", + "266 | model.decoder.block.2.layer | ModuleList | 9 M \n", + "267 | model.decoder.block.2.layer.0 | T5LayerSelfAttention | 2 M \n", + "268 | model.decoder.block.2.layer.0.SelfAttention | T5Attention | 2 M \n", + "269 | model.decoder.block.2.layer.0.SelfAttention.q | Linear | 589 K \n", + "270 | model.decoder.block.2.layer.0.SelfAttention.k | Linear | 589 K \n", + "271 | model.decoder.block.2.layer.0.SelfAttention.v | Linear | 589 K \n", + "272 | model.decoder.block.2.layer.0.SelfAttention.o | Linear | 589 K \n", + "273 | model.decoder.block.2.layer.0.layer_norm | T5LayerNorm | 768 \n", + "274 | model.decoder.block.2.layer.0.dropout | Dropout | 0 \n", + "275 | model.decoder.block.2.layer.1 | T5LayerCrossAttention | 2 M \n", + "276 | model.decoder.block.2.layer.1.EncDecAttention | 
T5Attention | 2 M \n", + "277 | model.decoder.block.2.layer.1.EncDecAttention.q | Linear | 589 K \n", + "278 | model.decoder.block.2.layer.1.EncDecAttention.k | Linear | 589 K \n", + "279 | model.decoder.block.2.layer.1.EncDecAttention.v | Linear | 589 K \n", + "280 | model.decoder.block.2.layer.1.EncDecAttention.o | Linear | 589 K \n", + "281 | model.decoder.block.2.layer.1.layer_norm | T5LayerNorm | 768 \n", + "282 | model.decoder.block.2.layer.1.dropout | Dropout | 0 \n", + "283 | model.decoder.block.2.layer.2 | T5LayerFF | 4 M \n", + "284 | model.decoder.block.2.layer.2.DenseReluDense | T5DenseReluDense | 4 M \n", + "285 | model.decoder.block.2.layer.2.DenseReluDense.wi | Linear | 2 M \n", + "286 | model.decoder.block.2.layer.2.DenseReluDense.wo | Linear | 2 M \n", + "287 | model.decoder.block.2.layer.2.DenseReluDense.dropout | Dropout | 0 \n", + "288 | model.decoder.block.2.layer.2.layer_norm | T5LayerNorm | 768 \n", + "289 | model.decoder.block.2.layer.2.dropout | Dropout | 0 \n", + "290 | model.decoder.block.3 | T5Block | 9 M \n", + "291 | model.decoder.block.3.layer | ModuleList | 9 M \n", + "292 | model.decoder.block.3.layer.0 | T5LayerSelfAttention | 2 M \n", + "293 | model.decoder.block.3.layer.0.SelfAttention | T5Attention | 2 M \n", + "294 | model.decoder.block.3.layer.0.SelfAttention.q | Linear | 589 K \n", + "295 | model.decoder.block.3.layer.0.SelfAttention.k | Linear | 589 K \n", + "296 | model.decoder.block.3.layer.0.SelfAttention.v | Linear | 589 K \n", + "297 | model.decoder.block.3.layer.0.SelfAttention.o | Linear | 589 K \n", + "298 | model.decoder.block.3.layer.0.layer_norm | T5LayerNorm | 768 \n", + "299 | model.decoder.block.3.layer.0.dropout | Dropout | 0 \n", + "300 | model.decoder.block.3.layer.1 | T5LayerCrossAttention | 2 M \n", + "301 | model.decoder.block.3.layer.1.EncDecAttention | T5Attention | 2 M \n", + "302 | model.decoder.block.3.layer.1.EncDecAttention.q | Linear | 589 K \n", + "303 | model.decoder.block.3.layer.1.EncDecAttention.k | Linear | 589 K \n", + "304 | model.decoder.block.3.layer.1.EncDecAttention.v | Linear | 589 K \n", + "305 | model.decoder.block.3.layer.1.EncDecAttention.o | Linear | 589 K \n", + "306 | model.decoder.block.3.layer.1.layer_norm | T5LayerNorm | 768 \n", + "307 | model.decoder.block.3.layer.1.dropout | Dropout | 0 \n", + "308 | model.decoder.block.3.layer.2 | T5LayerFF | 4 M \n", + "309 | model.decoder.block.3.layer.2.DenseReluDense | T5DenseReluDense | 4 M \n", + "310 | model.decoder.block.3.layer.2.DenseReluDense.wi | Linear | 2 M \n", + "311 | model.decoder.block.3.layer.2.DenseReluDense.wo | Linear | 2 M \n", + "312 | model.decoder.block.3.layer.2.DenseReluDense.dropout | Dropout | 0 \n", + "313 | model.decoder.block.3.layer.2.layer_norm | T5LayerNorm | 768 \n", + "314 | model.decoder.block.3.layer.2.dropout | Dropout | 0 \n", + "315 | model.decoder.block.4 | T5Block | 9 M \n", + "316 | model.decoder.block.4.layer | ModuleList | 9 M \n", + "317 | model.decoder.block.4.layer.0 | T5LayerSelfAttention | 2 M \n", + "318 | model.decoder.block.4.layer.0.SelfAttention | T5Attention | 2 M \n", + "319 | model.decoder.block.4.layer.0.SelfAttention.q | Linear | 589 K \n", + "320 | model.decoder.block.4.layer.0.SelfAttention.k | Linear | 589 K \n", + "321 | model.decoder.block.4.layer.0.SelfAttention.v | Linear | 589 K \n", + "322 | model.decoder.block.4.layer.0.SelfAttention.o | Linear | 589 K \n", + "323 | model.decoder.block.4.layer.0.layer_norm | T5LayerNorm | 768 \n", + "324 | model.decoder.block.4.layer.0.dropout | Dropout | 
0 \n", + "325 | model.decoder.block.4.layer.1 | T5LayerCrossAttention | 2 M \n", + "326 | model.decoder.block.4.layer.1.EncDecAttention | T5Attention | 2 M \n", + "327 | model.decoder.block.4.layer.1.EncDecAttention.q | Linear | 589 K \n", + "328 | model.decoder.block.4.layer.1.EncDecAttention.k | Linear | 589 K \n", + "329 | model.decoder.block.4.layer.1.EncDecAttention.v | Linear | 589 K \n", + "330 | model.decoder.block.4.layer.1.EncDecAttention.o | Linear | 589 K \n", + "331 | model.decoder.block.4.layer.1.layer_norm | T5LayerNorm | 768 \n", + "332 | model.decoder.block.4.layer.1.dropout | Dropout | 0 \n", + "333 | model.decoder.block.4.layer.2 | T5LayerFF | 4 M \n", + "334 | model.decoder.block.4.layer.2.DenseReluDense | T5DenseReluDense | 4 M \n", + "335 | model.decoder.block.4.layer.2.DenseReluDense.wi | Linear | 2 M \n", + "336 | model.decoder.block.4.layer.2.DenseReluDense.wo | Linear | 2 M \n", + "337 | model.decoder.block.4.layer.2.DenseReluDense.dropout | Dropout | 0 \n", + "338 | model.decoder.block.4.layer.2.layer_norm | T5LayerNorm | 768 \n", + "339 | model.decoder.block.4.layer.2.dropout | Dropout | 0 \n", + "340 | model.decoder.block.5 | T5Block | 9 M \n", + "341 | model.decoder.block.5.layer | ModuleList | 9 M \n", + "342 | model.decoder.block.5.layer.0 | T5LayerSelfAttention | 2 M \n", + "343 | model.decoder.block.5.layer.0.SelfAttention | T5Attention | 2 M \n", + "344 | model.decoder.block.5.layer.0.SelfAttention.q | Linear | 589 K \n", + "345 | model.decoder.block.5.layer.0.SelfAttention.k | Linear | 589 K \n", + "346 | model.decoder.block.5.layer.0.SelfAttention.v | Linear | 589 K \n", + "347 | model.decoder.block.5.layer.0.SelfAttention.o | Linear | 589 K \n", + "348 | model.decoder.block.5.layer.0.layer_norm | T5LayerNorm | 768 \n", + "349 | model.decoder.block.5.layer.0.dropout | Dropout | 0 \n", + "350 | model.decoder.block.5.layer.1 | T5LayerCrossAttention | 2 M \n", + "351 | model.decoder.block.5.layer.1.EncDecAttention | T5Attention | 2 M \n", + "352 | model.decoder.block.5.layer.1.EncDecAttention.q | Linear | 589 K \n", + "353 | model.decoder.block.5.layer.1.EncDecAttention.k | Linear | 589 K \n", + "354 | model.decoder.block.5.layer.1.EncDecAttention.v | Linear | 589 K \n", + "355 | model.decoder.block.5.layer.1.EncDecAttention.o | Linear | 589 K \n", + "356 | model.decoder.block.5.layer.1.layer_norm | T5LayerNorm | 768 \n", + "357 | model.decoder.block.5.layer.1.dropout | Dropout | 0 \n", + "358 | model.decoder.block.5.layer.2 | T5LayerFF | 4 M \n", + "359 | model.decoder.block.5.layer.2.DenseReluDense | T5DenseReluDense | 4 M \n", + "360 | model.decoder.block.5.layer.2.DenseReluDense.wi | Linear | 2 M \n", + "361 | model.decoder.block.5.layer.2.DenseReluDense.wo | Linear | 2 M \n", + "362 | model.decoder.block.5.layer.2.DenseReluDense.dropout | Dropout | 0 \n", + "363 | model.decoder.block.5.layer.2.layer_norm | T5LayerNorm | 768 \n", + "364 | model.decoder.block.5.layer.2.dropout | Dropout | 0 \n", + "365 | model.decoder.block.6 | T5Block | 9 M \n", + "366 | model.decoder.block.6.layer | ModuleList | 9 M \n", + "367 | model.decoder.block.6.layer.0 | T5LayerSelfAttention | 2 M \n", + "368 | model.decoder.block.6.layer.0.SelfAttention | T5Attention | 2 M \n", + "369 | model.decoder.block.6.layer.0.SelfAttention.q | Linear | 589 K \n", + "370 | model.decoder.block.6.layer.0.SelfAttention.k | Linear | 589 K \n", + "371 | model.decoder.block.6.layer.0.SelfAttention.v | Linear | 589 K \n", + "372 | model.decoder.block.6.layer.0.SelfAttention.o | Linear | 589 K 
\n", + "373 | model.decoder.block.6.layer.0.layer_norm | T5LayerNorm | 768 \n", + "374 | model.decoder.block.6.layer.0.dropout | Dropout | 0 \n", + "375 | model.decoder.block.6.layer.1 | T5LayerCrossAttention | 2 M \n", + "376 | model.decoder.block.6.layer.1.EncDecAttention | T5Attention | 2 M \n", + "377 | model.decoder.block.6.layer.1.EncDecAttention.q | Linear | 589 K \n", + "378 | model.decoder.block.6.layer.1.EncDecAttention.k | Linear | 589 K \n", + "379 | model.decoder.block.6.layer.1.EncDecAttention.v | Linear | 589 K \n", + "380 | model.decoder.block.6.layer.1.EncDecAttention.o | Linear | 589 K \n", + "381 | model.decoder.block.6.layer.1.layer_norm | T5LayerNorm | 768 \n", + "382 | model.decoder.block.6.layer.1.dropout | Dropout | 0 \n", + "383 | model.decoder.block.6.layer.2 | T5LayerFF | 4 M \n", + "384 | model.decoder.block.6.layer.2.DenseReluDense | T5DenseReluDense | 4 M \n", + "385 | model.decoder.block.6.layer.2.DenseReluDense.wi | Linear | 2 M \n", + "386 | model.decoder.block.6.layer.2.DenseReluDense.wo | Linear | 2 M \n", + "387 | model.decoder.block.6.layer.2.DenseReluDense.dropout | Dropout | 0 \n", + "388 | model.decoder.block.6.layer.2.layer_norm | T5LayerNorm | 768 \n", + "389 | model.decoder.block.6.layer.2.dropout | Dropout | 0 \n", + "390 | model.decoder.block.7 | T5Block | 9 M \n", + "391 | model.decoder.block.7.layer | ModuleList | 9 M \n", + "392 | model.decoder.block.7.layer.0 | T5LayerSelfAttention | 2 M \n", + "393 | model.decoder.block.7.layer.0.SelfAttention | T5Attention | 2 M \n", + "394 | model.decoder.block.7.layer.0.SelfAttention.q | Linear | 589 K \n", + "395 | model.decoder.block.7.layer.0.SelfAttention.k | Linear | 589 K \n", + "396 | model.decoder.block.7.layer.0.SelfAttention.v | Linear | 589 K \n", + "397 | model.decoder.block.7.layer.0.SelfAttention.o | Linear | 589 K \n", + "398 | model.decoder.block.7.layer.0.layer_norm | T5LayerNorm | 768 \n", + "399 | model.decoder.block.7.layer.0.dropout | Dropout | 0 \n", + "400 | model.decoder.block.7.layer.1 | T5LayerCrossAttention | 2 M \n", + "401 | model.decoder.block.7.layer.1.EncDecAttention | T5Attention | 2 M \n", + "402 | model.decoder.block.7.layer.1.EncDecAttention.q | Linear | 589 K \n", + "403 | model.decoder.block.7.layer.1.EncDecAttention.k | Linear | 589 K \n", + "404 | model.decoder.block.7.layer.1.EncDecAttention.v | Linear | 589 K \n", + "405 | model.decoder.block.7.layer.1.EncDecAttention.o | Linear | 589 K \n", + "406 | model.decoder.block.7.layer.1.layer_norm | T5LayerNorm | 768 \n", + "407 | model.decoder.block.7.layer.1.dropout | Dropout | 0 \n", + "408 | model.decoder.block.7.layer.2 | T5LayerFF | 4 M \n", + "409 | model.decoder.block.7.layer.2.DenseReluDense | T5DenseReluDense | 4 M \n", + "410 | model.decoder.block.7.layer.2.DenseReluDense.wi | Linear | 2 M \n", + "411 | model.decoder.block.7.layer.2.DenseReluDense.wo | Linear | 2 M \n", + "412 | model.decoder.block.7.layer.2.DenseReluDense.dropout | Dropout | 0 \n", + "413 | model.decoder.block.7.layer.2.layer_norm | T5LayerNorm | 768 \n", + "414 | model.decoder.block.7.layer.2.dropout | Dropout | 0 \n", + "415 | model.decoder.block.8 | T5Block | 9 M \n", + "416 | model.decoder.block.8.layer | ModuleList | 9 M \n", + "417 | model.decoder.block.8.layer.0 | T5LayerSelfAttention | 2 M \n", + "418 | model.decoder.block.8.layer.0.SelfAttention | T5Attention | 2 M \n", + "419 | model.decoder.block.8.layer.0.SelfAttention.q | Linear | 589 K \n", + "420 | model.decoder.block.8.layer.0.SelfAttention.k | Linear | 589 K \n", + "421 | 
model.decoder.block.8.layer.0.SelfAttention.v | Linear | 589 K \n", + "422 | model.decoder.block.8.layer.0.SelfAttention.o | Linear | 589 K \n", + "423 | model.decoder.block.8.layer.0.layer_norm | T5LayerNorm | 768 \n", + "424 | model.decoder.block.8.layer.0.dropout | Dropout | 0 \n", + "425 | model.decoder.block.8.layer.1 | T5LayerCrossAttention | 2 M \n", + "426 | model.decoder.block.8.layer.1.EncDecAttention | T5Attention | 2 M \n", + "427 | model.decoder.block.8.layer.1.EncDecAttention.q | Linear | 589 K \n", + "428 | model.decoder.block.8.layer.1.EncDecAttention.k | Linear | 589 K \n", + "429 | model.decoder.block.8.layer.1.EncDecAttention.v | Linear | 589 K \n", + "430 | model.decoder.block.8.layer.1.EncDecAttention.o | Linear | 589 K \n", + "431 | model.decoder.block.8.layer.1.layer_norm | T5LayerNorm | 768 \n", + "432 | model.decoder.block.8.layer.1.dropout | Dropout | 0 \n", + "433 | model.decoder.block.8.layer.2 | T5LayerFF | 4 M \n", + "434 | model.decoder.block.8.layer.2.DenseReluDense | T5DenseReluDense | 4 M \n", + "435 | model.decoder.block.8.layer.2.DenseReluDense.wi | Linear | 2 M \n", + "436 | model.decoder.block.8.layer.2.DenseReluDense.wo | Linear | 2 M \n", + "437 | model.decoder.block.8.layer.2.DenseReluDense.dropout | Dropout | 0 \n", + "438 | model.decoder.block.8.layer.2.layer_norm | T5LayerNorm | 768 \n", + "439 | model.decoder.block.8.layer.2.dropout | Dropout | 0 \n", + "440 | model.decoder.block.9 | T5Block | 9 M \n", + "441 | model.decoder.block.9.layer | ModuleList | 9 M \n", + "442 | model.decoder.block.9.layer.0 | T5LayerSelfAttention | 2 M \n", + "443 | model.decoder.block.9.layer.0.SelfAttention | T5Attention | 2 M \n", + "444 | model.decoder.block.9.layer.0.SelfAttention.q | Linear | 589 K \n", + "445 | model.decoder.block.9.layer.0.SelfAttention.k | Linear | 589 K \n", + "446 | model.decoder.block.9.layer.0.SelfAttention.v | Linear | 589 K \n", + "447 | model.decoder.block.9.layer.0.SelfAttention.o | Linear | 589 K \n", + "448 | model.decoder.block.9.layer.0.layer_norm | T5LayerNorm | 768 \n", + "449 | model.decoder.block.9.layer.0.dropout | Dropout | 0 \n", + "450 | model.decoder.block.9.layer.1 | T5LayerCrossAttention | 2 M \n", + "451 | model.decoder.block.9.layer.1.EncDecAttention | T5Attention | 2 M \n", + "452 | model.decoder.block.9.layer.1.EncDecAttention.q | Linear | 589 K \n", + "453 | model.decoder.block.9.layer.1.EncDecAttention.k | Linear | 589 K \n", + "454 | model.decoder.block.9.layer.1.EncDecAttention.v | Linear | 589 K \n", + "455 | model.decoder.block.9.layer.1.EncDecAttention.o | Linear | 589 K \n", + "456 | model.decoder.block.9.layer.1.layer_norm | T5LayerNorm | 768 \n", + "457 | model.decoder.block.9.layer.1.dropout | Dropout | 0 \n", + "458 | model.decoder.block.9.layer.2 | T5LayerFF | 4 M \n", + "459 | model.decoder.block.9.layer.2.DenseReluDense | T5DenseReluDense | 4 M \n", + "460 | model.decoder.block.9.layer.2.DenseReluDense.wi | Linear | 2 M \n", + "461 | model.decoder.block.9.layer.2.DenseReluDense.wo | Linear | 2 M \n", + "462 | model.decoder.block.9.layer.2.DenseReluDense.dropout | Dropout | 0 \n", + "463 | model.decoder.block.9.layer.2.layer_norm | T5LayerNorm | 768 \n", + "464 | model.decoder.block.9.layer.2.dropout | Dropout | 0 \n", + "465 | model.decoder.block.10 | T5Block | 9 M \n", + "466 | model.decoder.block.10.layer | ModuleList | 9 M \n", + "467 | model.decoder.block.10.layer.0 | T5LayerSelfAttention | 2 M \n", + "468 | model.decoder.block.10.layer.0.SelfAttention | T5Attention | 2 M \n", + "469 | 
model.decoder.block.10.layer.0.SelfAttention.q | Linear | 589 K \n", + "470 | model.decoder.block.10.layer.0.SelfAttention.k | Linear | 589 K \n", + "471 | model.decoder.block.10.layer.0.SelfAttention.v | Linear | 589 K \n", + "472 | model.decoder.block.10.layer.0.SelfAttention.o | Linear | 589 K \n", + "473 | model.decoder.block.10.layer.0.layer_norm | T5LayerNorm | 768 \n", + "474 | model.decoder.block.10.layer.0.dropout | Dropout | 0 \n", + "475 | model.decoder.block.10.layer.1 | T5LayerCrossAttention | 2 M \n", + "476 | model.decoder.block.10.layer.1.EncDecAttention | T5Attention | 2 M \n", + "477 | model.decoder.block.10.layer.1.EncDecAttention.q | Linear | 589 K \n", + "478 | model.decoder.block.10.layer.1.EncDecAttention.k | Linear | 589 K \n", + "479 | model.decoder.block.10.layer.1.EncDecAttention.v | Linear | 589 K \n", + "480 | model.decoder.block.10.layer.1.EncDecAttention.o | Linear | 589 K \n", + "481 | model.decoder.block.10.layer.1.layer_norm | T5LayerNorm | 768 \n", + "482 | model.decoder.block.10.layer.1.dropout | Dropout | 0 \n", + "483 | model.decoder.block.10.layer.2 | T5LayerFF | 4 M \n", + "484 | model.decoder.block.10.layer.2.DenseReluDense | T5DenseReluDense | 4 M \n", + "485 | model.decoder.block.10.layer.2.DenseReluDense.wi | Linear | 2 M \n", + "486 | model.decoder.block.10.layer.2.DenseReluDense.wo | Linear | 2 M \n", + "487 | model.decoder.block.10.layer.2.DenseReluDense.dropout | Dropout | 0 \n", + "488 | model.decoder.block.10.layer.2.layer_norm | T5LayerNorm | 768 \n", + "489 | model.decoder.block.10.layer.2.dropout | Dropout | 0 \n", + "490 | model.decoder.block.11 | T5Block | 9 M \n", + "491 | model.decoder.block.11.layer | ModuleList | 9 M \n", + "492 | model.decoder.block.11.layer.0 | T5LayerSelfAttention | 2 M \n", + "493 | model.decoder.block.11.layer.0.SelfAttention | T5Attention | 2 M \n", + "494 | model.decoder.block.11.layer.0.SelfAttention.q | Linear | 589 K \n", + "495 | model.decoder.block.11.layer.0.SelfAttention.k | Linear | 589 K \n", + "496 | model.decoder.block.11.layer.0.SelfAttention.v | Linear | 589 K \n", + "497 | model.decoder.block.11.layer.0.SelfAttention.o | Linear | 589 K \n", + "498 | model.decoder.block.11.layer.0.layer_norm | T5LayerNorm | 768 \n", + "499 | model.decoder.block.11.layer.0.dropout | Dropout | 0 \n", + "500 | model.decoder.block.11.layer.1 | T5LayerCrossAttention | 2 M \n", + "501 | model.decoder.block.11.layer.1.EncDecAttention | T5Attention | 2 M \n", + "502 | model.decoder.block.11.layer.1.EncDecAttention.q | Linear | 589 K \n", + "503 | model.decoder.block.11.layer.1.EncDecAttention.k | Linear | 589 K \n", + "504 | model.decoder.block.11.layer.1.EncDecAttention.v | Linear | 589 K \n", + "505 | model.decoder.block.11.layer.1.EncDecAttention.o | Linear | 589 K \n", + "506 | model.decoder.block.11.layer.1.layer_norm | T5LayerNorm | 768 \n", + "507 | model.decoder.block.11.layer.1.dropout | Dropout | 0 \n", + "508 | model.decoder.block.11.layer.2 | T5LayerFF | 4 M \n", + "509 | model.decoder.block.11.layer.2.DenseReluDense | T5DenseReluDense | 4 M \n", + "510 | model.decoder.block.11.layer.2.DenseReluDense.wi | Linear | 2 M \n", + "511 | model.decoder.block.11.layer.2.DenseReluDense.wo | Linear | 2 M \n", + "512 | model.decoder.block.11.layer.2.DenseReluDense.dropout | Dropout | 0 \n", + "513 | model.decoder.block.11.layer.2.layer_norm | T5LayerNorm | 768 \n", + "514 | model.decoder.block.11.layer.2.dropout | Dropout | 0 \n", + "515 | model.decoder.final_layer_norm | T5LayerNorm | 768 \n", + "516 | 
model.decoder.dropout | Dropout | 0 \n", + "517 | model.lm_head | Linear | 24 M \n" + ], + "name": "stderr" + }, + { + "output_type": "display_data", + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "915a0b65612243668570c555a47a6c37", + "version_minor": 0, + "version_major": 2 + }, + "text/plain": [ + "HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validation sanity check', layout=Layout…" + ] + }, + "metadata": { + "tags": [] + } + }, + { + "output_type": "stream", + "text": [ + "\r" + ], + "name": "stdout" + }, + { + "output_type": "display_data", + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "3ec26f803d124dd0877e1ce0e3517f68", + "version_minor": 0, + "version_major": 2 + }, + "text/plain": [ + "HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Training', layout=Layout(flex='2'), max…" + ] + }, + "metadata": { + "tags": [] + } + }, + { + "output_type": "stream", + "text": [ + "/pytorch/torch/csrc/utils/python_arg_parser.cpp:756: UserWarning: This overload of add_ is deprecated:\n", + "\tadd_(Number alpha, Tensor other)\n", + "Consider using one of the following signatures instead:\n", + "\tadd_(Tensor other, *, Number alpha)\n" + ], + "name": "stderr" + }, + { + "output_type": "display_data", + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "dbe7a4854b8f420faaea8de4583fb1f0", + "version_minor": 0, + "version_major": 2 + }, + "text/plain": [ + "HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…" + ] + }, + "metadata": { + "tags": [] + } + }, + { + "output_type": "stream", + "text": [ + "INFO:__main__:***** Validation results *****\n", + "INFO:__main__:avg_val_loss = tensor(0.0839, device='cuda:0')\n", + "\n", + "INFO:__main__:loss = tensor(0.0199, device='cuda:0')\n", + "\n", + "INFO:__main__:train_loss = tensor(0.0199, device='cuda:0')\n", + "\n", + "INFO:__main__:val_loss = tensor(0.0839, device='cuda:0')\n", + "\n" + ], + "name": "stderr" + }, + { + "output_type": "display_data", + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "f40c9bf16c9a473ba758a6439dce2652", + "version_minor": 0, + "version_major": 2 + }, + "text/plain": [ + "HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…" + ] + }, + "metadata": { + "tags": [] + } + }, + { + "output_type": "stream", + "text": [ + "INFO:__main__:***** Validation results *****\n", + "INFO:__main__:avg_train_loss = tensor(0.2954, device='cuda:0')\n", + "\n", + "INFO:__main__:avg_val_loss = tensor(0.0874, device='cuda:0')\n", + "\n", + "INFO:__main__:epoch = 0\n", + "\n", + "INFO:__main__:loss = tensor(0.0066, device='cuda:0')\n", + "\n", + "INFO:__main__:train_loss = tensor(0.0066, device='cuda:0')\n", + "\n", + "INFO:__main__:val_loss = tensor(0.0874, device='cuda:0')\n", + "\n" + ], + "name": "stderr" + }, + { + "output_type": "stream", + "text": [ + "\n" + ], + "name": "stdout" + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "1" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 30 + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "l-obOz6v70iB" + }, + "source": [ + "!mkdir t5_base_imdb_sentiment" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "OQBJcrrWi2vC", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 50 + }, + "outputId": 
"a98adf77-6e23-4304-8ccc-5b13a33a2a32" + }, + "source": [ + "## save the model this way so next time you can load it using T5ForConditionalGeneration.from_pretrained\n", + "model.model.save_pretrained('t5_base_imdb_sentiment')" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "text": [ + "INFO:transformers.configuration_utils:Configuration saved in t5_base_imdb_sentiment/config.json\n", + "INFO:transformers.modeling_utils:Model weights saved in t5_base_imdb_sentiment/pytorch_model.bin\n" + ], + "name": "stderr" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "XhjELPOk7-cz" + }, + "source": [ + "# !cp -r t5_base_imdb_sentiment drive/My\\ Drive/" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "brPOSAkjNP5t" + }, + "source": [ + "### Eval" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "_7SuVh05lDrJ" + }, + "source": [ + "For inference we will use the `generate` method with greedy decoding with max length 2." + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "25jbT49CVoXN" + }, + "source": [ + "import textwrap\n", + "from tqdm.auto import tqdm\n", + "from sklearn import metrics" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "cyriGR20lSRa" + }, + "source": [ + "Let's visualize few predictions on test dataset" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "wwJ998sMz2Ci" + }, + "source": [ + "dataset = ImdbDataset(tokenizer, 'aclImdb', 'test', max_len=512)\n", + "loader = DataLoader(dataset, batch_size=32, shuffle=True)" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "2LQtN5b90TyW" + }, + "source": [ + "it = iter(loader)" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "TRD03teH0YMe", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 34 + }, + "outputId": "d43041e6-5d7d-49d5-e91a-7530c5d1d6b1" + }, + "source": [ + "batch = next(it)\n", + "batch[\"source_ids\"].shape" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "torch.Size([32, 512])" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 36 + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "eewDktozk7GN" + }, + "source": [ + "outs = model.model.generate(input_ids=batch['source_ids'].cuda(),\n", + " attention_mask=batch['source_mask'].cuda(),\n", + " max_length=2)\n", + "\n", + "dec = [tokenizer.decode(ids) for ids in outs]\n", + "\n", + "texts = [tokenizer.decode(ids) for ids in batch['source_ids']]\n", + "targets = [tokenizer.decode(ids) for ids in batch['target_ids']]" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "9vBe0UNw7cHY", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000 + }, + "outputId": "2f0171ac-8d7d-41db-db31-d57bf72bc205" + }, + "source": [ + "for i in range(32):\n", + " lines = textwrap.wrap(\"Review:\\n%s\\n\" % texts[i], width=100)\n", + " print(\"\\n\".join(lines))\n", + " print(\"\\nActual sentiment: %s\" % targets[i])\n", + " print(\"Predicted sentiment: %s\" % dec[i])\n", + " print(\"=====================================================================\\n\")" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Review: I dont know where to begin Perhaps the whole idea of this 
movie was just a disaster waiting\n", + "to happen There is nothing slightly humorous about a kidnapping I dont know what was more\n", + "offensivethe subject matter or David Arquettes performance It was like watching a bull get its penis\n", + "cut off although I think the bull felt better afterwards The filmmakers should find something about\n", + "Sinatra other than his sons kidnapping to show like I dont know his TALENT AS A SINGER His family\n", + "shouldnt have to relive that horror Thank GOD it was just shown on HBO and not released in theaters\n", + "Please dont watch this if you have any self respect\n", + "\n", + "Actual sentiment: negative\n", + "Predicted sentiment: negative\n", + "=====================================================================\n", + "\n", + "Review: A fine performance by Vittorio Mezzogiorno and a masterful one by JeanHugues Anglade adorn\n", + "this stange tale of lust desire and alienation in France The work of the two lead performers is\n", + "strikingsubtle intense and passionate Alas the script is deliberately turgid and sordid and the\n", + "overall effect leaves one with a downcast spirit Still those who can appreciate fine quality acting\n", + "will be able to savor the courageous work of the leads in this often difficult film journey of\n", + "Gallic low life\n", + "\n", + "Actual sentiment: positive\n", + "Predicted sentiment: negative\n", + "=====================================================================\n", + "\n", + "Review: I almost stopped watching Hindi movies because of the mediocre quality and story lines One\n", + "exception for this is Ramgopal Verma movies This is a nice movie with great performances from the\n", + "star cast This is must see movie for those who are sick of watching stupid dancing and love stories\n", + "The adaptation of the story and characterization was exceptional goodYou should watch this movie for\n", + "Nana Patekar based on the life of Mumbai cop Daya Naik this movie deals in a more realistic way The\n", + "film delves into the life of the common man which he has apart from being an encounter specialist I\n", + "rate this as one of the best movie of the year\n", + "\n", + "Actual sentiment: positive\n", + "Predicted sentiment: positive\n", + "=====================================================================\n", + "\n", + "Review: This was a really interesting Halloween film I wasnt to thrilled with the whole Thorn theory\n", + "but it still makes for a good film I liked getting to see Tommy Doyle back but sadly Donald\n", + "Pleasance died right after shooting The film had a really REALLY bad director who didnt give a flip\n", + "about the series from what I heard treated Donald bad and wouldnt let Danielle Harris come back as\n", + "Jamie Its like he was just trying to bring down the film but I still liked it There were alot of\n", + "cuts and music changes and if youre lucky you can get the Producers Cut which features over 40 min\n", + "of never before scenes With those scenes it turns into a whole new movie Check it out if you have\n", + "the chance\n", + "\n", + "Actual sentiment: positive\n", + "Predicted sentiment: positive\n", + "=====================================================================\n", + "\n", + "Review: Cheerleader Massacre was supposed to be the fourth installment of the Slumber Party Massacre\n", + "series if thats what they were doing which it is considering ONE actress from the original returns\n", + "in a small cameo role they have failed miserably and made by far the 
worst installment of the\n", + "quadrilogy Cheerleader Massacre seamlessly combines bad acting a horrible plot a dumb killer dull\n", + "and boring deaths boring scenery and hideous camera work to make it one of the worst films ever made\n", + "Did I already mention how bad it was Dont get me wrong this cheesy and retarded excuse for a horror\n", + "film is nowhere near as bad as Napoleon Dynamite but it is undeniably a horrible movieCheerleader\n", + "Massacre is an exact polar opposite of the original Slumber Party Massacre Stay away by all means\n", + "This movie is utter garbage\n", + "\n", + "Actual sentiment: negative\n", + "Predicted sentiment: negative\n", + "=====================================================================\n", + "\n", + "Review: This movie will give me nightmares I will wake up drenched in sweat screaming I didnt make\n", + "this film please dont blame me I honestly think it would have been more entertaining to watch a fat\n", + "guy eating lard in his moms basement for a hour or two than to watch this crap I understand money\n", + "was tight but goddamn what the hell were they thinking there was no thought plot or effort put into\n", + "this This movie needs a warning Please for the love of god dont fund the drama department a the\n", + "local JC On an other note these are the least likable characters I have ever seen and I have seen\n", + "movies with Hitler in them So lastly take my advice the next time you even think about renting this\n", + "just pop a few hundred Adivl and let the sleep come\n", + "\n", + "Actual sentiment: negative\n", + "Predicted sentiment: negative\n", + "=====================================================================\n", + "\n", + "Review: In this movie Virtual Sexuality the 17 year old Justine is not lucky in love One day when\n", + "she is stood up she goes with her friend to a virual reality conference there she is introduced with\n", + "a machine that can change your look dody and whatever you like in Virtual Reality She decides to try\n", + "it out but begins to make a boyfriend of her own her dreamdate Then suddenly there is an explosion\n", + "in a gas pipe and her creation comes to life Ill say no more youll have to watch the movie which is\n", + "quite fun to watch\n", + "\n", + "Actual sentiment: positive\n", + "Predicted sentiment: positive\n", + "=====================================================================\n", + "\n", + "Review: Why does C Thomas Howell do these movies Cruise Howells one time costar does a huge\n", + "blockbuster of WOTW and Howell follows with this lame effortWhere do I start here Production Values\n", + "Ill start with the good stuff The look and feel of some of the scenes in this movie are not too bad\n", + "to be honest The setups are okay in spots and the direction not too badScript Terrible A series of\n", + "clunky scenes that could have been put in any order you like permeate throughout the movie The\n", + "amount of times the scene faded to black and reemerged a second later in the same room was\n", + "uncountable Very poor storyline but so was the Cruise WOTW takes some blame but an abysmal\n", + "screenplay kills it offSpecial FX Okay I dont want to be too harsh here as I imagine the budget was\n", + "smaller than Cruises lunch bill but in the overall context of the film the effects are badly done\n", + "Some shots are quite impressive mainly far off destruction shots of bridges Washington liner But in\n", + "the main the alien machines and tentacles themselves are dreadful Also 
the camera quality is fuzzy\n", + "on some shots and cuts away entirely on othersActing Im a fan of Howell but as he has reduced\n", + "himself to acting in these lowbudget flicks he has succumbed to the overacting bug a long time ago\n", + "Look at his performance in The Hitcher and compare it to this movie There is no comparison He\n", + "overdoes his facial expressions his flailing arms and legs where did he get that running style and\n", + "for a final coupdegras look at the scene where he loses the photo of his family Hysterical But after\n", + "saying all that he is still the best actor on show here Busey is embarrassing to look at and Peter\n", + "Green Zed is truly dead now baby mumbles incoherently through his one and only scene I honestly\n", + "could not understand one word he said I even went so far as trying to enable the subtitles on that\n", + "scene but the DVD did not have subtitles This seems to be a real keepitinthefamily affair too as\n", + "Howells son the directors wife and the line producer all make it into the film None of them are\n", + "goodDirection not bad but not good eitherScore DismalOverall a lame duck effort that will don'thing\n", + "for Howell in his\n", + "\n", + "Actual sentiment: negative\n", + "Predicted sentiment: negative\n", + "=====================================================================\n", + "\n", + "Review: Another Asian horror movie packed with intense and creepy moments Another Asian horror\n", + "trademark is the complexity of the plot which is here as well MAJOR SPOILER WARNINGThe movie starts\n", + "pretty simple two sisters go to live with their dad and stepmother after being put in a mental\n", + "institution after their mother hanged herself The sisters seem very hostile towards their mother\n", + "especially the elder one and they seem to ignore their father All goes smoothly until the mother\n", + "locks the young sister in the wardrobe and the elder sister tells her father Then it hits you your\n", + "sister has been dead for years now It turns out the older sister is still not recovered from the\n", + "death of her mother and what we didnt know is that the wardrobe the mother was hanged in fell on the\n", + "younger sister and killed her as wellAs for the stepmother she is the alter ego of the older sister\n", + "revealed when the stepmother actually the sisters alter ego is sitting on a couch when the real\n", + "stepmother walks in I hope it has been made clearer for confused Asian horror fans out thereFinally\n", + "my favourite scene is the scene where the father invites friends over for dinner and one of the\n", + "friends starts to choke which erupts into a panic attack Very creepy 7 out of 10\n", + "\n", + "Actual sentiment: positive\n", + "Predicted sentiment: positive\n", + "=====================================================================\n", + "\n", + "Review: As a native of New Orleans I can state that almost everything in this movie from the\n", + "atrocious NAwlins dialect to the highly creative manipulation of Crescent City geography is horrible\n", + "This is another one of those Big Hollywood movies that decides to stereotype New Orleans as 1 A city\n", + "full of Frenchsounding idiots 2 A city full of people who sound as if theyve just returned from\n", + "Blanche Dubois summer home 3 A city of drunkards where every day is Mardi Gras 4 A city of deep\n", + "mystery where almost everyone practices or is a victim of voodoo I admit that maybe we are a city of\n", + "drunkards although every day is NOT 
Mardi Gras The Big Easy is one of the worst films about New\n", + "Orleans I wouldnt recommend it to anybody\n", + "\n", + "Actual sentiment: negative\n", + "Predicted sentiment: negative\n", + "=====================================================================\n", + "\n", + "Review: This selfimportant confusing b+w film watches like an infant on a very bad acid trip Youre\n", + "dealing with something that reminds you of a piece of rotting lettuce that accidentally fell out of\n", + "the back of a garbage truck no one cares to touch it because it will probably be washed away on its\n", + "own down the storm drain Theres no room for plot when youve got visceral imagery and subtle allegory\n", + "To me it seems like the director tries to make the next great art movie while begging for\n", + "intellectual accolades I didnt bring my beret either Watching this I felt almost insulted since the\n", + "film does such an effective job of distancing itself from you\n", + "\n", + "Actual sentiment: negative\n", + "Predicted sentiment: negative\n", + "=====================================================================\n", + "\n", + "Review: Mercy the movie actually starts out as a somewhat decent film and ellen barkin does give a\n", + "strong performance But if you have read the book and actually got to know the characters and cared\n", + "who done it the movie just does not compare It is always hard to brink a book onto film and\n", + "unfortunatley this one ends up failing 3 out of 10\n", + "\n", + "Actual sentiment: negative\n", + "Predicted sentiment: negative\n", + "=====================================================================\n", + "\n", + "Review: What in the world This piece of gambling cinema would have been suitable for the Lifetime\n", + "Network Michael Imperoli is a good actor but I think his portrayal as Stu fell short The montages\n", + "were unbearable and too many The supporting cast where are you Whoever did the casting should be\n", + "partially at fault The cinematography was useless A gambling story with an after school feel to it\n", + "Stories of this sort should be left for the Oliver Stones of the world It would still suck ass but\n", + "at least it would be fun to watch It was an attempt that lost its wheels before the race ever begun\n", + "Mario Andretti in the 1982 Indy 500 came to mind\n", + "\n", + "Actual sentiment: negative\n", + "Predicted sentiment: negative\n", + "=====================================================================\n", + "\n", + "Review: This movie definitely made me laugh but that doesnt mean it was exactly funny Well then\n", + "again me and my friends had a lot of fun watching itI doubt there is anything about this movie that\n", + "hasnt been done at least twice before just like the plot itself All of the characters are overused\n", + "movie cliché cardboardbox roles that dont even require acting skills accordingly such skills are not\n", + "delivered We have the corrupt cop a ruthless killer who claims to care about his men and their\n", + "families whilst caring nothing about people he shoots in the forehead at so close a range as to have\n", + "blood spat on his face We have the wornout cop on the edge so nicely pointed at in the discussion\n", + "boards of this movie we have the old onedayawayfromretirementcop who just about everyone must have\n", + "immediately identified as the most likely man on the inside since he had most to gain and he didnt\n", + "utter a trustworthy word throughout the movie About as seethrough as a 
glass house on a sunny day\n", + "The big black gangster king was a copy of all previous big black gangster kings in movie history\n", + "they couldve just called him Marcellus Wallace but just slightly tougher and more ruthless because\n", + "something has to emphasize that we also know Laurence Fishburne from actually good movies Then we\n", + "finally have the HIGHLY EDUCATED doctor who cant think of anything reasonable to do as soon as the\n", + "situation differs from her ordinary life and who spends the majority of the movie sitting in a\n", + "corner helplessly trying to figure out how to hold on to the weapon she was given NOT USING ITThe\n", + "whole siege story is not interesting not original having been used twice before and this movie\n", + "manages to add absolutely nothing interesting to it There is the initial probe then the laying of\n", + "the siege then the assault then the escape attempts Meanwhile a bunch of strained stressed freaked\n", + "out cops and thugs manage to hold off a Police assault team with hightech equipment and the quite\n", + "important advantage of VISION Then again in deep night with the power cut and with a snow storm\n", + "raging overhead there is definitely a lot of light coming in so who really cares about night\n", + "visionBut the best part comes right at the end In the first scenes showing Precinct 13 we see it is\n", + "situated in an outskirt of an industrial city factories and office buildings surround it on all\n", + "sides From this point\n", + "\n", + "Actual sentiment: negative\n", + "Predicted sentiment: negative\n", + "=====================================================================\n", + "\n", + "Review: Some people say this show was good in its early years I disagree with all of em The show is\n", + "just plain stupid and pathetic My mum hates it I hate it my dad hates it I dont know about my sister\n", + "but oh well Here some reasons why1 THE CHARACTERS Babies being used as grown up style characters are\n", + "stupid The babies are just precocious and annoying The grown ups and adults are dumb and unappealing\n", + "The worst character is that Angelica Pickles she really does it in for your ear drums when you had a\n", + "long hard and miserable day at the office and also that Kimi Finster who appears later on she is too\n", + "over optimistic and a pain in the butt She cant decided whither she is French or Japanese it doesnt\n", + "matter know you are a American Citizen know and thats that Oh what am I talking about all the\n", + "characters from this show suck2 THE STORIES The stories are unoriginal and dumb The make it like the\n", + "babies go off on a great adventure yeah to the back yard shed In one episode that little goofy brat\n", + "Tommy Pickles the Leader broke in to a televisions control room and literally almost destroyed it\n", + "Dont give kids any idea to smash up normal TV Stations control rooms they pay a awful lot of money\n", + "for them in real life I can imagine what the broadcasters must of felt like airing this episode they\n", + "will probably start staring at their machines throughout the day scared that a baby will brake in\n", + "Sad3 OVER RATED The show has been dragging on for years now and people are still making up stories\n", + "and new series and spinoffs for this Get off The Simpsons have been going for nearly the same amount\n", + "of time as this but they are much better and funnier than babies The show is just plain over rated\n", + "People where is your common senseAnyway I surprised TV 
Stations across the world want to air this\n", + "series even off today The show is utter junk and should have never been produced The two movies for\n", + "this cartoons sucked just the same 210\n", + "\n", + "Actual sentiment: negative\n", + "Predicted sentiment: negative\n", + "=====================================================================\n", + "\n", + "Review: What can I saynot much to this one at all Pretty dull and uninterestingThe actors\n", + "performances are just OK The only one that shines in any way is Simmons but he only has maybe 3\n", + "scenes I understand that by keeping his screen time to a minimum he retains the mysterious psychic\n", + "aura he has but I cant help but feel his talent was wasted No one else rose above mediocreThe story\n", + "itself seems like it may be intriguing at the beginning but then just doesnt go anywhere There wasnt\n", + "a single scene in the movie that impressed me or made me feel like I had just seen something special\n", + "The cinematography was fairly blandI mean desert in a washed out sort of sepianot very inspiringThe\n", + "story of his childhood pal back outta prison seemed only partially thought out and didnt really add\n", + "anything to the story other than making an average Twilight Zone script into a full length\n", + "featureDrab\n", + "\n", + "Actual sentiment: negative\n", + "Predicted sentiment: negative\n", + "=====================================================================\n", + "\n", + "Review: I kept watching it because it seemed like the plot was going somewhere When it ambiguously\n", + "got there I was very disappointed Im going to tell you what really happened in the next sentence But\n", + "maybe I wont Maybe Ill just imply something will happen The writers lacked any imagination This is\n", + "not even a B movie its a made for TV B movie\n", + "\n", + "Actual sentiment: negative\n", + "Predicted sentiment: negative\n", + "=====================================================================\n", + "\n", + "Review: Straight up I love this film I love everything about it It has a great soundtrack it has a\n", + "lot of recognizable faces and it is funny as hell There are so many plots in this film and every one\n", + "of them is funny in one way or anotherWhere as Spicolli lit up the screen two years back Drake is\n", + "almost as memorable of a character All he wants to do is have fun He moves out of the house without\n", + "his parents consent he skips work whenever he feels like it he is obsessed with sex he loves his\n", + "drugs and booze and he tries to be a good friend It is his lacksidaisical attitude that makes him\n", + "such a joy to watch And he comes out with some great lines And there are so many tiny observations\n", + "that you dont see coming but they make you laugh at the sheer velocity when it hits you One\n", + "particular moment is when Tommy and Bill are talking about Bills ex girlfriend dating someone else\n", + "now At the end of the conversation Tommy takes his huge beer bottle and just throws it over his\n", + "shoulder casually He then says good night and the scene ends It is a perfect scene Tommys world is\n", + "his own He really lives to party and have fun When the conversation is over his time is over and he\n", + "doesnt care who he offends in the process He has an innocence about him Its casual is his favourite\n", + "sayingAnother such classic scene is Reggie handing Bill a donut He says something to him that me and\n", + "my friends will never forget because we rewound 
the film ten times and watched that part over and\n", + "over again and hurt ourselves laughing It has to be seen to be appreciatedWild Life is a throw back\n", + "to when teen comedies were funny raunchy had a good ear entertained us and just wanted us to get\n", + "lost in their world for 90 minutes Wild Life does all those things perfectly If this is a film that\n", + "you havent seen give it a chance It is a classicAlso check out the army store guy that Jim has\n", + "problems with He is a very familiar face now and it is his first role on the big screen\n", + "\n", + "Actual sentiment: positive\n", + "Predicted sentiment: positive\n", + "=====================================================================\n", + "\n", + "Review: This movie is a real shame not just for the plotthe empty performance of the characters it\n", + "is for the lack of creativity from the director and all the crew this is maybe one of the worst\n", + "movies of all timesand it is hard to believe that is the sequel of one of the most famous movies of\n", + "the 90sI am a great fan of The Mask when I went to see this movie I was expecting to a movie with a\n", + "good sense of humor a movie with a acceptable plot instead I saw a really bad copy of Chuck Jones\n", + "and Tex Avery cartoons the movie was not funny even for my 7 years old sister so I wonderWhat was\n", + "wrong New Line CinemaWas it trying to repeat the success of the first movie or was it trying to\n", + "create another masterpiece like The Lord of the RingsBecause if they did they were completely out of\n", + "their minds\n", + "\n", + "Actual sentiment: negative\n", + "Predicted sentiment: negative\n", + "=====================================================================\n", + "\n", + "Review: eXistenZ is simply David Cronenbergs best movie All the people compare it to the Matrix\n", + "Theyre not even similar If you enjoyed Cronenbergs other works just a little bit youll love this one\n", + "\n", + "Actual sentiment: positive\n", + "Predicted sentiment: positive\n", + "=====================================================================\n", + "\n", + "Review: First time I ever felt I needed to write a reviewI have seen thousands of movies in my life\n", + "and I like a wide range of movies I am reasonably opened minded and can easily say I enjoyed a movie\n", + "while still saying yeah it was not good but I enjoyed it I can appreciate the mastery of great\n", + "movies like The Shaw shank redemption the godfather and American history X I can like good movies in\n", + "a genre like horror or comedy even if the movie might not be that great I can even enjoy a bad movie\n", + "that just happens to entertain me Bloodsport I also will try to rate movie fairly even if I did not\n", + "like it City lights by Charlie Chapin was not a movie I enjoyed but I can appreciate the acting and\n", + "story lines for the timeI think some people when they go on this site instead of randomly click a\n", + "rating should take a few ideas into account Try to rate the movie based on how good it actually was\n", + "Do not let your personal bias affect the rating Also look at other moves you rated and compare the\n", + "movie you are going to rateThis movie was the worst piece of trash I have ever seen 2 hours of my\n", + "life where just stolen The acting was awful across the board The scenes where choppy at best However\n", + "the real disgrace was the story The first 20 minutes we actually had a story that tried to make\n", + "sense and take the viewer from 
point A to B However after that it was a nightmare They kept trying\n", + "to add new elements but nothing was every explained Nothing really ever made sense was steward dead\n", + "is he alive did he hit by lighting was it really lighting was it aliens is he an alien etc The\n", + "ending tied nothing together and really did not answer any questions The only positive was nobody\n", + "cared we where just happy to leave the theater6510 What is wrong with some of you I will admit that\n", + "the 8 of us where so mad about seeing this we did think what would make it better and we decide to\n", + "tell a few of our friends that this movie was good so they would have to suffer and see this movie\n", + "What can I say misery loves company That is really the only reason I can see for a 65 ratingDo not\n", + "waste your life\n", + "\n", + "Actual sentiment: negative\n", + "Predicted sentiment: negative\n", + "=====================================================================\n", + "\n", + "Review: My main criticism with the movie is the animation I totally agree with everyone else it was\n", + "very poor Some of the characters seemed to have darker skin tones than they did in the first film\n", + "which is much better Also the background colours looked rushed and somewhat static It is also a\n", + "shame that Michael JFox didnt voice Milo he did such a good job and James Arnold Taylor wasnt sure\n", + "whether he was supposed to sound like Milo or Aladdin I have also taken into consideration the lack\n", + "of a good storyline the third story was confusing and clumsily told and the second story suffered\n", + "from poor scripting To make things worse the first one I cant even remember other than a fishing\n", + "village being haunted or something like that However there was some nice music and good voice\n", + "talents from John Mahoney Cree Summer Clancy Brown and Tom Wilson that saved the film from total\n", + "disaster All in all a disappointing sequel to a surprisingly good film 410 Bethany Cox\n", + "\n", + "Actual sentiment: negative\n", + "Predicted sentiment: negative\n", + "=====================================================================\n", + "\n", + "Review: Giant Robot was the most popular Japanese TV serial ever seen on Indian TV It was targeted\n", + "to children and we saw a robot for the first time in our life Many Indian children must have even\n", + "seen a machine for the first time outside the school textbooks The serial also showed a child in an\n", + "adults organization fighting evil No doubt many of us who have seen Giant Robot in our childhood\n", + "long for our own robots and as a stopgap arrangement look upon our computers in the same way This\n", + "show also portrayed ideal adults referring at Jerry Johnnys buddy friend and Unicorn chief Azuma We\n", + "grew to respect Japanese progress and still view Japan as the ideal Asian nationBTW at that time\n", + "there were no satellite TV channels in India and the govt owned broadcaster did not show much of\n", + "Disney cartoons I guess that was how child serials like giant Robot got appreciated Nowadays there\n", + "is Pokemon etc but they are no so fascinating or alluring as Giant robot\n", + "\n", + "Actual sentiment: positive\n", + "Predicted sentiment: positive\n", + "=====================================================================\n", + "\n", + "Review: This might sound weird but I only got to see the first movie The Emperors New Groove\n", + "yaddayadda a week ago and only because of one episode of the 
TV show I simply adore Kuzcos character\n", + "but Kronk isnt that bad either Anyway eventually I decided to watch the second film just so I\n", + "wouldve seen it Hoped it would be as good as the first one but Im sorry to tell this but the more\n", + "the humour got American the more I yawned I agreed with Kuzco when he started crying seeing all the\n", + "cheesy footageStill younger kids and probably veterans too will love this movie to bits if they like\n", + "the old school moralising Disney that is but I just had expectations that were an eensy teensy\n", + "little bit hell of a lot higher than they shouldve been Kronk is a lovely character being good\n", + "hearted and dumb all at once but it were Pacha and Kuzco in drag that woke me up at the end of the\n", + "movie Ill ignore Rudy for as far thats possibleAnyway great movie just not my style and as they say\n", + "you always have to be true to your groove\n", + "\n", + "Actual sentiment: negative\n", + "Predicted sentiment: positive\n", + "=====================================================================\n", + "\n", + "Review: The problem I have as a Finn is that most of the actors in this movie are in every Finnish\n", + "movie I have a feeling that Finland has only like five actors I think that if youre not from Finland\n", + "you really like this movie as a refreshing noveltyThis movie is about a dreadful chain of events\n", + "that affects a few people quite harshly Alcoholism cold climate and darkness may all be clichés but\n", + "theyre still very real in todays Finnish society A lot of people in Finland have depression\n", + "especially during winterThe tone of the movie is very melancholic I enjoyed it and Louhimes\n", + "directing was again very solid I liked this movie a lot only negative thing is that you see the same\n", + "faces that youve seen over and over again\n", + "\n", + "Actual sentiment: positive\n", + "Predicted sentiment: positive\n", + "=====================================================================\n", + "\n", + "Review: Most of the criticism of Attack of Show is from people who are unfairly comparing it to an\n", + "old computer TV program called The Screen Savers People are upset because G4 decided to cancel the\n", + "Screen Savers and replace it with the pop culture based Attack of the Show To compare the two shows\n", + "is like comparing apples to orangesAttack of the Show is a unique hour long program that covers\n", + "current Generation XY culture It features segments on moviestelevision panel discussions video games\n", + "new DVD releases sex advice new gadgets MP3 players cell phones etc comic booksgraphic novels\n", + "magazines and internet fads Its a fun show definitely worth checking out you are in your 20s or 30s\n", + "I give it an 8 out of 10\n", + "\n", + "Actual sentiment: positive\n", + "Predicted sentiment: positive\n", + "=====================================================================\n", + "\n", + "Review: This is the best comedy period It is so underrated Clever witty humor Great casting Jerry\n", + "Stiller is the jewel in the show he is so incredibly funny and quirky simply a comical genius Doug\n", + "and Carrie have great chemistry I so don't see what the hype is about when it comes to Everybody\n", + "loves Raymond it is SO overrated with lame jokes mostly forced humor and just not the witty show I\n", + "cant remember laughing in more than 1 episode King of Queens is a rare comedy that has all the right\n", + "ingredients to give you serious belly laughs 
which is normally caused by Arthur Spooner I think its\n", + "about time this comedy gets the hype it deserves and not the lame Raymond & CO\n", + "\n", + "Actual sentiment: positive\n", + "Predicted sentiment: positive\n", + "=====================================================================\n", + "\n", + "Review: Running Out of Time rests somewhere in the middle of Johnny Tos cannon in the solid good\n", + "category As a crime thriller its not terribly original or overwhelming and the action scenes will\n", + "not blow you away but it has something else going for it Its a Johnny To film after all it has\n", + "toAndy Lau has 72 hours to live He decides to play a strange catandmouse game with a hostage\n", + "negotiator of the HK police played by Lau Ching Wan Thats the plot in a nutshell On top of that To\n", + "piles layers of twists and turns that keep proceedings interesting throughout It occasionally\n", + "becomes too convoluted for its own sake but never lets it get the best of it However just as Johnny\n", + "To is about to hand over a slick and wellmade crime flick which lets face it are dimeadozen he slips\n", + "in bits and pieces that bring Running Out of Time alive as a full emotional experience providing the\n", + "soul and heart to the welloiled skeletonThe concepts of synchronism and minimalism staples in his\n", + "work are explored in great effect here Always subtle letting the images speak for themselves giving\n", + "them time to develop with long takes and slow tracking shots exemplary cutting to the score its all\n", + "here A small love story in a bus between Andy Lau and a girl is among the highlights of the film and\n", + "part of the heart Im talking about So simple yet so powerful Ditto for Laus and Lau Ching Wans car\n", + "scenes and the bowlingroom showdownHowever something stops me from claiming Running Out of Time is a\n", + "masterpiece To has all the ability and craftmanship down to a notch but he can also be too\n", + "workmanlike or bland at times When hes good hes REAL good There are even isolated moment of pure\n", + "brilliance that are just TOO good for their own sake leaving a bittersweet aftertaste for the rest\n", + "of the movie Im convinced that if he puts his heart to it he can make a really great film As it is\n", + "this is another one of his films that is flawed but enjoyable Underneath the slick HK style its the\n", + "black humour and heartfelt drama that makes this a compelling film Worth watching definitely\n", + "\n", + "Actual sentiment: positive\n", + "Predicted sentiment: positive\n", + "=====================================================================\n", + "\n", + "Review: Will there be please coming an end to hyping movies that are dealing about social conflicts\n", + "or other human disasters Okay Care is about childabuse Care is about perverts misusing boys in a\n", + "school and how disgusting it might be if its a movie with a poor script and made with bad playing\n", + "actors then it stays a bad movie Care is a movie that could have been but is it because it was a\n", + "tvmovie I dont know but everything seemed so limited that it comes over as some cheap movie that\n", + "will be seen by some housewifes and fathers who decide not to go to bed There are so many unanswered\n", + "things in this moviethe relation with his mother for instance or the death of some abused boy from\n", + "which we know nothing more Care should have been much much better\n", + "\n", + "Actual sentiment: negative\n", + "Predicted sentiment: 
negative\n", + "=====================================================================\n", + "\n", + "Review: This movie appears to have been overlooked by everyone Someone should bring it out on VHS\n", + "and DVD It is an excellent film and far superior to the one with Brooke Shields which was terrible\n", + "Jean Simmons deserves more credit than she is getting now days It would be nice if all her films\n", + "were offered on VHS or DVD Jean Simmons was and still is a very good actress She certainly was a\n", + "beauty In fact she is still a beauty She also has done extremely well on TV She is so much better\n", + "than many of the actors today\n", + "\n", + "Actual sentiment: positive\n", + "Predicted sentiment: positive\n", + "=====================================================================\n", + "\n", + "Review: When I heard there was to be an ABC Australian Broadcasting Corporation miniseries based on\n", + "life in Changi WWII POW camp with a focus on elements of comedy I was deeply sceptical and somewhat\n", + "criticalMy father had served in the second world war Such was the barbarity of the Japanese he was\n", + "able to talk about the horrors in and around Labuan where he was stationed until only quite recently\n", + "Along with my father I had been awarded the fortune of knowing many great men of stronger character\n", + "and spirit than I shall ever have who had witnessed acts of unspeakable barbarity at the hands of\n", + "the Empire of Japan and had never completely recovered The name Changi is destined to conjure\n", + "horrific images for ages to comeBut upon viewing I was highly impressed with the cast the characters\n", + "and the complex plotlines of this wonderful series I now regard Changi as the highlight of my week\n", + "bear in mind I have viewed only three episodes so far I hope the remaining episodes adhere to the\n", + "standards set by the first threeThe black humour works uncannily well however the flatulence jokes\n", + "are a little overdone and while much of the horror has been suppressed the series comes quite close\n", + "in relaying the undaunted spirit of the survivors who were able to later continue with their lives\n", + "in spite of the inhibiting memoriesThe flashback format of this series will be difficult for some to\n", + "followbut I can not think of no better way to do adequate justice to the men who suffered deep\n", + "emotional scarring proceeding internment when painfully suppressed experiences are remembered\n", + "sometimes years after the horrorOne of the darkest chapters of the Second World War the 20th century\n", + "and I would go so far as to say in the history of mankind is being relayed to a new generation\n", + "through this series and I hope it serves to relay the overwhelming adversity borne by the wartime\n", + "generationProceeding Changi I dont think I shall ever be able to listen to the poignant tune on the\n", + "road to Gundagai in the same way again Tune in\n", + "\n", + "Actual sentiment: positive\n", + "Predicted sentiment: positive\n", + "=====================================================================\n", + "\n", + "Review: I was pretty enthusiastic about seeing this movie when it came out Commercials for it made\n", + "it look quirky and I generally like Morgan Freeman and Chris Rock and the combination of the two\n", + "seemed like an interesting idea Sadly I was terribly disappointed with Nurse BettyPersonally Ive\n", + "usually found that graphic violence and comedy dont go all that well together and 
the only directors\n", + "that have ever combined the two successfully in my opinion are Tarantino and the Coens There isnt\n", + "that much violence in Nurse Betty but what violence is in it made me feel kind of weird when I was\n", + "supposed to laugh Of course for me part of the problem was also that there didnt seem to be many\n", + "places where I was being asked toThe film doesnt much work as a drama either Renee Zellwegers Betty\n", + "the storys protagonist is clinically insane and impossible to relate to in any real way I will say\n", + "Zellweger acts the role quite well and Freeman Rock and Greg Kinnear all do good jobs too The\n", + "problem is in the writing Freeman is the only person that gets to play an interesting character Its\n", + "really too bad 310\n", + "\n", + "Actual sentiment: negative\n", + "Predicted sentiment: negative\n", + "=====================================================================\n", + "\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "lATfuiHYHq_1" + }, + "source": [ + "Now predict on all the test dataset" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "lvWQGLXhzHtn", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 66, + "referenced_widgets": [ + "6aaf51cb9ad44c94b6a174a8768904f7", + "51d23e1199274477a69557c74609afb2", + "029f74818c6842d7a28af62032418880", + "8db144e9144141779a1088c4bc000a99", + "210517aede4f4cfab9120fdeb3d8361a", + "df9bc2dc2b3c4fee98affdd7f5ca1ef6", + "b684a47485af4cb1934d57cbb03a4f57", + "942d20b134964d1d895af69938918464" + ] + }, + "outputId": "c0f5490b-2ade-4795-fa3d-1f0f1746e23c" + }, + "source": [ + "loader = DataLoader(dataset, batch_size=32, num_workers=4)\n", + "model.model.eval()\n", + "outputs = []\n", + "targets = []\n", + "for batch in tqdm(loader):\n", + " outs = model.model.generate(input_ids=batch['source_ids'].cuda(),\n", + " attention_mask=batch['source_mask'].cuda(),\n", + " max_length=2)\n", + "\n", + " dec = [tokenizer.decode(ids) for ids in outs]\n", + " target = [tokenizer.decode(ids) for ids in batch[\"target_ids\"]]\n", + "\n", + " outputs.extend(dec)\n", + " targets.extend(target)" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "display_data", + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "6aaf51cb9ad44c94b6a174a8768904f7", + "version_minor": 0, + "version_major": 2 + }, + "text/plain": [ + "HBox(children=(FloatProgress(value=0.0, max=782.0), HTML(value='')))" + ] + }, + "metadata": { + "tags": [] + } + }, + { + "output_type": "stream", + "text": [ + "\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ZBxEcXeWGafd" + }, + "source": [ + "Let's check if the model generates any invalid text" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "Y_qylwYGXgwY" + }, + "source": [ + "for i, out in enumerate(outputs):\n", + " if out not in ['positive', 'negative']:\n", + " print(i, 'detected invalid prediction')" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MpU_VkFGIgnw" + }, + "source": [ + "This great is great! Our model hasn't generated any invalid prediction. 
Let's calculate accuracy and other metrics" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "EdJcQODoOChP", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 34 + }, + "outputId": "22fc6852-5443-43e4-d87e-5a5266ddffd9" + }, + "source": [ + "metrics.accuracy_score(targets, outputs)" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "0.94712" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 41 + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "YepnSgI5OKti", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 168 + }, + "outputId": "a2914edf-d572-4166-a886-6c0d731835e5" + }, + "source": [ + "print(metrics.classification_report(targets, outputs))" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "text": [ + " precision recall f1-score support\n", + "\n", + " negative 0.95 0.95 0.95 12500\n", + " positive 0.95 0.95 0.95 12500\n", + "\n", + " accuracy 0.95 25000\n", + " macro avg 0.95 0.95 0.95 25000\n", + "weighted avg 0.95 0.95 0.95 25000\n", + "\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "UcZqrJELrRVw" + }, + "source": [], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Dhqigmiw2hVh" + }, + "source": [ + "## Emotion classification\n", + "\n", + "While most sentiment-analysis datasets are binary with 'positive' and 'negative' sentiments, [Elvis Saravia](https://twitter.com/omarsar0) has put together a great [dataset](https://github.com/dair-ai/emotion_dataset) for emotion recognition. The task is: given some text, classify it into one of the following six emotions:\n", + "\n", + "'sadness', 'joy', 'anger', 'fear', 'surprise', 'love'.\n", + "\n", + "Here's the [original notebook](https://colab.research.google.com/drive/1nwCE6b9PXIKhv2hvbqf1oZKIGkXMTi1X#scrollTo=pSzoz9InH0Ta) which trains a RoBERTa model to classify the text." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "0B4IhzEgO21B" + }, + "source": [ + "### Download and view data" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "6eQhtsD65svj", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000 + }, + "outputId": "a46f0a9a-27bb-4d10-c7a3-b45b3c894526" + }, + "source": [ + "!wget https://www.dropbox.com/s/ikkqxfdbdec3fuj/test.txt\n", + "!wget https://www.dropbox.com/s/1pzkadrvffbqw6o/train.txt\n", + "!wget https://www.dropbox.com/s/2mzialpsgf9k5l3/val.txt" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "text": [ + "--2020-05-09 06:38:34-- https://www.dropbox.com/s/ikkqxfdbdec3fuj/test.txt\n", + "Resolving www.dropbox.com (www.dropbox.com)... 162.125.9.1, 2620:100:601f:1::a27d:901\n", + "Connecting to www.dropbox.com (www.dropbox.com)|162.125.9.1|:443... connected.\n", + "HTTP request sent, awaiting response... 301 Moved Permanently\n", + "Location: /s/raw/ikkqxfdbdec3fuj/test.txt [following]\n", + "--2020-05-09 06:38:34-- https://www.dropbox.com/s/raw/ikkqxfdbdec3fuj/test.txt\n", + "Reusing existing connection to www.dropbox.com:443.\n", + "HTTP request sent, awaiting response... 
302 Found\n", + "Location: https://ucee0a4eb59e9a79892dc4e0e239.dl.dropboxusercontent.com/cd/0/inline/A3U44u6Qw37AC-ysKv8gHOrYJywB8BLj-SWf4UtgNCVbhch6g7hz1JW0yVUjMcYZGe1daItFDuwZIfhuumccC7WN93mOzuubDPQ-xL4xEAH-ugpp_5KfcQCc-l4yNej1pUo/file# [following]\n", + "--2020-05-09 06:38:35-- https://ucee0a4eb59e9a79892dc4e0e239.dl.dropboxusercontent.com/cd/0/inline/A3U44u6Qw37AC-ysKv8gHOrYJywB8BLj-SWf4UtgNCVbhch6g7hz1JW0yVUjMcYZGe1daItFDuwZIfhuumccC7WN93mOzuubDPQ-xL4xEAH-ugpp_5KfcQCc-l4yNej1pUo/file\n", + "Resolving ucee0a4eb59e9a79892dc4e0e239.dl.dropboxusercontent.com (ucee0a4eb59e9a79892dc4e0e239.dl.dropboxusercontent.com)... 162.125.9.6, 2620:100:601f:6::a27d:906\n", + "Connecting to ucee0a4eb59e9a79892dc4e0e239.dl.dropboxusercontent.com (ucee0a4eb59e9a79892dc4e0e239.dl.dropboxusercontent.com)|162.125.9.6|:443... connected.\n", + "HTTP request sent, awaiting response... 200 OK\n", + "Length: 206760 (202K) [text/plain]\n", + "Saving to: ‘test.txt’\n", + "\n", + "\rtest.txt 0%[ ] 0 --.-KB/s \rtest.txt 100%[===================>] 201.91K --.-KB/s in 0.07s \n", + "\n", + "2020-05-09 06:38:35 (2.79 MB/s) - ‘test.txt’ saved [206760/206760]\n", + "\n", + "--2020-05-09 06:38:37-- https://www.dropbox.com/s/1pzkadrvffbqw6o/train.txt\n", + "Resolving www.dropbox.com (www.dropbox.com)... 162.125.9.1, 2620:100:601f:1::a27d:901\n", + "Connecting to www.dropbox.com (www.dropbox.com)|162.125.9.1|:443... connected.\n", + "HTTP request sent, awaiting response... 301 Moved Permanently\n", + "Location: /s/raw/1pzkadrvffbqw6o/train.txt [following]\n", + "--2020-05-09 06:38:38-- https://www.dropbox.com/s/raw/1pzkadrvffbqw6o/train.txt\n", + "Reusing existing connection to www.dropbox.com:443.\n", + "HTTP request sent, awaiting response... 302 Found\n", + "Location: https://uce8d13a7ac2bd3bc99f493e9cdc.dl.dropboxusercontent.com/cd/0/inline/A3VHSDKXCes7IMws7cQCAbiXyNW7dOk9CYhiTjzghv3EcDtNVLX37OVjW43i4mNdbrYOdNNhfqFlbysOgj9PUEvDo4b1Uq_2cChy-FGiz0-mNYIc07lv7AoBSphOulwSRY8/file# [following]\n", + "--2020-05-09 06:38:38-- https://uce8d13a7ac2bd3bc99f493e9cdc.dl.dropboxusercontent.com/cd/0/inline/A3VHSDKXCes7IMws7cQCAbiXyNW7dOk9CYhiTjzghv3EcDtNVLX37OVjW43i4mNdbrYOdNNhfqFlbysOgj9PUEvDo4b1Uq_2cChy-FGiz0-mNYIc07lv7AoBSphOulwSRY8/file\n", + "Resolving uce8d13a7ac2bd3bc99f493e9cdc.dl.dropboxusercontent.com (uce8d13a7ac2bd3bc99f493e9cdc.dl.dropboxusercontent.com)... 162.125.9.6, 2620:100:601f:6::a27d:906\n", + "Connecting to uce8d13a7ac2bd3bc99f493e9cdc.dl.dropboxusercontent.com (uce8d13a7ac2bd3bc99f493e9cdc.dl.dropboxusercontent.com)|162.125.9.6|:443... connected.\n", + "HTTP request sent, awaiting response... 200 OK\n", + "Length: 1658616 (1.6M) [text/plain]\n", + "Saving to: ‘train.txt’\n", + "\n", + "train.txt 100%[===================>] 1.58M --.-KB/s in 0.1s \n", + "\n", + "2020-05-09 06:38:38 (13.8 MB/s) - ‘train.txt’ saved [1658616/1658616]\n", + "\n", + "--2020-05-09 06:38:41-- https://www.dropbox.com/s/2mzialpsgf9k5l3/val.txt\n", + "Resolving www.dropbox.com (www.dropbox.com)... 162.125.9.1, 2620:100:601f:1::a27d:901\n", + "Connecting to www.dropbox.com (www.dropbox.com)|162.125.9.1|:443... connected.\n", + "HTTP request sent, awaiting response... 301 Moved Permanently\n", + "Location: /s/raw/2mzialpsgf9k5l3/val.txt [following]\n", + "--2020-05-09 06:38:41-- https://www.dropbox.com/s/raw/2mzialpsgf9k5l3/val.txt\n", + "Reusing existing connection to www.dropbox.com:443.\n", + "HTTP request sent, awaiting response... 
302 Found\n", + "Location: https://uccfba89432e5bdbb64e910d3444.dl.dropboxusercontent.com/cd/0/inline/A3Uqwy1biv6Ipmcdb5FAQtm-d1nMaHHQJKrKTqu-TusvptAdDtwpPRPxMIuZovISOPSJPhwNP1imjPtokJO3KO6OlofN61eqKzGDn-P7BovjRs9wkVRJW0HjMMaz8Q5vmGU/file# [following]\n", + "--2020-05-09 06:38:42-- https://uccfba89432e5bdbb64e910d3444.dl.dropboxusercontent.com/cd/0/inline/A3Uqwy1biv6Ipmcdb5FAQtm-d1nMaHHQJKrKTqu-TusvptAdDtwpPRPxMIuZovISOPSJPhwNP1imjPtokJO3KO6OlofN61eqKzGDn-P7BovjRs9wkVRJW0HjMMaz8Q5vmGU/file\n", + "Resolving uccfba89432e5bdbb64e910d3444.dl.dropboxusercontent.com (uccfba89432e5bdbb64e910d3444.dl.dropboxusercontent.com)... 162.125.9.6, 2620:100:601f:6::a27d:906\n", + "Connecting to uccfba89432e5bdbb64e910d3444.dl.dropboxusercontent.com (uccfba89432e5bdbb64e910d3444.dl.dropboxusercontent.com)|162.125.9.6|:443... connected.\n", + "HTTP request sent, awaiting response... 200 OK\n", + "Length: 204240 (199K) [text/plain]\n", + "Saving to: ‘val.txt’\n", + "\n", + "val.txt 100%[===================>] 199.45K --.-KB/s in 0.07s \n", + "\n", + "2020-05-09 06:38:42 (2.75 MB/s) - ‘val.txt’ saved [204240/204240]\n", + "\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "yVrcVbvx74G5" + }, + "source": [ + "!mkdir emotion_data\n", + "!mv *.txt emotion_data" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "jOpnh3Y06BGU" + }, + "source": [ + "train_path = \"emotion_data/train.txt\"\n", + "test_path = \"emotion_data/test.txt\"\n", + "val_path = \"emotion_data/val.txt\"\n", + "\n", + "## emotion labels\n", + "label2int = {\n", + " \"sadness\": 0,\n", + " \"joy\": 1,\n", + " \"love\": 2,\n", + " \"anger\": 3,\n", + " \"fear\": 4,\n", + " \"surprise\": 5\n", + "}" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "r4sDek6T8PXE", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 313 + }, + "outputId": "a061ba43-03d8-4fdc-b715-b6fca8d57388" + }, + "source": [ + "data = pd.read_csv(train_path, sep=\";\", header=None, names=['text', 'emotion'],\n", + " engine=\"python\")\n", + "data.emotion.value_counts().plot.bar()" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 49 + }, + { + "output_type": "display_data", + "data": { + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAX0AAAEXCAYAAABBFpRtAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+j8jraAAAWn0lEQVR4nO3df9AlVX3n8fdHEH/EH6BMKATiYJzoYlQ0I7Ar2SiEX6JCFA2uP0ZDMlUuRuO6u2JWl4piCmNKd01FVhQiuolINCoLKk5Q1qhRGBBBUMKIUMKiTBwEIwEFv/tHnwevOMPzDHPn9sw971fVU7f7dN/b32a4n9v39Om+qSokSX2439gFSJJmx9CXpI4Y+pLUEUNfkjpi6EtSRwx9SerIjmMXcG923XXXWr58+dhlSNJ25eKLL/7nqlq2sWXbdOgvX76ctWvXjl2GJG1Xkly3qWV270hSRwx9SeqIoS9JHTH0Jakjhr4kdcTQl6SOGPqS1BFDX5I6sk1fnHVfLT/h3Jlu79qTj5zp9iTpvvJIX5I6YuhLUkcMfUnqiKEvSR0x9CWpI4a+JHXE0Jekjhj6ktQRQ1+SOmLoS1JHlhT6Sa5NcnmSS5OsbW2PSLImydXtcZfWniTvSrIuyWVJnjrxOqva+lcnWbV1dkmStCmbc6T/zKrat6pWtvkTgPOragVwfpsHOAJY0f5WA6fA8CEBnAjsD+wHnLjwQSFJmo0t6d45CjijTZ8BHD3R/oEafBnYOcnuwGHAmqraUFU3A2uAw7dg+5KkzbTU0C/gM0kuTrK6te1WVTe26e8Cu7XpPYDvTDz3+ta2qfafk2R1krVJ1q5fv36J5UmSlmKpt1Y+sKpuSPLLwJok35xcWFWVpKZRUFWdCpwKsHLlyqm8piRpsKQj/aq6oT3eBHyMoU/+e63bhvZ4U1v9BmCviafv2do21S5JmpFFQz/JLyV56MI0cCjwdeBsYGEEzirgE236bOBlbRTPAcAtrRvoPODQJLu0E7iHtjZJ0owspXtnN+BjSRbW/5uq+nSSi4CzkhwHXAe8sK3/SeBZwDrgNuAVAFW1IclbgIvaem+uqg1T2xNJ0qIWDf2qugZ48kbavw8cvJH2Ao7fxGudDpy++WVKkqbBK3IlqSOGviR1xNCXpI4Y+pLUEUNfkjpi6EtSRwx9SerIUu+9o23I8hPOnen2rj35yJluT9LW45G+JHXE0Jekjhj6ktQRQ1+SOmLoS1JHDH1J6oihL0kdMfQlqSOGviR1xNCXpI4Y+pLUEUNfkjpi6EtSRwx9SeqIoS9JHTH0Jakjhr4kdcTQl6SOGPqS1BFDX5I6YuhLUkeWHPpJdkjy1STntPm9k3wlybokH06yU2t/QJtf15Yvn3iNN7T2q5IcNu2dkSTdu8050n8N8I2J+bcB76yqxwI3A8e19uOAm1v7O9t6JNkHOBZ4AnA48O4kO2xZ+ZKkzbGk0E+yJ3Ak8L42H+Ag4CNtlTOAo9v0UW2etvzgtv5RwJlVdUdVfRtYB+w3jZ2QJC3NUo/0/wfwX4GftvlHAj+oqjvb/PXAHm16D+A7AG35LW39u9s38hxJ0gwsGvpJng3cVFUXz6AekqxOsjbJ2vXr189ik5LUjaUc6T8deG6Sa4EzGbp1/iewc5Id2zp7Aje06RuAvQDa8ocD359s38hz7lZVp1bVyqpauWzZss3eIUnSpi0a+lX1hqras6qWM5yI/WxVvRj4HHBMW20V8Ik2fXabpy3/bFVVaz+2je7ZG1gBXDi1PZEkLWrHxVfZpNcDZyY5CfgqcFprPw34YJJ1wAaGDwqq6ookZwFXAncCx1fVXVuwfUnSZtqs0K+qC4AL2vQ1bGT0TVXdDrxgE89/K/DWzS1SkjQdXpErSR0x9CWpI4a+JHXE0Jekjhj6ktQRQ1+SOmLoS1JHDH1J6oihL0kdMfQlqSOGviR1xNCXpI4Y+pLUEUNfkjpi6EtSRwx9SeqIoS9JHTH0Jakjhr4kdcTQl6SOGPqS1BFDX5I6YuhLUkcMfUnqiKEvSR0x9CWpI4a+JHXE0Jekjhj6ktQRQ1+SOrJo6Cd5YJILk3wtyRVJ/qS1753kK0nWJflwkp1a+wPa/Lq2fPnEa72htV+V5LCttVOSpI1bypH+HcBBVfVkYF/g8CQHAG8D3llVjwVuBo5r6x8H3Nza39nWI8k+wLHAE4DDgXcn2WGaOyNJuneLhn4N/qXN3r/9FXAQ8JHWfgZwdJs+qs3Tlh+cJK39zKq6o6q+DawD9pvKXkiSlmRJffpJdkhyKXATsAb4FvCDqrqzrXI9sEeb3gP4DkBbfgvwyMn2jTxnclurk6xNsnb9+vWbv0eSpE1aUuhX1V1VtS+wJ8PR+eO3VkFVdWpVrayqlcuWLdtam5GkLm3W6J2q+gHwOeDfAjsn2bEt2hO4oU3fAOwF0JY/HPj+ZPtGniNJmoGljN5ZlmTnNv0g4BDgGwzhf0xbbRXwiTZ9dpunLf9sVVVrP7aN7tkbWAFcOK0dkSQtbsfFV2F34Iw20uZ+wFlVdU6SK4Ezk5wEfBU4ra1/GvDBJOuADQwjdqiqK5KcBVwJ3AkcX1V3TXd3JEn3ZtHQr6rLgKdspP0aNjL6pqpuB16widd6K/DWzS9TkjQNXpErSR0x9CWpI4a+JHXE0Jekjhj6ktQRQ1+SOmLoS1JHDH1J6oihL0kdMfQlqSOGviR1xNCXpI4Y+pLUEUNfkjpi6EtSRwx9SeqIoS9JHTH0Jakjhr4kdcTQl6SOGPqS1BFDX5I6YuhLUkcMfUnqiKEvSR0x9CWpI4a+JHVkx7ELkO5p+QnnznR715585Ey3J43JI31J6siioZ9krySfS3JlkiuSvKa1PyLJmiRXt8ddWnuSvCvJuiSXJXnqxGutautfnWTV1tstSdLGLOVI/07gdVW1D3AAcHySfYATgPOragVwfpsHOAJY0f5WA6fA8CEBnAjsD+wHnLjwQSFJmo1FQ7+qbqyqS9r0D4FvAHsARwFntNXOAI5u00cBH6jBl4Gdk+wOHAasqaoNVXUzsAY4fKp7I0m6V5vVp59kOfAU4CvAblV1Y1v0XWC3Nr0H8J2Jp13f2jbVLkmakSWHfpKHAB8F/qiqbp1cVlUF1DQKSrI6ydoka9evXz+Nl5QkNUsK/ST3Zwj8v66qv2vN32vdNrTHm1r7DcBeE0/fs7Vtqv3nVNWpVbWyqlYuW7Zsc/ZFkrSIRcfpJwlwGvCNqnrHxKKzgVXAye3xExPtr0pyJsNJ21uq6sYk5wF/OnHy9lDgDdPZDWn74XUIGtNSLs56OvBS4PIkl7a2P2YI+7OSHAdcB7ywLfsk8CxgHXAb8AqAqtqQ5C3ARW29N1fVhqnshSRpSRYN/ar6ApBNLD54I+sXcPwmXut04PTNKVCSND1ekStJHTH0Jakjhr4kdcTQl6SOGPqS1BFDX5I6YuhLUkcMfUnqiKEvSR0x9CWpI4a+JHXE0Jekjhj6ktQRQ1+SOmLoS1JHDH1J6oihL0kdMfQlqSOGviR1xN
CXpI4Y+pLUEUNfkjqy49gFSJofy084d6bbu/bkI2e6vXngkb4kdcTQl6SOGPqS1BFDX5I6YuhLUkcMfUnqiKEvSR1ZNPSTnJ7kpiRfn2h7RJI1Sa5uj7u09iR5V5J1SS5L8tSJ56xq61+dZNXW2R1J0r1ZypH++4HD79F2AnB+Va0Azm/zAEcAK9rfauAUGD4kgBOB/YH9gBMXPigkSbOzaOhX1eeBDfdoPgo4o02fARw90f6BGnwZ2DnJ7sBhwJqq2lBVNwNr+MUPEknSVnZf+/R3q6ob2/R3gd3a9B7AdybWu761bar9FyRZnWRtkrXr16+/j+VJkjZmi0/kVlUBNYVaFl7v1KpaWVUrly1bNq2XlSRx30P/e63bhvZ4U2u/AdhrYr09W9um2iVJM3RfQ/9sYGEEzirgExPtL2ujeA4AbmndQOcBhybZpZ3APbS1SZJmaNFbKyf5EPAMYNck1zOMwjkZOCvJccB1wAvb6p8EngWsA24DXgFQVRuSvAW4qK335qq658lhSdJWtmjoV9WLNrHo4I2sW8Dxm3id04HTN6s6SdJUeUWuJHXE0Jekjhj6ktQRQ1+SOmLoS1JHDH1J6oihL0kdMfQlqSOGviR1xNCXpI4Y+pLUkUXvvSNJGiw/4dyZbu/ak4+c+mt6pC9JHTH0Jakjhr4kdcTQl6SOGPqS1BFDX5I6YuhLUkcMfUnqiKEvSR0x9CWpI4a+JHXE0Jekjhj6ktQRQ1+SOmLoS1JHDH1J6oihL0kdMfQlqSMzD/0khye5Ksm6JCfMevuS1LOZhn6SHYC/BI4A9gFelGSfWdYgST2b9ZH+fsC6qrqmqn4MnAkcNeMaJKlbqarZbSw5Bji8qn6/zb8U2L+qXjWxzmpgdZt9HHDVzAqEXYF/nuH2Zs39277N8/7N877B7Pfv0VW1bGMLdpxhEUtSVacCp46x7SRrq2rlGNueBfdv+zbP+zfP+wbb1v7NunvnBmCvifk9W5skaQZmHfoXASuS7J1kJ+BY4OwZ1yBJ3Zpp905V3ZnkVcB5wA7A6VV1xSxrWMQo3Uoz5P5t3+Z5/+Z532Ab2r+ZnsiVJI3LK3IlqSOGviR1pOvQT/KcJF3/N5DUl94D73eBq5P8WZLHj13M1pZklyRPGruOachgr8XXlDSp69CvqpcATwG+Bbw/yT8mWZ3koSOXNjVJLkjysCSPAC4B3pvkHWPXtaVqGIHwybHr2FqS7JDkm2PXsbUleXSS327TD5qz995uSU5L8qk2v0+S48auq+vQB6iqW4GPMNwHaHfgd4BLkvzhqIVNz8PbPj4P+EBV7Q/89sg1TcslSZ42dhFbQ1XdBVyV5FfGrmVrSfIHDO+997SmPYGPj1fR1L2fYXj6o9r8PwF/NFo1Tdehn+S5ST4GXADcH9ivqo4Angy8bszapmjHJLsDLwTOGbuYKdsf+Mck30pyWZLLk1w2dlFTtAtwRZLzk5y98Dd2UVN0PPB04FaAqroa+OVRK5quXavqLOCnMFynBNw1bknb4L13Zuz5wDur6vOTjVV127bwNWxK3sxwtPGFqrooyWOAq0euaVoOG7uArexNYxewld1RVT9OAkCSHYF5unDoR0keSdunJAcAt4xbkhdnkWQ3YKGL4MKqumnMerR5khwIrKiqv0qyDHhIVX177Lq0uCR/BvwAeBnwh8B/BK6sqv82amFTkuSpwF8Avw58HVgGHFNVo34b7Tr0k7wA+HOG7p0Avwn8l6r6yJh1TVN7Y50E/CvwaeBJwGur6n+PWtgUJDkRWAk8rqp+LcmjgL+tqqePXNpUtCPDvwD+DbATw61LflRVDxu1sClpw6WPAw5leP+dB7yv5iiU2reXxzHs31VV9ZORS+o+9L8GHLJwdN+OFP++qp48bmXTk+TSqto3ye8Azwb+E/D5edjHJJcyjL66pKqe0touq6p5GZa6luGmhH/L8OH2MuDXquoNoxY2JUmeB5xbVXeMXcvW0A4qP11VP0zyRuCpwElVdcmYdXV9Ihe43z26c77P/P03WThvcyTDUfDofYpT9ON2VLjQZ/pLI9czdVW1Dtihqu6qqr8CDh+7pil6DvBPST6Y5NntqHievKkF/oHAwcBpwCkj1zR3Abe5Pp3kvCQvT/JyhnHfnxq5pmk7p433/g3g/PZt5vaRa5qWs5K8B9i5Df/7e+C9I9c0Tbe1W5Bf2i4gfC1z9J6tqlcAj2X4JvMi4FtJ3jduVVO1MFLnSOC9VXUuQzfdqLru3oG7v2Iu9AH/Q1XN0zhhANqFWbdU1V3taPihVfXdseuahiSHMNEnXFVrRi5papI8GvgeQ1C8Fng48O529D83ktyf4RvMK4B/X1W7jlzSVCQ5h+FHog5h6Nr5V4bBIqN2rXYZ+km+UFUHJvkhQ9dAJhb/FNgAvL2q3j1KgVOU5MEM/fi/UlWrk6xgOPE5b2P251KSBzH8283yt6JnIskRDLdCeQbDYIqzgM+08ezbvfbeOxy4vKqubtfLPLGqPjNqXT2G/mLa2NovVdXjxq5lSyX5MHAx8LKq+vX2P+KXqmrfkUvbYhMf2pNuAdYCr6uqa2Zf1fQkeQ7D6LKdqmrvJPsCb66q545c2lQk+RDwYeBT83QyN8nDqurW9g37F1TVhlnXNMnQ34Qku1fVjWPXsaUWfpA5yVcnRrh8beyvmNOQ5C3A9cDfMHxbOxb4VYZ7DL2yqp4xXnVbLsnFwEHABRP/dpdX1RPHrWx65vE6mSTnVNWzk3ybX+xJqKp6zEilAXN0Umja5iHwmx+3LoKFES6/CszLUdVzq+o9VfXDqrq1qk4FDquqDzPcwmB795ONjLaam6O0NqTxQuAFDLcJ+UqSY8atasu1wA/wW1X1mKrae+Jv1MAHb8PQgxMZLsraK8lfM5y0fvmoFU3PbUleyHDTLoBj+NnIpHkIxyuS/Adgh3Yu5tXAl0auaZreCDztntfJ8LN/z+1WVVWSc4Ft7luZR/pzro1meR5D0H8IWFlVF4xZ0xS9GHgpcBPDKJeXAi9p32xeNWZhWyLJB9vkt4AnMHwz+xDDjclGv0vjFM37dTLb5F1g7dPvQJI9gEcz8c3unjeZ07YjyZUMt7/+FPDMey4f+0TgtCR5O8NtQT7Umn4XuKyqXj9eVdPTro95LHAd8COGvv0a+4pxQ3/OJXkbw5vpCtotXhn+x9vuR4C07oA/AJbz8x9ovzdWTdOQ5NXAK4HHMIzzvnsR28CJwGlK8nx+/jqZj41ZzzS16yx+QVVdN+taJhn6cy7JVcCT5mlI3IIkXwL+gWFI6t33Ka+qj45W1BQlOaWqXjl2Hbrv2p02D2Q4x/TFse+7A4b+3Gs/1faCqvqXsWuZtoWbyY1dhzbPJq6vgJ99k5mXu4j+d4aRSX/Xmo5muP/VSeNVZejPvSQfZfglsPOZGKpZVa8eragpSXISw4Vmc/tbudp+tW/ZT66q29v8g4BLx77o0yGb8+/s9jePXgP8cZI7gJ8wZ0eK2u79P+CB/GwY8QP4+XM0o/BIX9u1dqn7CoY3FwBV9X/Hq0gaJPk4w
9XGaxi6sw5huBjtehjv27ahP6eSXM69XKA09rCxaUjy+wxH+3sClwIHMHT3HDxqYRKQZNW9La+qM2ZVyyS7d+bXs9vj8e1x4YKflzAfV6vCEPhPA75cVc9M8njgT0euSSLJDsChVfXisWu5J0N/Ti2MBU5yyMLNuprXJ7kEOGGcyqbq9qq6PQlJHlBV30yy3d8ZVdu/9tsVj06yU1X9eOx6Jhn68y9Jnl5VX2wz/475udT9+iQ7Ax8H1iS5meHqR2lbcA3wxSRnM1yRC0BVvWO8kuzTn3tJfgM4neFXlwLcDPzetnCRyDQl+S2Gffz0tnZkpT4lOXFj7VX1J7OuZZKh34kkDweYsx9Gl7SZDP0OJDmS4W6Nk8Ma3zxeRdL8S/I5NjJooqoOGqGcu9mnP+eS/C/gwQx3a3wfwz3nLxy1KKkP/3li+oHA84HRf//XI/05l+SyqnrSxONDGH6T9DfHrk3qTZILq2q/MWvwSH/+LVwCfluSRwEbgN1HrEfqwj1+GP1+wEqGwQajMvTn3/9pwxrfzvCD4QW8d9ySpC5czM9+GP0nwLXAcWMWBPMzXlub9k3grnaP+b8Evswwrl3S1vV6YN+q2pvhivgfAbeNW5Kh34M3VdUPkxwIHMRwMveUkWuSevDGqrp1W3vvGfrzb+EXpY4E3ltV5wI7jViP1Itt8r1n6M+/G5K8h+F3cj+Z5AH47y7Nwjb53nPI5pxL8mDgcODyqro6ye7AE6vqMyOXJs21bfW9Z+hLUkdG/6ohSZodQ1+SOmLoS1JHDH1J6oihL0kd+f/K/NV+jg5JdwAAAABJRU5ErkJggg==\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "tags": [], + "needs_background": "light" + } + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "EaKp3E1T8kkm", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 195 + }, + "outputId": "7b0fa7d2-199e-4e6e-b895-1d216a1be7b8" + }, + "source": [ + "train.head()" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
textemotion
0i didnt feel humiliatedsadness
1i can go from feeling so hopeless to so damned...sadness
2im grabbing a minute to post i feel greedy wronganger
3i am ever feeling nostalgic about the fireplac...love
4i am feeling grouchyanger
\n", + "
" + ], + "text/plain": [ + " text emotion\n", + "0 i didnt feel humiliated sadness\n", + "1 i can go from feeling so hopeless to so damned... sadness\n", + "2 im grabbing a minute to post i feel greedy wrong anger\n", + "3 i am ever feeling nostalgic about the fireplac... love\n", + "4 i am feeling grouchy anger" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 50 + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "i-Gt1WyPBL-6", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 67 + }, + "outputId": "5ca664c8-5a05-4e8c-a15b-b66891f3e164" + }, + "source": [ + "train.count()" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "text 16000\n", + "emotion 16000\n", + "dtype: int64" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 51 + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "KybpXVl1Die5", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 186, + "referenced_widgets": [ + "0037bb8409bb4d65ac4ebd956fd1e631", + "db528e3117024014b4d281b650901cbd", + "350fc08aa59849fc9fd3f3e454583a6c", + "be936dd408314d0d90a22f627ca517ca", + "99f56e1a8fdb4b2282fa6e17819d044e", + "462bd815ddbc4687bcf7695f59919f0c", + "40edb7d92c1145ee9e3bb823e4688e16", + "f827cd8a6bf846c590913c5ea40e6737" + ] + }, + "outputId": "1319d2b5-c84e-4c95-bae6-3af745326439" + }, + "source": [ + "tokenizer = T5Tokenizer.from_pretrained('t5-base')" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "text": [ + "INFO:filelock:Lock 139955425093728 acquired on /root/.cache/torch/transformers/68f1b8dbca4350743bb54b8c4169fd38cbabaad564f85a9239337a8d0342af9f.9995af32582a1a7062cb3173c118cb7b4636fa03feb967340f20fc37406f021f.lock\n", + "INFO:transformers.file_utils:https://s3.amazonaws.com/models.huggingface.co/bert/t5-spiece.model not found in cache or force_download set to True, downloading to /root/.cache/torch/transformers/tmpsnkx0l2g\n" + ], + "name": "stderr" + }, + { + "output_type": "display_data", + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "0037bb8409bb4d65ac4ebd956fd1e631", + "version_minor": 0, + "version_major": 2 + }, + "text/plain": [ + "HBox(children=(FloatProgress(value=0.0, description='Downloading', max=791656.0, style=ProgressStyle(descripti…" + ] + }, + "metadata": { + "tags": [] + } + }, + { + "output_type": "stream", + "text": [ + "INFO:transformers.file_utils:storing https://s3.amazonaws.com/models.huggingface.co/bert/t5-spiece.model in cache at /root/.cache/torch/transformers/68f1b8dbca4350743bb54b8c4169fd38cbabaad564f85a9239337a8d0342af9f.9995af32582a1a7062cb3173c118cb7b4636fa03feb967340f20fc37406f021f\n", + "INFO:transformers.file_utils:creating metadata file for /root/.cache/torch/transformers/68f1b8dbca4350743bb54b8c4169fd38cbabaad564f85a9239337a8d0342af9f.9995af32582a1a7062cb3173c118cb7b4636fa03feb967340f20fc37406f021f\n", + "INFO:filelock:Lock 139955425093728 released on /root/.cache/torch/transformers/68f1b8dbca4350743bb54b8c4169fd38cbabaad564f85a9239337a8d0342af9f.9995af32582a1a7062cb3173c118cb7b4636fa03feb967340f20fc37406f021f.lock\n", + "INFO:transformers.tokenization_utils:loading file https://s3.amazonaws.com/models.huggingface.co/bert/t5-spiece.model from cache at /root/.cache/torch/transformers/68f1b8dbca4350743bb54b8c4169fd38cbabaad564f85a9239337a8d0342af9f.9995af32582a1a7062cb3173c118cb7b4636fa03feb967340f20fc37406f021f\n" + ], + "name": "stderr" + }, + { + "output_type": "stream", + "text": 
[ + "\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "cANrUEXhO8QY" + }, + "source": [ + "### Dataset" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "8GsMQdqMPCN7" + }, + "source": [ + "Here also we will process the examples in the same way we did above. If the label is 'love' we will ask the model to predict the text 'love'" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "AKh6m92eKZc4" + }, + "source": [ + "Lets check how t5 encodes the following labels" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "HDnMp5-fDIAc", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 118 + }, + "outputId": "837d1d28-2d17-4ff0-f345-64eed6949dbb" + }, + "source": [ + "emotions = [ \"sadness\", \"joy\", \"love\", \"anger\", \"fear\", \"surprise\"]\n", + "for em in emotions:\n", + " print(len(tokenizer.encode(em)))" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "text": [ + "1\n", + "1\n", + "1\n", + "1\n", + "1\n", + "1\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "i8VIZIWFOwMj" + }, + "source": [ + "Here also all the labels are encoded as single ids" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "8i8QD-3MDrWq" + }, + "source": [ + "class EmotionDataset(Dataset):\n", + " def __init__(self, tokenizer, data_dir, type_path, max_len=512):\n", + " self.path = os.path.join(data_dir, type_path + '.txt')\n", + "\n", + " self.data_column = \"text\"\n", + " self.class_column = \"emotion\"\n", + " self.data = pd.read_csv(self.path, sep=\";\", header=None, names=[self.data_column, self.class_column],\n", + " engine=\"python\")\n", + "\n", + " self.max_len = max_len\n", + " self.tokenizer = tokenizer\n", + " self.inputs = []\n", + " self.targets = []\n", + "\n", + " self._build()\n", + "\n", + " def __len__(self):\n", + " return len(self.inputs)\n", + "\n", + " def __getitem__(self, index):\n", + " source_ids = self.inputs[index][\"input_ids\"].squeeze()\n", + " target_ids = self.targets[index][\"input_ids\"].squeeze()\n", + "\n", + " src_mask = self.inputs[index][\"attention_mask\"].squeeze() # might need to squeeze\n", + " target_mask = self.targets[index][\"attention_mask\"].squeeze() # might need to squeeze\n", + "\n", + " return {\"source_ids\": source_ids, \"source_mask\": src_mask, \"target_ids\": target_ids, \"target_mask\": target_mask}\n", + "\n", + " def _build(self):\n", + " for idx in range(len(self.data)):\n", + " input_, target = self.data.loc[idx, self.data_column], self.data.loc[idx, self.class_column]\n", + "\n", + " input_ = input_ + ' '\n", + " target = target + \" \"\n", + "\n", + " # tokenize inputs\n", + " tokenized_inputs = self.tokenizer.batch_encode_plus(\n", + " [input_], max_length=self.max_len, pad_to_max_length=True, return_tensors=\"pt\"\n", + " )\n", + " # tokenize targets\n", + " tokenized_targets = self.tokenizer.batch_encode_plus(\n", + " [target], max_length=2, pad_to_max_length=True, return_tensors=\"pt\"\n", + " )\n", + "\n", + " self.inputs.append(tokenized_inputs)\n", + " self.targets.append(tokenized_targets)" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "kRz5jyl3FBkv", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 34 + }, + "outputId": "b3587087-efa7-400b-f3f4-ebc958deb33d" + }, + "source": [ + "dataset = EmotionDataset(tokenizer, 'emotion_data', 'val', 512)\n", + "len(dataset)" + ], + 
"execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "2000" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 54 + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "jxT6QzUAFQN0", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 50 + }, + "outputId": "68122a3a-bf3e-4125-f768-a6410abed5a9" + }, + "source": [ + "data = dataset[42]\n", + "print(tokenizer.decode(data['source_ids']))\n", + "print(tokenizer.decode(data['target_ids']))" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "text": [ + "i honestly don't feel discouraged today as i usually do\n", + "sadness\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "PBVHtdIuFpID" + }, + "source": [], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "DEWi6c-pGZV9" + }, + "source": [ + "### Train" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "wGrpDJnLPQ0Q" + }, + "source": [ + "As I said above there's no need to change the model or add task specific head or any other hyperparameters, we'll just change the dataset and that's it!" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "kDep-uIcGYX2" + }, + "source": [ + "!mkdir -p t5_emotion" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "TgNOy7a4LJ9h", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 54 + }, + "outputId": "3945df44-55d0-40d2-d98c-fa196bb9d554" + }, + "source": [ + "args_dict.update({'data_dir': 'emotion_data', 'output_dir': 't5_emotion', 'num_train_epochs':2})\n", + "args = argparse.Namespace(**args_dict)\n", + "print(args_dict)" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "text": [ + "{'data_dir': 'emotion_data', 'output_dir': 't5_emotion', 'model_name_or_path': 't5-base', 'tokenizer_name_or_path': 't5-base', 'max_seq_length': 512, 'learning_rate': 0.0003, 'weight_decay': 0.0, 'adam_epsilon': 1e-08, 'warmup_steps': 0, 'train_batch_size': 8, 'eval_batch_size': 8, 'num_train_epochs': 2, 'gradient_accumulation_steps': 16, 'n_gpu': 1, 'early_stop_callback': False, 'fp_16': False, 'opt_level': 'O1', 'max_grad_norm': 1.0, 'seed': 42}\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "at783kr7KvS4" + }, + "source": [ + "checkpoint_callback = pl.callbacks.ModelCheckpoint(\n", + " filepath=args.output_dir, prefix=\"checkpoint\", monitor=\"val_loss\", mode=\"min\", save_top_k=5\n", + ")\n", + "\n", + "train_params = dict(\n", + " accumulate_grad_batches=args.gradient_accumulation_steps,\n", + " gpus=args.n_gpu,\n", + " max_epochs=args.num_train_epochs,\n", + " early_stop_callback=False,\n", + " precision= 16 if args.fp_16 else 32,\n", + " amp_level=args.opt_level,\n", + " gradient_clip_val=args.max_grad_norm,\n", + " checkpoint_callback=checkpoint_callback,\n", + " callbacks=[LoggingCallback()],\n", + ")" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "1LBvpP01KvTA" + }, + "source": [ + "def get_dataset(tokenizer, type_path, args):\n", + " return EmotionDataset(tokenizer=tokenizer, data_dir=args.data_dir, type_path=type_path, max_len=args.max_seq_length)" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "v3Tty_OHGlvR", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 
978 + }, + "outputId": "0423fedb-7a93-4990-c6ce-545b52b86e63" + }, + "source": [ + "model = T5FineTuner(args)" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "text": [ + "INFO:transformers.configuration_utils:loading configuration file https://s3.amazonaws.com/models.huggingface.co/bert/t5-base-config.json from cache at /root/.cache/torch/transformers/40578967d1f029acb6162b36db9d8b4307063e885990ccd297c2c5be1cf1b3d7.2995d650f5eba18c8baa4146e210d32d56165e90d374281741fc78b872cd6c9b\n", + "INFO:transformers.configuration_utils:Model config T5Config {\n", + " \"architectures\": [\n", + " \"T5WithLMHeadModel\"\n", + " ],\n", + " \"d_ff\": 3072,\n", + " \"d_kv\": 64,\n", + " \"d_model\": 768,\n", + " \"decoder_start_token_id\": 0,\n", + " \"dropout_rate\": 0.1,\n", + " \"eos_token_id\": 1,\n", + " \"initializer_factor\": 1.0,\n", + " \"is_encoder_decoder\": true,\n", + " \"layer_norm_epsilon\": 1e-06,\n", + " \"model_type\": \"t5\",\n", + " \"n_positions\": 512,\n", + " \"num_heads\": 12,\n", + " \"num_layers\": 12,\n", + " \"output_past\": true,\n", + " \"pad_token_id\": 0,\n", + " \"relative_attention_num_buckets\": 32,\n", + " \"task_specific_params\": {\n", + " \"summarization\": {\n", + " \"early_stopping\": true,\n", + " \"length_penalty\": 2.0,\n", + " \"max_length\": 200,\n", + " \"min_length\": 30,\n", + " \"no_repeat_ngram_size\": 3,\n", + " \"num_beams\": 4,\n", + " \"prefix\": \"summarize: \"\n", + " },\n", + " \"translation_en_to_de\": {\n", + " \"early_stopping\": true,\n", + " \"max_length\": 300,\n", + " \"num_beams\": 4,\n", + " \"prefix\": \"translate English to German: \"\n", + " },\n", + " \"translation_en_to_fr\": {\n", + " \"early_stopping\": true,\n", + " \"max_length\": 300,\n", + " \"num_beams\": 4,\n", + " \"prefix\": \"translate English to French: \"\n", + " },\n", + " \"translation_en_to_ro\": {\n", + " \"early_stopping\": true,\n", + " \"max_length\": 300,\n", + " \"num_beams\": 4,\n", + " \"prefix\": \"translate English to Romanian: \"\n", + " }\n", + " },\n", + " \"vocab_size\": 32128\n", + "}\n", + "\n", + "INFO:transformers.modeling_utils:loading weights file https://cdn.huggingface.co/t5-base-pytorch_model.bin from cache at /root/.cache/torch/transformers/f6f2fde9fa7611f4eff74620de9cbe734e7a717b5b143bd283cae4c2d6022990.54f906ff53bd09195cfc183a29cadc81b7705f07fcdb796d24163cb632b6bdfa\n", + "INFO:transformers.modeling_utils:Weights of T5ForConditionalGeneration not initialized from pretrained model: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight', 'lm_head.weight']\n", + "INFO:transformers.tokenization_utils:loading file https://s3.amazonaws.com/models.huggingface.co/bert/t5-spiece.model from cache at /root/.cache/torch/transformers/68f1b8dbca4350743bb54b8c4169fd38cbabaad564f85a9239337a8d0342af9f.9995af32582a1a7062cb3173c118cb7b4636fa03feb967340f20fc37406f021f\n" + ], + "name": "stderr" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "mIsW9pwEG27D", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 50 + }, + "outputId": "d0469592-9403-4397-c8cf-b2b4c48ba614" + }, + "source": [ + "trainer = pl.Trainer(**train_params)" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "text": [ + "INFO:lightning:GPU available: True, used: True\n", + "INFO:lightning:CUDA_VISIBLE_DEVICES: [0]\n" + ], + "name": "stderr" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "xmk4GsEMHTfZ", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000, + 
"referenced_widgets": [ + "61d58772a6a64c5c8ad30dab2563a56f", + "4000e73e6d804763986dc9a9c74456aa", + "0dd99276ab294c939d83320f4674d5c2", + "d306f7ff1ec94561aeed9ff59ba9b54b", + "0893a9730450433fa76a74b008a6f482", + "f8873c7201e1410cb0ec52cb7e34c3c9", + "234eb8b041c44358b2f993c2853162f7", + "8f73da698e85474fbecfd91bb7770c56", + "26a0cb124049417aa9dbdd010e3af03a", + "8a14bd8f2a424b15b48426fd5e320678", + "09ed6242c5ef4a4791a1074ff7e4616e", + "487a6ea92fe0463ebbcb63094fde5136", + "c050be8414044acdb1a496495d148302", + "56a67d534f284df0bc1121f1e264f5e2", + "f168c4ae2d014e89bacc58e43427302e", + "5cabe7d5ed6b46be882c558d28a29ca2", + "1681a9ce7f9340caa50c4204777a6f9e", + "a9f0c66f958e493286155c8d2631d255", + "e04d6312d5d4425ab726588c485e668c", + "fab8ee7d5d3940819eb9131efbbad791", + "6dd2781f88eb4549b4203dfec9c1a98e", + "893ba880ac6545baa6eb4a532ecc5753", + "d4fc7ae628c94a758ce694318bc620ba", + "4c33ca548b5e4738abdac09575e2a325", + "ff475d6cdc074c14aa7b2cfede771b07", + "d77faf8b9ea6480abe594114823ca52f", + "ee4f41b591fe41a5a2d915c343b16c1d", + "d8946214acc44c4cb97688538daaa33f", + "9b9306452732495cbb1acd3e2fcf3b69", + "f42e9e596ad0485b842fee92d1884750", + "1d9f8718ba4d4b60997757ea7f1db72b", + "63db466ae63b42a5a79d051ef5af653e" + ] + }, + "outputId": "ba492b59-fc67-4fd3-d42a-5965600679df" + }, + "source": [ + "trainer.fit(model)" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "text": [ + "INFO:lightning:\n", + " | Name | Type | Params\n", + "-----------------------------------------------------------------------------------------------------------------\n", + "0 | model | T5ForConditionalGeneration | 222 M \n", + "1 | model.shared | Embedding | 24 M \n", + "2 | model.encoder | T5Stack | 109 M \n", + "3 | model.encoder.block | ModuleList | 84 M \n", + "4 | model.encoder.block.0 | T5Block | 7 M \n", + "5 | model.encoder.block.0.layer | ModuleList | 7 M \n", + "6 | model.encoder.block.0.layer.0 | T5LayerSelfAttention | 2 M \n", + "7 | model.encoder.block.0.layer.0.SelfAttention | T5Attention | 2 M \n", + "8 | model.encoder.block.0.layer.0.SelfAttention.q | Linear | 589 K \n", + "9 | model.encoder.block.0.layer.0.SelfAttention.k | Linear | 589 K \n", + "10 | model.encoder.block.0.layer.0.SelfAttention.v | Linear | 589 K \n", + "11 | model.encoder.block.0.layer.0.SelfAttention.o | Linear | 589 K \n", + "12 | model.encoder.block.0.layer.0.SelfAttention.relative_attention_bias | Embedding | 384 \n", + "13 | model.encoder.block.0.layer.0.layer_norm | T5LayerNorm | 768 \n", + "14 | model.encoder.block.0.layer.0.dropout | Dropout | 0 \n", + "15 | model.encoder.block.0.layer.1 | T5LayerFF | 4 M \n", + "16 | model.encoder.block.0.layer.1.DenseReluDense | T5DenseReluDense | 4 M \n", + "17 | model.encoder.block.0.layer.1.DenseReluDense.wi | Linear | 2 M \n", + "18 | model.encoder.block.0.layer.1.DenseReluDense.wo | Linear | 2 M \n", + "19 | model.encoder.block.0.layer.1.DenseReluDense.dropout | Dropout | 0 \n", + "20 | model.encoder.block.0.layer.1.layer_norm | T5LayerNorm | 768 \n", + "21 | model.encoder.block.0.layer.1.dropout | Dropout | 0 \n", + "22 | model.encoder.block.1 | T5Block | 7 M \n", + "23 | model.encoder.block.1.layer | ModuleList | 7 M \n", + "24 | model.encoder.block.1.layer.0 | T5LayerSelfAttention | 2 M \n", + "25 | model.encoder.block.1.layer.0.SelfAttention | T5Attention | 2 M \n", + "26 | model.encoder.block.1.layer.0.SelfAttention.q | Linear | 589 K \n", + "27 | model.encoder.block.1.layer.0.SelfAttention.k | Linear | 589 K \n", + "28 | 
model.encoder.block.1.layer.0.SelfAttention.v | Linear | 589 K \n", + "29 | model.encoder.block.1.layer.0.SelfAttention.o | Linear | 589 K \n", + "30 | model.encoder.block.1.layer.0.layer_norm | T5LayerNorm | 768 \n", + "31 | model.encoder.block.1.layer.0.dropout | Dropout | 0 \n", + "32 | model.encoder.block.1.layer.1 | T5LayerFF | 4 M \n", + "33 | model.encoder.block.1.layer.1.DenseReluDense | T5DenseReluDense | 4 M \n", + "34 | model.encoder.block.1.layer.1.DenseReluDense.wi | Linear | 2 M \n", + "35 | model.encoder.block.1.layer.1.DenseReluDense.wo | Linear | 2 M \n", + "36 | model.encoder.block.1.layer.1.DenseReluDense.dropout | Dropout | 0 \n", + "37 | model.encoder.block.1.layer.1.layer_norm | T5LayerNorm | 768 \n", + "38 | model.encoder.block.1.layer.1.dropout | Dropout | 0 \n", + "39 | model.encoder.block.2 | T5Block | 7 M \n", + "40 | model.encoder.block.2.layer | ModuleList | 7 M \n", + "41 | model.encoder.block.2.layer.0 | T5LayerSelfAttention | 2 M \n", + "42 | model.encoder.block.2.layer.0.SelfAttention | T5Attention | 2 M \n", + "43 | model.encoder.block.2.layer.0.SelfAttention.q | Linear | 589 K \n", + "44 | model.encoder.block.2.layer.0.SelfAttention.k | Linear | 589 K \n", + "45 | model.encoder.block.2.layer.0.SelfAttention.v | Linear | 589 K \n", + "46 | model.encoder.block.2.layer.0.SelfAttention.o | Linear | 589 K \n", + "47 | model.encoder.block.2.layer.0.layer_norm | T5LayerNorm | 768 \n", + "48 | model.encoder.block.2.layer.0.dropout | Dropout | 0 \n", + "49 | model.encoder.block.2.layer.1 | T5LayerFF | 4 M \n", + "50 | model.encoder.block.2.layer.1.DenseReluDense | T5DenseReluDense | 4 M \n", + "51 | model.encoder.block.2.layer.1.DenseReluDense.wi | Linear | 2 M \n", + "52 | model.encoder.block.2.layer.1.DenseReluDense.wo | Linear | 2 M \n", + "53 | model.encoder.block.2.layer.1.DenseReluDense.dropout | Dropout | 0 \n", + "54 | model.encoder.block.2.layer.1.layer_norm | T5LayerNorm | 768 \n", + "55 | model.encoder.block.2.layer.1.dropout | Dropout | 0 \n", + "56 | model.encoder.block.3 | T5Block | 7 M \n", + "57 | model.encoder.block.3.layer | ModuleList | 7 M \n", + "58 | model.encoder.block.3.layer.0 | T5LayerSelfAttention | 2 M \n", + "59 | model.encoder.block.3.layer.0.SelfAttention | T5Attention | 2 M \n", + "60 | model.encoder.block.3.layer.0.SelfAttention.q | Linear | 589 K \n", + "61 | model.encoder.block.3.layer.0.SelfAttention.k | Linear | 589 K \n", + "62 | model.encoder.block.3.layer.0.SelfAttention.v | Linear | 589 K \n", + "63 | model.encoder.block.3.layer.0.SelfAttention.o | Linear | 589 K \n", + "64 | model.encoder.block.3.layer.0.layer_norm | T5LayerNorm | 768 \n", + "65 | model.encoder.block.3.layer.0.dropout | Dropout | 0 \n", + "66 | model.encoder.block.3.layer.1 | T5LayerFF | 4 M \n", + "67 | model.encoder.block.3.layer.1.DenseReluDense | T5DenseReluDense | 4 M \n", + "68 | model.encoder.block.3.layer.1.DenseReluDense.wi | Linear | 2 M \n", + "69 | model.encoder.block.3.layer.1.DenseReluDense.wo | Linear | 2 M \n", + "70 | model.encoder.block.3.layer.1.DenseReluDense.dropout | Dropout | 0 \n", + "71 | model.encoder.block.3.layer.1.layer_norm | T5LayerNorm | 768 \n", + "72 | model.encoder.block.3.layer.1.dropout | Dropout | 0 \n", + "73 | model.encoder.block.4 | T5Block | 7 M \n", + "74 | model.encoder.block.4.layer | ModuleList | 7 M \n", + "75 | model.encoder.block.4.layer.0 | T5LayerSelfAttention | 2 M \n", + "76 | model.encoder.block.4.layer.0.SelfAttention | T5Attention | 2 M \n", + "77 | model.encoder.block.4.layer.0.SelfAttention.q | 
Linear | 589 K \n", + "78 | model.encoder.block.4.layer.0.SelfAttention.k | Linear | 589 K \n", + "79 | model.encoder.block.4.layer.0.SelfAttention.v | Linear | 589 K \n", + "80 | model.encoder.block.4.layer.0.SelfAttention.o | Linear | 589 K \n", + "81 | model.encoder.block.4.layer.0.layer_norm | T5LayerNorm | 768 \n", + "82 | model.encoder.block.4.layer.0.dropout | Dropout | 0 \n", + "83 | model.encoder.block.4.layer.1 | T5LayerFF | 4 M \n", + "84 | model.encoder.block.4.layer.1.DenseReluDense | T5DenseReluDense | 4 M \n", + "85 | model.encoder.block.4.layer.1.DenseReluDense.wi | Linear | 2 M \n", + "86 | model.encoder.block.4.layer.1.DenseReluDense.wo | Linear | 2 M \n", + "87 | model.encoder.block.4.layer.1.DenseReluDense.dropout | Dropout | 0 \n", + "88 | model.encoder.block.4.layer.1.layer_norm | T5LayerNorm | 768 \n", + "89 | model.encoder.block.4.layer.1.dropout | Dropout | 0 \n", + "90 | model.encoder.block.5 | T5Block | 7 M \n", + "91 | model.encoder.block.5.layer | ModuleList | 7 M \n", + "92 | model.encoder.block.5.layer.0 | T5LayerSelfAttention | 2 M \n", + "93 | model.encoder.block.5.layer.0.SelfAttention | T5Attention | 2 M \n", + "94 | model.encoder.block.5.layer.0.SelfAttention.q | Linear | 589 K \n", + "95 | model.encoder.block.5.layer.0.SelfAttention.k | Linear | 589 K \n", + "96 | model.encoder.block.5.layer.0.SelfAttention.v | Linear | 589 K \n", + "97 | model.encoder.block.5.layer.0.SelfAttention.o | Linear | 589 K \n", + "98 | model.encoder.block.5.layer.0.layer_norm | T5LayerNorm | 768 \n", + "99 | model.encoder.block.5.layer.0.dropout | Dropout | 0 \n", + "100 | model.encoder.block.5.layer.1 | T5LayerFF | 4 M \n", + "101 | model.encoder.block.5.layer.1.DenseReluDense | T5DenseReluDense | 4 M \n", + "102 | model.encoder.block.5.layer.1.DenseReluDense.wi | Linear | 2 M \n", + "103 | model.encoder.block.5.layer.1.DenseReluDense.wo | Linear | 2 M \n", + "104 | model.encoder.block.5.layer.1.DenseReluDense.dropout | Dropout | 0 \n", + "105 | model.encoder.block.5.layer.1.layer_norm | T5LayerNorm | 768 \n", + "106 | model.encoder.block.5.layer.1.dropout | Dropout | 0 \n", + "107 | model.encoder.block.6 | T5Block | 7 M \n", + "108 | model.encoder.block.6.layer | ModuleList | 7 M \n", + "109 | model.encoder.block.6.layer.0 | T5LayerSelfAttention | 2 M \n", + "110 | model.encoder.block.6.layer.0.SelfAttention | T5Attention | 2 M \n", + "111 | model.encoder.block.6.layer.0.SelfAttention.q | Linear | 589 K \n", + "112 | model.encoder.block.6.layer.0.SelfAttention.k | Linear | 589 K \n", + "113 | model.encoder.block.6.layer.0.SelfAttention.v | Linear | 589 K \n", + "114 | model.encoder.block.6.layer.0.SelfAttention.o | Linear | 589 K \n", + "115 | model.encoder.block.6.layer.0.layer_norm | T5LayerNorm | 768 \n", + "116 | model.encoder.block.6.layer.0.dropout | Dropout | 0 \n", + "117 | model.encoder.block.6.layer.1 | T5LayerFF | 4 M \n", + "118 | model.encoder.block.6.layer.1.DenseReluDense | T5DenseReluDense | 4 M \n", + "119 | model.encoder.block.6.layer.1.DenseReluDense.wi | Linear | 2 M \n", + "120 | model.encoder.block.6.layer.1.DenseReluDense.wo | Linear | 2 M \n", + "121 | model.encoder.block.6.layer.1.DenseReluDense.dropout | Dropout | 0 \n", + "122 | model.encoder.block.6.layer.1.layer_norm | T5LayerNorm | 768 \n", + "123 | model.encoder.block.6.layer.1.dropout | Dropout | 0 \n", + "124 | model.encoder.block.7 | T5Block | 7 M \n", + "125 | model.encoder.block.7.layer | ModuleList | 7 M \n", + "126 | model.encoder.block.7.layer.0 | T5LayerSelfAttention | 2 M \n", + "127 
| model.encoder.block.7.layer.0.SelfAttention | T5Attention | 2 M \n", + "128 | model.encoder.block.7.layer.0.SelfAttention.q | Linear | 589 K \n", + "129 | model.encoder.block.7.layer.0.SelfAttention.k | Linear | 589 K \n", + "130 | model.encoder.block.7.layer.0.SelfAttention.v | Linear | 589 K \n", + "131 | model.encoder.block.7.layer.0.SelfAttention.o | Linear | 589 K \n", + "132 | model.encoder.block.7.layer.0.layer_norm | T5LayerNorm | 768 \n", + "133 | model.encoder.block.7.layer.0.dropout | Dropout | 0 \n", + "134 | model.encoder.block.7.layer.1 | T5LayerFF | 4 M \n", + "135 | model.encoder.block.7.layer.1.DenseReluDense | T5DenseReluDense | 4 M \n", + "136 | model.encoder.block.7.layer.1.DenseReluDense.wi | Linear | 2 M \n", + "137 | model.encoder.block.7.layer.1.DenseReluDense.wo | Linear | 2 M \n", + "138 | model.encoder.block.7.layer.1.DenseReluDense.dropout | Dropout | 0 \n", + "139 | model.encoder.block.7.layer.1.layer_norm | T5LayerNorm | 768 \n", + "140 | model.encoder.block.7.layer.1.dropout | Dropout | 0 \n", + "141 | model.encoder.block.8 | T5Block | 7 M \n", + "142 | model.encoder.block.8.layer | ModuleList | 7 M \n", + "143 | model.encoder.block.8.layer.0 | T5LayerSelfAttention | 2 M \n", + "144 | model.encoder.block.8.layer.0.SelfAttention | T5Attention | 2 M \n", + "145 | model.encoder.block.8.layer.0.SelfAttention.q | Linear | 589 K \n", + "146 | model.encoder.block.8.layer.0.SelfAttention.k | Linear | 589 K \n", + "147 | model.encoder.block.8.layer.0.SelfAttention.v | Linear | 589 K \n", + "148 | model.encoder.block.8.layer.0.SelfAttention.o | Linear | 589 K \n", + "149 | model.encoder.block.8.layer.0.layer_norm | T5LayerNorm | 768 \n", + "150 | model.encoder.block.8.layer.0.dropout | Dropout | 0 \n", + "151 | model.encoder.block.8.layer.1 | T5LayerFF | 4 M \n", + "152 | model.encoder.block.8.layer.1.DenseReluDense | T5DenseReluDense | 4 M \n", + "153 | model.encoder.block.8.layer.1.DenseReluDense.wi | Linear | 2 M \n", + "154 | model.encoder.block.8.layer.1.DenseReluDense.wo | Linear | 2 M \n", + "155 | model.encoder.block.8.layer.1.DenseReluDense.dropout | Dropout | 0 \n", + "156 | model.encoder.block.8.layer.1.layer_norm | T5LayerNorm | 768 \n", + "157 | model.encoder.block.8.layer.1.dropout | Dropout | 0 \n", + "158 | model.encoder.block.9 | T5Block | 7 M \n", + "159 | model.encoder.block.9.layer | ModuleList | 7 M \n", + "160 | model.encoder.block.9.layer.0 | T5LayerSelfAttention | 2 M \n", + "161 | model.encoder.block.9.layer.0.SelfAttention | T5Attention | 2 M \n", + "162 | model.encoder.block.9.layer.0.SelfAttention.q | Linear | 589 K \n", + "163 | model.encoder.block.9.layer.0.SelfAttention.k | Linear | 589 K \n", + "164 | model.encoder.block.9.layer.0.SelfAttention.v | Linear | 589 K \n", + "165 | model.encoder.block.9.layer.0.SelfAttention.o | Linear | 589 K \n", + "166 | model.encoder.block.9.layer.0.layer_norm | T5LayerNorm | 768 \n", + "167 | model.encoder.block.9.layer.0.dropout | Dropout | 0 \n", + "168 | model.encoder.block.9.layer.1 | T5LayerFF | 4 M \n", + "169 | model.encoder.block.9.layer.1.DenseReluDense | T5DenseReluDense | 4 M \n", + "170 | model.encoder.block.9.layer.1.DenseReluDense.wi | Linear | 2 M \n", + "171 | model.encoder.block.9.layer.1.DenseReluDense.wo | Linear | 2 M \n", + "172 | model.encoder.block.9.layer.1.DenseReluDense.dropout | Dropout | 0 \n", + "173 | model.encoder.block.9.layer.1.layer_norm | T5LayerNorm | 768 \n", + "174 | model.encoder.block.9.layer.1.dropout | Dropout | 0 \n", + "175 | model.encoder.block.10 | T5Block 
| 7 M \n", + "176 | model.encoder.block.10.layer | ModuleList | 7 M \n", + "177 | model.encoder.block.10.layer.0 | T5LayerSelfAttention | 2 M \n", + "178 | model.encoder.block.10.layer.0.SelfAttention | T5Attention | 2 M \n", + "179 | model.encoder.block.10.layer.0.SelfAttention.q | Linear | 589 K \n", + "180 | model.encoder.block.10.layer.0.SelfAttention.k | Linear | 589 K \n", + "181 | model.encoder.block.10.layer.0.SelfAttention.v | Linear | 589 K \n", + "182 | model.encoder.block.10.layer.0.SelfAttention.o | Linear | 589 K \n", + "183 | model.encoder.block.10.layer.0.layer_norm | T5LayerNorm | 768 \n", + "184 | model.encoder.block.10.layer.0.dropout | Dropout | 0 \n", + "185 | model.encoder.block.10.layer.1 | T5LayerFF | 4 M \n", + "186 | model.encoder.block.10.layer.1.DenseReluDense | T5DenseReluDense | 4 M \n", + "187 | model.encoder.block.10.layer.1.DenseReluDense.wi | Linear | 2 M \n", + "188 | model.encoder.block.10.layer.1.DenseReluDense.wo | Linear | 2 M \n", + "189 | model.encoder.block.10.layer.1.DenseReluDense.dropout | Dropout | 0 \n", + "190 | model.encoder.block.10.layer.1.layer_norm | T5LayerNorm | 768 \n", + "191 | model.encoder.block.10.layer.1.dropout | Dropout | 0 \n", + "192 | model.encoder.block.11 | T5Block | 7 M \n", + "193 | model.encoder.block.11.layer | ModuleList | 7 M \n", + "194 | model.encoder.block.11.layer.0 | T5LayerSelfAttention | 2 M \n", + "195 | model.encoder.block.11.layer.0.SelfAttention | T5Attention | 2 M \n", + "196 | model.encoder.block.11.layer.0.SelfAttention.q | Linear | 589 K \n", + "197 | model.encoder.block.11.layer.0.SelfAttention.k | Linear | 589 K \n", + "198 | model.encoder.block.11.layer.0.SelfAttention.v | Linear | 589 K \n", + "199 | model.encoder.block.11.layer.0.SelfAttention.o | Linear | 589 K \n", + "200 | model.encoder.block.11.layer.0.layer_norm | T5LayerNorm | 768 \n", + "201 | model.encoder.block.11.layer.0.dropout | Dropout | 0 \n", + "202 | model.encoder.block.11.layer.1 | T5LayerFF | 4 M \n", + "203 | model.encoder.block.11.layer.1.DenseReluDense | T5DenseReluDense | 4 M \n", + "204 | model.encoder.block.11.layer.1.DenseReluDense.wi | Linear | 2 M \n", + "205 | model.encoder.block.11.layer.1.DenseReluDense.wo | Linear | 2 M \n", + "206 | model.encoder.block.11.layer.1.DenseReluDense.dropout | Dropout | 0 \n", + "207 | model.encoder.block.11.layer.1.layer_norm | T5LayerNorm | 768 \n", + "208 | model.encoder.block.11.layer.1.dropout | Dropout | 0 \n", + "209 | model.encoder.final_layer_norm | T5LayerNorm | 768 \n", + "210 | model.encoder.dropout | Dropout | 0 \n", + "211 | model.decoder | T5Stack | 137 M \n", + "212 | model.decoder.block | ModuleList | 113 M \n", + "213 | model.decoder.block.0 | T5Block | 9 M \n", + "214 | model.decoder.block.0.layer | ModuleList | 9 M \n", + "215 | model.decoder.block.0.layer.0 | T5LayerSelfAttention | 2 M \n", + "216 | model.decoder.block.0.layer.0.SelfAttention | T5Attention | 2 M \n", + "217 | model.decoder.block.0.layer.0.SelfAttention.q | Linear | 589 K \n", + "218 | model.decoder.block.0.layer.0.SelfAttention.k | Linear | 589 K \n", + "219 | model.decoder.block.0.layer.0.SelfAttention.v | Linear | 589 K \n", + "220 | model.decoder.block.0.layer.0.SelfAttention.o | Linear | 589 K \n", + "221 | model.decoder.block.0.layer.0.SelfAttention.relative_attention_bias | Embedding | 384 \n", + "222 | model.decoder.block.0.layer.0.layer_norm | T5LayerNorm | 768 \n", + "223 | model.decoder.block.0.layer.0.dropout | Dropout | 0 \n", + "224 | model.decoder.block.0.layer.1 | T5LayerCrossAttention 
| 2 M \n", + "225 | model.decoder.block.0.layer.1.EncDecAttention | T5Attention | 2 M \n", + "226 | model.decoder.block.0.layer.1.EncDecAttention.q | Linear | 589 K \n", + "227 | model.decoder.block.0.layer.1.EncDecAttention.k | Linear | 589 K \n", + "228 | model.decoder.block.0.layer.1.EncDecAttention.v | Linear | 589 K \n", + "229 | model.decoder.block.0.layer.1.EncDecAttention.o | Linear | 589 K \n", + "230 | model.decoder.block.0.layer.1.EncDecAttention.relative_attention_bias | Embedding | 384 \n", + "231 | model.decoder.block.0.layer.1.layer_norm | T5LayerNorm | 768 \n", + "232 | model.decoder.block.0.layer.1.dropout | Dropout | 0 \n", + "233 | model.decoder.block.0.layer.2 | T5LayerFF | 4 M \n", + "234 | model.decoder.block.0.layer.2.DenseReluDense | T5DenseReluDense | 4 M \n", + "235 | model.decoder.block.0.layer.2.DenseReluDense.wi | Linear | 2 M \n", + "236 | model.decoder.block.0.layer.2.DenseReluDense.wo | Linear | 2 M \n", + "237 | model.decoder.block.0.layer.2.DenseReluDense.dropout | Dropout | 0 \n", + "238 | model.decoder.block.0.layer.2.layer_norm | T5LayerNorm | 768 \n", + "239 | model.decoder.block.0.layer.2.dropout | Dropout | 0 \n", + "240 | model.decoder.block.1 | T5Block | 9 M \n", + "241 | model.decoder.block.1.layer | ModuleList | 9 M \n", + "242 | model.decoder.block.1.layer.0 | T5LayerSelfAttention | 2 M \n", + "243 | model.decoder.block.1.layer.0.SelfAttention | T5Attention | 2 M \n", + "244 | model.decoder.block.1.layer.0.SelfAttention.q | Linear | 589 K \n", + "245 | model.decoder.block.1.layer.0.SelfAttention.k | Linear | 589 K \n", + "246 | model.decoder.block.1.layer.0.SelfAttention.v | Linear | 589 K \n", + "247 | model.decoder.block.1.layer.0.SelfAttention.o | Linear | 589 K \n", + "248 | model.decoder.block.1.layer.0.layer_norm | T5LayerNorm | 768 \n", + "249 | model.decoder.block.1.layer.0.dropout | Dropout | 0 \n", + "250 | model.decoder.block.1.layer.1 | T5LayerCrossAttention | 2 M \n", + "251 | model.decoder.block.1.layer.1.EncDecAttention | T5Attention | 2 M \n", + "252 | model.decoder.block.1.layer.1.EncDecAttention.q | Linear | 589 K \n", + "253 | model.decoder.block.1.layer.1.EncDecAttention.k | Linear | 589 K \n", + "254 | model.decoder.block.1.layer.1.EncDecAttention.v | Linear | 589 K \n", + "255 | model.decoder.block.1.layer.1.EncDecAttention.o | Linear | 589 K \n", + "256 | model.decoder.block.1.layer.1.layer_norm | T5LayerNorm | 768 \n", + "257 | model.decoder.block.1.layer.1.dropout | Dropout | 0 \n", + "258 | model.decoder.block.1.layer.2 | T5LayerFF | 4 M \n", + "259 | model.decoder.block.1.layer.2.DenseReluDense | T5DenseReluDense | 4 M \n", + "260 | model.decoder.block.1.layer.2.DenseReluDense.wi | Linear | 2 M \n", + "261 | model.decoder.block.1.layer.2.DenseReluDense.wo | Linear | 2 M \n", + "262 | model.decoder.block.1.layer.2.DenseReluDense.dropout | Dropout | 0 \n", + "263 | model.decoder.block.1.layer.2.layer_norm | T5LayerNorm | 768 \n", + "264 | model.decoder.block.1.layer.2.dropout | Dropout | 0 \n", + "265 | model.decoder.block.2 | T5Block | 9 M \n", + "266 | model.decoder.block.2.layer | ModuleList | 9 M \n", + "267 | model.decoder.block.2.layer.0 | T5LayerSelfAttention | 2 M \n", + "268 | model.decoder.block.2.layer.0.SelfAttention | T5Attention | 2 M \n", + "269 | model.decoder.block.2.layer.0.SelfAttention.q | Linear | 589 K \n", + "270 | model.decoder.block.2.layer.0.SelfAttention.k | Linear | 589 K \n", + "271 | model.decoder.block.2.layer.0.SelfAttention.v | Linear | 589 K \n", + "272 | 
model.decoder.block.2.layer.0.SelfAttention.o | Linear | 589 K \n", + "273 | model.decoder.block.2.layer.0.layer_norm | T5LayerNorm | 768 \n", + "274 | model.decoder.block.2.layer.0.dropout | Dropout | 0 \n", + "275 | model.decoder.block.2.layer.1 | T5LayerCrossAttention | 2 M \n", + "276 | model.decoder.block.2.layer.1.EncDecAttention | T5Attention | 2 M \n", + "277 | model.decoder.block.2.layer.1.EncDecAttention.q | Linear | 589 K \n", + "278 | model.decoder.block.2.layer.1.EncDecAttention.k | Linear | 589 K \n", + "279 | model.decoder.block.2.layer.1.EncDecAttention.v | Linear | 589 K \n", + "280 | model.decoder.block.2.layer.1.EncDecAttention.o | Linear | 589 K \n", + "281 | model.decoder.block.2.layer.1.layer_norm | T5LayerNorm | 768 \n", + "282 | model.decoder.block.2.layer.1.dropout | Dropout | 0 \n", + "283 | model.decoder.block.2.layer.2 | T5LayerFF | 4 M \n", + "284 | model.decoder.block.2.layer.2.DenseReluDense | T5DenseReluDense | 4 M \n", + "285 | model.decoder.block.2.layer.2.DenseReluDense.wi | Linear | 2 M \n", + "286 | model.decoder.block.2.layer.2.DenseReluDense.wo | Linear | 2 M \n", + "287 | model.decoder.block.2.layer.2.DenseReluDense.dropout | Dropout | 0 \n", + "288 | model.decoder.block.2.layer.2.layer_norm | T5LayerNorm | 768 \n", + "289 | model.decoder.block.2.layer.2.dropout | Dropout | 0 \n", + "290 | model.decoder.block.3 | T5Block | 9 M \n", + "291 | model.decoder.block.3.layer | ModuleList | 9 M \n", + "292 | model.decoder.block.3.layer.0 | T5LayerSelfAttention | 2 M \n", + "293 | model.decoder.block.3.layer.0.SelfAttention | T5Attention | 2 M \n", + "294 | model.decoder.block.3.layer.0.SelfAttention.q | Linear | 589 K \n", + "295 | model.decoder.block.3.layer.0.SelfAttention.k | Linear | 589 K \n", + "296 | model.decoder.block.3.layer.0.SelfAttention.v | Linear | 589 K \n", + "297 | model.decoder.block.3.layer.0.SelfAttention.o | Linear | 589 K \n", + "298 | model.decoder.block.3.layer.0.layer_norm | T5LayerNorm | 768 \n", + "299 | model.decoder.block.3.layer.0.dropout | Dropout | 0 \n", + "300 | model.decoder.block.3.layer.1 | T5LayerCrossAttention | 2 M \n", + "301 | model.decoder.block.3.layer.1.EncDecAttention | T5Attention | 2 M \n", + "302 | model.decoder.block.3.layer.1.EncDecAttention.q | Linear | 589 K \n", + "303 | model.decoder.block.3.layer.1.EncDecAttention.k | Linear | 589 K \n", + "304 | model.decoder.block.3.layer.1.EncDecAttention.v | Linear | 589 K \n", + "305 | model.decoder.block.3.layer.1.EncDecAttention.o | Linear | 589 K \n", + "306 | model.decoder.block.3.layer.1.layer_norm | T5LayerNorm | 768 \n", + "307 | model.decoder.block.3.layer.1.dropout | Dropout | 0 \n", + "308 | model.decoder.block.3.layer.2 | T5LayerFF | 4 M \n", + "309 | model.decoder.block.3.layer.2.DenseReluDense | T5DenseReluDense | 4 M \n", + "310 | model.decoder.block.3.layer.2.DenseReluDense.wi | Linear | 2 M \n", + "311 | model.decoder.block.3.layer.2.DenseReluDense.wo | Linear | 2 M \n", + "312 | model.decoder.block.3.layer.2.DenseReluDense.dropout | Dropout | 0 \n", + "313 | model.decoder.block.3.layer.2.layer_norm | T5LayerNorm | 768 \n", + "314 | model.decoder.block.3.layer.2.dropout | Dropout | 0 \n", + "315 | model.decoder.block.4 | T5Block | 9 M \n", + "316 | model.decoder.block.4.layer | ModuleList | 9 M \n", + "317 | model.decoder.block.4.layer.0 | T5LayerSelfAttention | 2 M \n", + "318 | model.decoder.block.4.layer.0.SelfAttention | T5Attention | 2 M \n", + "319 | model.decoder.block.4.layer.0.SelfAttention.q | Linear | 589 K \n", + "320 | 
model.decoder.block.4.layer.0.SelfAttention.k | Linear | 589 K \n", + "321 | model.decoder.block.4.layer.0.SelfAttention.v | Linear | 589 K \n", + "322 | model.decoder.block.4.layer.0.SelfAttention.o | Linear | 589 K \n", + "323 | model.decoder.block.4.layer.0.layer_norm | T5LayerNorm | 768 \n", + "324 | model.decoder.block.4.layer.0.dropout | Dropout | 0 \n", + "325 | model.decoder.block.4.layer.1 | T5LayerCrossAttention | 2 M \n", + "326 | model.decoder.block.4.layer.1.EncDecAttention | T5Attention | 2 M \n", + "327 | model.decoder.block.4.layer.1.EncDecAttention.q | Linear | 589 K \n", + "328 | model.decoder.block.4.layer.1.EncDecAttention.k | Linear | 589 K \n", + "329 | model.decoder.block.4.layer.1.EncDecAttention.v | Linear | 589 K \n", + "330 | model.decoder.block.4.layer.1.EncDecAttention.o | Linear | 589 K \n", + "331 | model.decoder.block.4.layer.1.layer_norm | T5LayerNorm | 768 \n", + "332 | model.decoder.block.4.layer.1.dropout | Dropout | 0 \n", + "333 | model.decoder.block.4.layer.2 | T5LayerFF | 4 M \n", + "334 | model.decoder.block.4.layer.2.DenseReluDense | T5DenseReluDense | 4 M \n", + "335 | model.decoder.block.4.layer.2.DenseReluDense.wi | Linear | 2 M \n", + "336 | model.decoder.block.4.layer.2.DenseReluDense.wo | Linear | 2 M \n", + "337 | model.decoder.block.4.layer.2.DenseReluDense.dropout | Dropout | 0 \n", + "338 | model.decoder.block.4.layer.2.layer_norm | T5LayerNorm | 768 \n", + "339 | model.decoder.block.4.layer.2.dropout | Dropout | 0 \n", + "340 | model.decoder.block.5 | T5Block | 9 M \n", + "341 | model.decoder.block.5.layer | ModuleList | 9 M \n", + "342 | model.decoder.block.5.layer.0 | T5LayerSelfAttention | 2 M \n", + "343 | model.decoder.block.5.layer.0.SelfAttention | T5Attention | 2 M \n", + "344 | model.decoder.block.5.layer.0.SelfAttention.q | Linear | 589 K \n", + "345 | model.decoder.block.5.layer.0.SelfAttention.k | Linear | 589 K \n", + "346 | model.decoder.block.5.layer.0.SelfAttention.v | Linear | 589 K \n", + "347 | model.decoder.block.5.layer.0.SelfAttention.o | Linear | 589 K \n", + "348 | model.decoder.block.5.layer.0.layer_norm | T5LayerNorm | 768 \n", + "349 | model.decoder.block.5.layer.0.dropout | Dropout | 0 \n", + "350 | model.decoder.block.5.layer.1 | T5LayerCrossAttention | 2 M \n", + "351 | model.decoder.block.5.layer.1.EncDecAttention | T5Attention | 2 M \n", + "352 | model.decoder.block.5.layer.1.EncDecAttention.q | Linear | 589 K \n", + "353 | model.decoder.block.5.layer.1.EncDecAttention.k | Linear | 589 K \n", + "354 | model.decoder.block.5.layer.1.EncDecAttention.v | Linear | 589 K \n", + "355 | model.decoder.block.5.layer.1.EncDecAttention.o | Linear | 589 K \n", + "356 | model.decoder.block.5.layer.1.layer_norm | T5LayerNorm | 768 \n", + "357 | model.decoder.block.5.layer.1.dropout | Dropout | 0 \n", + "358 | model.decoder.block.5.layer.2 | T5LayerFF | 4 M \n", + "359 | model.decoder.block.5.layer.2.DenseReluDense | T5DenseReluDense | 4 M \n", + "360 | model.decoder.block.5.layer.2.DenseReluDense.wi | Linear | 2 M \n", + "361 | model.decoder.block.5.layer.2.DenseReluDense.wo | Linear | 2 M \n", + "362 | model.decoder.block.5.layer.2.DenseReluDense.dropout | Dropout | 0 \n", + "363 | model.decoder.block.5.layer.2.layer_norm | T5LayerNorm | 768 \n", + "364 | model.decoder.block.5.layer.2.dropout | Dropout | 0 \n", + "365 | model.decoder.block.6 | T5Block | 9 M \n", + "366 | model.decoder.block.6.layer | ModuleList | 9 M \n", + "367 | model.decoder.block.6.layer.0 | T5LayerSelfAttention | 2 M \n", + "368 | 
model.decoder.block.6.layer.0.SelfAttention | T5Attention | 2 M \n", + "369 | model.decoder.block.6.layer.0.SelfAttention.q | Linear | 589 K \n", + "370 | model.decoder.block.6.layer.0.SelfAttention.k | Linear | 589 K \n", + "371 | model.decoder.block.6.layer.0.SelfAttention.v | Linear | 589 K \n", + "372 | model.decoder.block.6.layer.0.SelfAttention.o | Linear | 589 K \n", + "373 | model.decoder.block.6.layer.0.layer_norm | T5LayerNorm | 768 \n", + "374 | model.decoder.block.6.layer.0.dropout | Dropout | 0 \n", + "375 | model.decoder.block.6.layer.1 | T5LayerCrossAttention | 2 M \n", + "376 | model.decoder.block.6.layer.1.EncDecAttention | T5Attention | 2 M \n", + "377 | model.decoder.block.6.layer.1.EncDecAttention.q | Linear | 589 K \n", + "378 | model.decoder.block.6.layer.1.EncDecAttention.k | Linear | 589 K \n", + "379 | model.decoder.block.6.layer.1.EncDecAttention.v | Linear | 589 K \n", + "380 | model.decoder.block.6.layer.1.EncDecAttention.o | Linear | 589 K \n", + "381 | model.decoder.block.6.layer.1.layer_norm | T5LayerNorm | 768 \n", + "382 | model.decoder.block.6.layer.1.dropout | Dropout | 0 \n", + "383 | model.decoder.block.6.layer.2 | T5LayerFF | 4 M \n", + "384 | model.decoder.block.6.layer.2.DenseReluDense | T5DenseReluDense | 4 M \n", + "385 | model.decoder.block.6.layer.2.DenseReluDense.wi | Linear | 2 M \n", + "386 | model.decoder.block.6.layer.2.DenseReluDense.wo | Linear | 2 M \n", + "387 | model.decoder.block.6.layer.2.DenseReluDense.dropout | Dropout | 0 \n", + "388 | model.decoder.block.6.layer.2.layer_norm | T5LayerNorm | 768 \n", + "389 | model.decoder.block.6.layer.2.dropout | Dropout | 0 \n", + "390 | model.decoder.block.7 | T5Block | 9 M \n", + "391 | model.decoder.block.7.layer | ModuleList | 9 M \n", + "392 | model.decoder.block.7.layer.0 | T5LayerSelfAttention | 2 M \n", + "393 | model.decoder.block.7.layer.0.SelfAttention | T5Attention | 2 M \n", + "394 | model.decoder.block.7.layer.0.SelfAttention.q | Linear | 589 K \n", + "395 | model.decoder.block.7.layer.0.SelfAttention.k | Linear | 589 K \n", + "396 | model.decoder.block.7.layer.0.SelfAttention.v | Linear | 589 K \n", + "397 | model.decoder.block.7.layer.0.SelfAttention.o | Linear | 589 K \n", + "398 | model.decoder.block.7.layer.0.layer_norm | T5LayerNorm | 768 \n", + "399 | model.decoder.block.7.layer.0.dropout | Dropout | 0 \n", + "400 | model.decoder.block.7.layer.1 | T5LayerCrossAttention | 2 M \n", + "401 | model.decoder.block.7.layer.1.EncDecAttention | T5Attention | 2 M \n", + "402 | model.decoder.block.7.layer.1.EncDecAttention.q | Linear | 589 K \n", + "403 | model.decoder.block.7.layer.1.EncDecAttention.k | Linear | 589 K \n", + "404 | model.decoder.block.7.layer.1.EncDecAttention.v | Linear | 589 K \n", + "405 | model.decoder.block.7.layer.1.EncDecAttention.o | Linear | 589 K \n", + "406 | model.decoder.block.7.layer.1.layer_norm | T5LayerNorm | 768 \n", + "407 | model.decoder.block.7.layer.1.dropout | Dropout | 0 \n", + "408 | model.decoder.block.7.layer.2 | T5LayerFF | 4 M \n", + "409 | model.decoder.block.7.layer.2.DenseReluDense | T5DenseReluDense | 4 M \n", + "410 | model.decoder.block.7.layer.2.DenseReluDense.wi | Linear | 2 M \n", + "411 | model.decoder.block.7.layer.2.DenseReluDense.wo | Linear | 2 M \n", + "412 | model.decoder.block.7.layer.2.DenseReluDense.dropout | Dropout | 0 \n", + "413 | model.decoder.block.7.layer.2.layer_norm | T5LayerNorm | 768 \n", + "414 | model.decoder.block.7.layer.2.dropout | Dropout | 0 \n", + "415 | model.decoder.block.8 | T5Block | 9 M \n", + 
"416 | model.decoder.block.8.layer | ModuleList | 9 M \n", + "417 | model.decoder.block.8.layer.0 | T5LayerSelfAttention | 2 M \n", + "418 | model.decoder.block.8.layer.0.SelfAttention | T5Attention | 2 M \n", + "419 | model.decoder.block.8.layer.0.SelfAttention.q | Linear | 589 K \n", + "420 | model.decoder.block.8.layer.0.SelfAttention.k | Linear | 589 K \n", + "421 | model.decoder.block.8.layer.0.SelfAttention.v | Linear | 589 K \n", + "422 | model.decoder.block.8.layer.0.SelfAttention.o | Linear | 589 K \n", + "423 | model.decoder.block.8.layer.0.layer_norm | T5LayerNorm | 768 \n", + "424 | model.decoder.block.8.layer.0.dropout | Dropout | 0 \n", + "425 | model.decoder.block.8.layer.1 | T5LayerCrossAttention | 2 M \n", + "426 | model.decoder.block.8.layer.1.EncDecAttention | T5Attention | 2 M \n", + "427 | model.decoder.block.8.layer.1.EncDecAttention.q | Linear | 589 K \n", + "428 | model.decoder.block.8.layer.1.EncDecAttention.k | Linear | 589 K \n", + "429 | model.decoder.block.8.layer.1.EncDecAttention.v | Linear | 589 K \n", + "430 | model.decoder.block.8.layer.1.EncDecAttention.o | Linear | 589 K \n", + "431 | model.decoder.block.8.layer.1.layer_norm | T5LayerNorm | 768 \n", + "432 | model.decoder.block.8.layer.1.dropout | Dropout | 0 \n", + "433 | model.decoder.block.8.layer.2 | T5LayerFF | 4 M \n", + "434 | model.decoder.block.8.layer.2.DenseReluDense | T5DenseReluDense | 4 M \n", + "435 | model.decoder.block.8.layer.2.DenseReluDense.wi | Linear | 2 M \n", + "436 | model.decoder.block.8.layer.2.DenseReluDense.wo | Linear | 2 M \n", + "437 | model.decoder.block.8.layer.2.DenseReluDense.dropout | Dropout | 0 \n", + "438 | model.decoder.block.8.layer.2.layer_norm | T5LayerNorm | 768 \n", + "439 | model.decoder.block.8.layer.2.dropout | Dropout | 0 \n", + "440 | model.decoder.block.9 | T5Block | 9 M \n", + "441 | model.decoder.block.9.layer | ModuleList | 9 M \n", + "442 | model.decoder.block.9.layer.0 | T5LayerSelfAttention | 2 M \n", + "443 | model.decoder.block.9.layer.0.SelfAttention | T5Attention | 2 M \n", + "444 | model.decoder.block.9.layer.0.SelfAttention.q | Linear | 589 K \n", + "445 | model.decoder.block.9.layer.0.SelfAttention.k | Linear | 589 K \n", + "446 | model.decoder.block.9.layer.0.SelfAttention.v | Linear | 589 K \n", + "447 | model.decoder.block.9.layer.0.SelfAttention.o | Linear | 589 K \n", + "448 | model.decoder.block.9.layer.0.layer_norm | T5LayerNorm | 768 \n", + "449 | model.decoder.block.9.layer.0.dropout | Dropout | 0 \n", + "450 | model.decoder.block.9.layer.1 | T5LayerCrossAttention | 2 M \n", + "451 | model.decoder.block.9.layer.1.EncDecAttention | T5Attention | 2 M \n", + "452 | model.decoder.block.9.layer.1.EncDecAttention.q | Linear | 589 K \n", + "453 | model.decoder.block.9.layer.1.EncDecAttention.k | Linear | 589 K \n", + "454 | model.decoder.block.9.layer.1.EncDecAttention.v | Linear | 589 K \n", + "455 | model.decoder.block.9.layer.1.EncDecAttention.o | Linear | 589 K \n", + "456 | model.decoder.block.9.layer.1.layer_norm | T5LayerNorm | 768 \n", + "457 | model.decoder.block.9.layer.1.dropout | Dropout | 0 \n", + "458 | model.decoder.block.9.layer.2 | T5LayerFF | 4 M \n", + "459 | model.decoder.block.9.layer.2.DenseReluDense | T5DenseReluDense | 4 M \n", + "460 | model.decoder.block.9.layer.2.DenseReluDense.wi | Linear | 2 M \n", + "461 | model.decoder.block.9.layer.2.DenseReluDense.wo | Linear | 2 M \n", + "462 | model.decoder.block.9.layer.2.DenseReluDense.dropout | Dropout | 0 \n", + "463 | model.decoder.block.9.layer.2.layer_norm | 
T5LayerNorm | 768 \n", + "464 | model.decoder.block.9.layer.2.dropout | Dropout | 0 \n", + "465 | model.decoder.block.10 | T5Block | 9 M \n", + "466 | model.decoder.block.10.layer | ModuleList | 9 M \n", + "467 | model.decoder.block.10.layer.0 | T5LayerSelfAttention | 2 M \n", + "468 | model.decoder.block.10.layer.0.SelfAttention | T5Attention | 2 M \n", + "469 | model.decoder.block.10.layer.0.SelfAttention.q | Linear | 589 K \n", + "470 | model.decoder.block.10.layer.0.SelfAttention.k | Linear | 589 K \n", + "471 | model.decoder.block.10.layer.0.SelfAttention.v | Linear | 589 K \n", + "472 | model.decoder.block.10.layer.0.SelfAttention.o | Linear | 589 K \n", + "473 | model.decoder.block.10.layer.0.layer_norm | T5LayerNorm | 768 \n", + "474 | model.decoder.block.10.layer.0.dropout | Dropout | 0 \n", + "475 | model.decoder.block.10.layer.1 | T5LayerCrossAttention | 2 M \n", + "476 | model.decoder.block.10.layer.1.EncDecAttention | T5Attention | 2 M \n", + "477 | model.decoder.block.10.layer.1.EncDecAttention.q | Linear | 589 K \n", + "478 | model.decoder.block.10.layer.1.EncDecAttention.k | Linear | 589 K \n", + "479 | model.decoder.block.10.layer.1.EncDecAttention.v | Linear | 589 K \n", + "480 | model.decoder.block.10.layer.1.EncDecAttention.o | Linear | 589 K \n", + "481 | model.decoder.block.10.layer.1.layer_norm | T5LayerNorm | 768 \n", + "482 | model.decoder.block.10.layer.1.dropout | Dropout | 0 \n", + "483 | model.decoder.block.10.layer.2 | T5LayerFF | 4 M \n", + "484 | model.decoder.block.10.layer.2.DenseReluDense | T5DenseReluDense | 4 M \n", + "485 | model.decoder.block.10.layer.2.DenseReluDense.wi | Linear | 2 M \n", + "486 | model.decoder.block.10.layer.2.DenseReluDense.wo | Linear | 2 M \n", + "487 | model.decoder.block.10.layer.2.DenseReluDense.dropout | Dropout | 0 \n", + "488 | model.decoder.block.10.layer.2.layer_norm | T5LayerNorm | 768 \n", + "489 | model.decoder.block.10.layer.2.dropout | Dropout | 0 \n", + "490 | model.decoder.block.11 | T5Block | 9 M \n", + "491 | model.decoder.block.11.layer | ModuleList | 9 M \n", + "492 | model.decoder.block.11.layer.0 | T5LayerSelfAttention | 2 M \n", + "493 | model.decoder.block.11.layer.0.SelfAttention | T5Attention | 2 M \n", + "494 | model.decoder.block.11.layer.0.SelfAttention.q | Linear | 589 K \n", + "495 | model.decoder.block.11.layer.0.SelfAttention.k | Linear | 589 K \n", + "496 | model.decoder.block.11.layer.0.SelfAttention.v | Linear | 589 K \n", + "497 | model.decoder.block.11.layer.0.SelfAttention.o | Linear | 589 K \n", + "498 | model.decoder.block.11.layer.0.layer_norm | T5LayerNorm | 768 \n", + "499 | model.decoder.block.11.layer.0.dropout | Dropout | 0 \n", + "500 | model.decoder.block.11.layer.1 | T5LayerCrossAttention | 2 M \n", + "501 | model.decoder.block.11.layer.1.EncDecAttention | T5Attention | 2 M \n", + "502 | model.decoder.block.11.layer.1.EncDecAttention.q | Linear | 589 K \n", + "503 | model.decoder.block.11.layer.1.EncDecAttention.k | Linear | 589 K \n", + "504 | model.decoder.block.11.layer.1.EncDecAttention.v | Linear | 589 K \n", + "505 | model.decoder.block.11.layer.1.EncDecAttention.o | Linear | 589 K \n", + "506 | model.decoder.block.11.layer.1.layer_norm | T5LayerNorm | 768 \n", + "507 | model.decoder.block.11.layer.1.dropout | Dropout | 0 \n", + "508 | model.decoder.block.11.layer.2 | T5LayerFF | 4 M \n", + "509 | model.decoder.block.11.layer.2.DenseReluDense | T5DenseReluDense | 4 M \n", + "510 | model.decoder.block.11.layer.2.DenseReluDense.wi | Linear | 2 M \n", + "511 | 
model.decoder.block.11.layer.2.DenseReluDense.wo | Linear | 2 M \n", + "512 | model.decoder.block.11.layer.2.DenseReluDense.dropout | Dropout | 0 \n", + "513 | model.decoder.block.11.layer.2.layer_norm | T5LayerNorm | 768 \n", + "514 | model.decoder.block.11.layer.2.dropout | Dropout | 0 \n", + "515 | model.decoder.final_layer_norm | T5LayerNorm | 768 \n", + "516 | model.decoder.dropout | Dropout | 0 \n", + "517 | model.lm_head | Linear | 24 M \n" + ], + "name": "stderr" + }, + { + "output_type": "display_data", + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "61d58772a6a64c5c8ad30dab2563a56f", + "version_minor": 0, + "version_major": 2 + }, + "text/plain": [ + "HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validation sanity check', layout=Layout…" + ] + }, + "metadata": { + "tags": [] + } + }, + { + "output_type": "stream", + "text": [ + "\r" + ], + "name": "stdout" + }, + { + "output_type": "display_data", + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "26a0cb124049417aa9dbdd010e3af03a", + "version_minor": 0, + "version_major": 2 + }, + "text/plain": [ + "HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Training', layout=Layout(flex='2'), max…" + ] + }, + "metadata": { + "tags": [] + } + }, + { + "output_type": "display_data", + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "1681a9ce7f9340caa50c4204777a6f9e", + "version_minor": 0, + "version_major": 2 + }, + "text/plain": [ + "HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…" + ] + }, + "metadata": { + "tags": [] + } + }, + { + "output_type": "stream", + "text": [ + "INFO:__main__:***** Validation results *****\n", + "INFO:__main__:avg_val_loss = tensor(0.0846, device='cuda:0')\n", + "\n", + "INFO:__main__:loss = tensor(0.0290, device='cuda:0')\n", + "\n", + "INFO:__main__:train_loss = tensor(0.0290, device='cuda:0')\n", + "\n", + "INFO:__main__:val_loss = tensor(0.0846, device='cuda:0')\n", + "\n" + ], + "name": "stderr" + }, + { + "output_type": "display_data", + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "ff475d6cdc074c14aa7b2cfede771b07", + "version_minor": 0, + "version_major": 2 + }, + "text/plain": [ + "HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…" + ] + }, + "metadata": { + "tags": [] + } + }, + { + "output_type": "stream", + "text": [ + "INFO:__main__:***** Validation results *****\n", + "INFO:__main__:avg_train_loss = tensor(0.5601, device='cuda:0')\n", + "\n", + "INFO:__main__:avg_val_loss = tensor(0.0696, device='cuda:0')\n", + "\n", + "INFO:__main__:epoch = 0\n", + "\n", + "INFO:__main__:loss = tensor(0.0134, device='cuda:0')\n", + "\n", + "INFO:__main__:train_loss = tensor(0.0134, device='cuda:0')\n", + "\n", + "INFO:__main__:val_loss = tensor(0.0696, device='cuda:0')\n", + "\n" + ], + "name": "stderr" + }, + { + "output_type": "stream", + "text": [ + "\n" + ], + "name": "stdout" + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "1" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 70 + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "GwdWdHG0RP5J" + }, + "source": [ + "### Eval" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "dq7cCiOPRQzs" + }, + "source": [ + "import textwrap\n", + "from tqdm.auto import tqdm\n", + "from sklearn import metrics" + ], + "execution_count": 
null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "XKsHzqGMRQzz" + }, + "source": [ + "dataset = EmotionDataset(tokenizer, 'emotion_data', 'test', 512)\n", + "loader = DataLoader(dataset, batch_size=32, shuffle=True)" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "QK7s7IpERQz5" + }, + "source": [ + "it = iter(loader)" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "5_79Jk36RQz-", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 34 + }, + "outputId": "a49604ae-31da-49bc-9a90-bb5bd1366ebf" + }, + "source": [ + "batch = next(it)\n", + "batch[\"source_ids\"].shape" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "torch.Size([32, 512])" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 74 + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "RQZKyEaVRQ0B" + }, + "source": [ + "outs = model.model.generate(input_ids=batch['source_ids'].cuda(),\n", + " attention_mask=batch['source_mask'].cuda(),\n", + " max_length=2)\n", + "\n", + "dec = [tokenizer.decode(ids) for ids in outs]\n", + "\n", + "texts = [tokenizer.decode(ids) for ids in batch['source_ids']]\n", + "targets = [tokenizer.decode(ids) for ids in batch['target_ids']]" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "aAjhiBcrRQ0E", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000 + }, + "outputId": "93cdd40b-310f-458d-e5ae-21debf158a39" + }, + "source": [ + "for i in range(32):\n", + " c = texts[i]\n", + " lines = textwrap.wrap(\"text:\\n%s\\n\" % c, width=100)\n", + " print(\"\\n\".join(lines))\n", + " print(\"\\nActual sentiment: %s\" % targets[i])\n", + " print(\"predicted sentiment: %s\" % dec[i])\n", + " print(\"=====================================================================\\n\")" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "text": [ + "text: i feel like ive come a long way and im proud of what ive achieved not only this week but this\n", + "year as well\n", + "\n", + "Actual sentiment: joy\n", + "predicted sentiment: joy\n", + "=====================================================================\n", + "\n", + "text: i feel unfathomably rich in having had a healthy pregnancy so far\n", + "\n", + "Actual sentiment: joy\n", + "predicted sentiment: joy\n", + "=====================================================================\n", + "\n", + "text: im just feeling emo and bitchy atm\n", + "\n", + "Actual sentiment: anger\n", + "predicted sentiment: anger\n", + "=====================================================================\n", + "\n", + "text: i always feel troubled when we re on the road touring living in a van or more recently in the\n", + "circus buses no place to hang my hat as the song lyric has it\n", + "\n", + "Actual sentiment: sadness\n", + "predicted sentiment: sadness\n", + "=====================================================================\n", + "\n", + "text: i still feel confused and guilty about the whole thing\n", + "\n", + "Actual sentiment: fear\n", + "predicted sentiment: fear\n", + "=====================================================================\n", + "\n", + "text: i feel immensely distracted by the barrage of media i receive solicit\n", + "\n", + "Actual sentiment: anger\n", + "predicted sentiment: anger\n", + 
"=====================================================================\n", + "\n", + "text: im feeling too tortured to write today\n", + "\n", + "Actual sentiment: fear\n", + "predicted sentiment: anger\n", + "=====================================================================\n", + "\n", + "text: i have the joy of allowing kids to feel like the valued treasures that they are and to just\n", + "have a blast being a kid alongside with them but can i just say its an incredibly humbling\n", + "experience to have influence into a childs life and to know that what you do and say is being\n", + "internalized\n", + "\n", + "Actual sentiment: joy\n", + "predicted sentiment: joy\n", + "=====================================================================\n", + "\n", + "text: i dont want flowers or candy but the kind of guy that knows i like thinly sliced limes in my\n", + "mineral water because it makes me feel glamorous and is humored by how pretentious that is\n", + "\n", + "Actual sentiment: joy\n", + "predicted sentiment: joy\n", + "=====================================================================\n", + "\n", + "text: i just was expressing myself and her unexpected and kind gesture made me feel bad for a short\n", + "moment as that was not my intent but for a larger moment which remains with me it reminded me of my\n", + "blessings like having good friends that have your back\n", + "\n", + "Actual sentiment: sadness\n", + "predicted sentiment: sadness\n", + "=====================================================================\n", + "\n", + "text: im feeling brave ill snatch him to on my lap and after a few seconds of struggling he\n", + "completely relaxes and submits to mommy scratches\n", + "\n", + "Actual sentiment: joy\n", + "predicted sentiment: joy\n", + "=====================================================================\n", + "\n", + "text: im sick of feeling unimportant like nobody needs me\n", + "\n", + "Actual sentiment: sadness\n", + "predicted sentiment: sadness\n", + "=====================================================================\n", + "\n", + "text: i feel like these unfortunate events fit in with my thought quote i posted above\n", + "\n", + "Actual sentiment: sadness\n", + "predicted sentiment: sadness\n", + "=====================================================================\n", + "\n", + "text: i feel like they don t think it s sincere when it really is she told us exclusively\n", + "\n", + "Actual sentiment: joy\n", + "predicted sentiment: joy\n", + "=====================================================================\n", + "\n", + "text: i feel a little low about being in japan and i always feel pangs of guilt when i fail to\n", + "appreciate my living situation and decisions\n", + "\n", + "Actual sentiment: sadness\n", + "predicted sentiment: sadness\n", + "=====================================================================\n", + "\n", + "text: i couldn t help but feel pissed off at both sides of the debate and the unnecessary dichotomy\n", + "itself\n", + "\n", + "Actual sentiment: anger\n", + "predicted sentiment: anger\n", + "=====================================================================\n", + "\n", + "text: i felt so bad for the bad grade and feeling like having to hide it that i didnt know what to\n", + "say except to declare in all my frustration that i hated school\n", + "\n", + "Actual sentiment: sadness\n", + "predicted sentiment: sadness\n", + "=====================================================================\n", + "\n", + "text: i 
do feel proud and happy and also very grateful to all who read me\n", + "\n", + "Actual sentiment: joy\n", + "predicted sentiment: joy\n", + "=====================================================================\n", + "\n", + "text: i am going to have to check on in just a few minutes but there is this clock up above the\n", + "screen that keeps ticking down the minutes i have left so am feeling a bit frantic\n", + "\n", + "Actual sentiment: fear\n", + "predicted sentiment: fear\n", + "=====================================================================\n", + "\n", + "text: i am feeling bitchy this evening\n", + "\n", + "Actual sentiment: anger\n", + "predicted sentiment: anger\n", + "=====================================================================\n", + "\n", + "text: i feel like my room is messy if theyre open\n", + "\n", + "Actual sentiment: sadness\n", + "predicted sentiment: sadness\n", + "=====================================================================\n", + "\n", + "text: im starting to feel really pathetic giving the bulk of my enthusiasm these days to the\n", + "kardashians us weekly and roseanne marathons and completely ignoring this blog\n", + "\n", + "Actual sentiment: sadness\n", + "predicted sentiment: sadness\n", + "=====================================================================\n", + "\n", + "text: i am feeling content and happy with myself\n", + "\n", + "Actual sentiment: joy\n", + "predicted sentiment: joy\n", + "=====================================================================\n", + "\n", + "text: i feel slightly saddened to know that some of the kids have also resigned during my absence\n", + "\n", + "Actual sentiment: sadness\n", + "predicted sentiment: sadness\n", + "=====================================================================\n", + "\n", + "text: i feel that passionate about\n", + "\n", + "Actual sentiment: joy\n", + "predicted sentiment: joy\n", + "=====================================================================\n", + "\n", + "text: i too feel a sense of melancholy for them\n", + "\n", + "Actual sentiment: sadness\n", + "predicted sentiment: sadness\n", + "=====================================================================\n", + "\n", + "text: i can t quite figure out how i feel i m not devastated like i was with lucy and i m not sure\n", + "if that s because it s easier to do after the first time or what\n", + "\n", + "Actual sentiment: sadness\n", + "predicted sentiment: sadness\n", + "=====================================================================\n", + "\n", + "text: i feel ashamed of you\n", + "\n", + "Actual sentiment: sadness\n", + "predicted sentiment: sadness\n", + "=====================================================================\n", + "\n", + "text: i like the fresh feeling of sweet he gave me\n", + "\n", + "Actual sentiment: joy\n", + "predicted sentiment: joy\n", + "=====================================================================\n", + "\n", + "text: i feel so jaded and bored\n", + "\n", + "Actual sentiment: sadness\n", + "predicted sentiment: sadness\n", + "=====================================================================\n", + "\n", + "text: i feel fake because i think if you really want to have a good conversation and make good\n", + "contact you have to appear especially self confident and even risk talking to some people which are\n", + "no good to talk to at all until you meet one person which you have a good connection to\n", + "\n", + "Actual sentiment: sadness\n", + "predicted sentiment: 
sadness\n", + "=====================================================================\n", + "\n", + "text: i am feeling pretty fearless\n", + "\n", + "Actual sentiment: joy\n", + "predicted sentiment: joy\n", + "=====================================================================\n", + "\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "iq8M8nbTSJlE" + }, + "source": [ + "#### Test Metrics" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "S-oIXmoCR6kl", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 66, + "referenced_widgets": [ + "8933ab7f935e4776970ddfe35f5da135", + "84eb2bf17a9048fc94b6f47867d1b0ba", + "cdd7554792cf4c73922e2f050d1fcaaf", + "a32aa193a82f478387c14f384c2c689e", + "e4cbd76c110541cbbf1386e299c4d9d6", + "da67548f1abc4727965f72b8cb367681", + "63b11aa7ee0c4271aedb87ad3e7d23c3", + "720b90b3f86c4e5da15447777806e9a7" + ] + }, + "outputId": "98bdff55-aa82-45a3-dc13-be0e78e52ea9" + }, + "source": [ + "dataset = EmotionDataset(tokenizer, 'emotion_data', 'test', 512)\n", + "loader = DataLoader(dataset, batch_size=32, num_workers=4)\n", + "model.model.eval()\n", + "outputs = []\n", + "targets = []\n", + "for batch in tqdm(loader):\n", + " outs = model.model.generate(input_ids=batch['source_ids'].cuda(),\n", + " attention_mask=batch['source_mask'].cuda(),\n", + " max_length=2)\n", + "\n", + " dec = [tokenizer.decode(ids) for ids in outs]\n", + " target = [tokenizer.decode(ids) for ids in batch[\"target_ids\"]]\n", + "\n", + " outputs.extend(dec)\n", + " targets.extend(target)" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "display_data", + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "8933ab7f935e4776970ddfe35f5da135", + "version_minor": 0, + "version_major": 2 + }, + "text/plain": [ + "HBox(children=(FloatProgress(value=0.0, max=63.0), HTML(value='')))" + ] + }, + "metadata": { + "tags": [] + } + }, + { + "output_type": "stream", + "text": [ + "\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "C9CYCGM6SRzb" + }, + "source": [ + "for i, out in enumerate(outputs):\n", + " if out not in emotions:\n", + " print(i, 'detected invalid prediction')" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "iE0WX_GbSRzq", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 34 + }, + "outputId": "24a4fe9c-3396-4364-aad3-8da50d456618" + }, + "source": [ + "metrics.accuracy_score(targets, outputs)" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "0.929" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 82 + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "mWkOZ7BASRz5", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 235 + }, + "outputId": "01a97ad3-3c70-43b6-e6a4-55ea5ccfa010" + }, + "source": [ + "print(metrics.classification_report(targets, outputs))" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "text": [ + " precision recall f1-score support\n", + "\n", + " anger 0.94 0.93 0.93 275\n", + " fear 0.86 0.92 0.89 224\n", + " joy 0.97 0.93 0.95 695\n", + " love 0.79 0.89 0.84 159\n", + " sadness 0.97 0.96 0.97 581\n", + " surprise 0.75 0.74 0.75 66\n", + "\n", + " accuracy 0.93 2000\n", + " macro avg 0.88 0.90 0.89 2000\n", + "weighted avg 0.93 0.93 0.93 2000\n", + "\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": 
"markdown", + "metadata": { + "id": "W6p9MGb6lWL5" + }, + "source": [ + "Now lets plot the confusion matrix and see for which classes our model is getting confused" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "9RtgfuzucFeN", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 70 + }, + "outputId": "0dc41da4-f99e-4469-8d0c-f055d4a18a8d" + }, + "source": [ + "import seaborn as sn\n", + "import pandas as pd\n", + "import matplotlib.pyplot as plt" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.6/dist-packages/statsmodels/tools/_testing.py:19: FutureWarning: pandas.util.testing is deprecated. Use the functions in the public API at pandas.testing instead.\n", + " import pandas.util.testing as tm\n" + ], + "name": "stderr" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "2ioVvq5rcHZE" + }, + "source": [ + "cm = metrics.confusion_matrix(targets, outputs)" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "4rM5XS09SSdm", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 462 + }, + "outputId": "171788f5-4c43-485c-b84a-133ad78e2486" + }, + "source": [ + "df_cm = pd.DataFrame(cm, index = [\"anger\", \"fear\", \"joy\", \"love\", \"sadness\", \"surprise\"], columns = [\"anger\", \"fear\", \"joy\", \"love\", \"sadness\", \"surprise\"])\n", + "plt.figure(figsize = (10,7))\n", + "sn.heatmap(df_cm, annot=True, cmap='Purples', fmt='g')" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "text": [ + "INFO:numexpr.utils:NumExpr defaulting to 4 threads.\n" + ], + "name": "stderr" + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 86 + }, + { + "output_type": "display_data", + "data": { + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAiYAAAGbCAYAAADwcltwAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+j8jraAAAgAElEQVR4nOzdeZgU1fX/8feZGdZBhn1AHWRVf4qSKBpxRxQVUBnZXCIuCEkElCRKNOCSRIMxi4p+VcANN1xwQ0WIigqKCkgUNCSCRBSEAVkFRGDm/P7oAgdlZgrsnqru+bx46pmq21Vdp4vu6tP33rpl7o6IiIhIHGRFHYCIiIjIdkpMREREJDaUmIiIiEhsKDERERGR2FBiIiIiIrGRk+odTJ78iS77CaHzSa2jDiFtZGVZ1CGIiJQrO6dyT1Qn2nVJ+659w/8Y6UlWNSYiIiISGymvMREREZHUMsucmmTVmIiIiEhsqMZEREQk3WVOhYkSExERkXRnGXRRgJpyREREJDZUYyIiIpLmMqjvqxITERGRtJdBmYmackRERCQ2VGMiIiKS5jKowkSJiYiISLrTVTkiIiIiKaAaExERkXSXQW05SkxERETSXAblJWrKERERkfhQjYmIiEiay6S7CysxERERSXeZk5eoKUdERETiQzUmIiIiaS6TxjFRYiIiIpLmMqiLiZpyREREJD5UYyIiIpLuMqjKRImJiIhImsugvERNOSIiIhKemdUzswlm9h8zm29mHc2sgZm9YmYLgr/1g3XNzEaZ2UIzm2tmh1X0/EpMRERE0pxlWdKmEG4HJrv7gUB7YD5wNfCau7cFXguWAU4H2gbTQODuip5ciYmIiEi6M0veVO5uLA84HrgPwN23uPta4CxgXLDaOKBHMH8W8JAnvAvUM7Nm5e1DiYmIiIjsYGYDzWx2qWlgqYdbAiuBB8zsX2Z2r5nlAvnuvixYZzmQH8zvA3xRavslQVmZMrLz65o1K3nkkVv5+uu1mEHHjqdx4oln8vLLj/HOO1OoUycPgG7d+nHwwR1YtaqIkSMvo0mTxLHab78D6Nt3UJQvIRIjrh3OtGlv0qBBA557diIAd9wxiqmvTyUry2jQoCE33fhnmjRpEnGk8bFs2TKuueZqvlq1CjPo07sPF1zQL+qwYmn69OmMvPnPFBeX0KtnLwYMGBB1SLF18imdyc3NJSsrm5ycbJ56ckLUIcWS3lPfSWbnV3cfA4wp4+Ec4DBgiLu/Z2a3812zzfbt3cx8T/efkYlJVlY2PXpcQkFBGzZv3sTf/vZrDjzwJwCceOJZnHTS2T/YpmHDpgwbNqqyQ42VHmcVct655/P74d+9xy6++BKGDLkcgEcefZi777mL66+7IaII4ycnJ5thw4Zx0EEHs3HjRnr17knHjkfTpk2bqEOLleLiYm686U/cO/Y+8vPz6du3D506ddJxKseDD4yjfv36UYcRW3pP7awSb+K3BFji7u8FyxNIJCZFZtbM3ZcFTTUrgseXAgWltt83KCtThU05QY/agorWi5O8vAYUFCTenDVr1iY/v4C1a1dFHFX8dejQgby8vJ3K6tSps2P+m2++yag7WCZD48ZNOOiggwHIzc2lVavWrFhRFHFU8TNv3lyaFzSnoKCA6tWrc3rXrkx9fWrUYUka03sqGu6+HPjCzA4IijoD/wYmAhcGZRcCzwfzE4F+QS5xFLCuVJPPLlVYYxJUyUwCDtmD1xC5VauKWLLkU1q0OID//W8+06e/xMyZr9O8eRt69OhP7dqJL97Vq4u45ZYrqFmzFt26XUDr1gdHHHl83D7qNiZOnMhee9Xh/vsejDqc2Fq6dCnz58/n0EPbRx1K7BQVraBps6Y7lpvm5zN37twII4o3M+PSAf0xM/r07kufPn2iDil29J76nsr9zTgEeNTMqgOLgItJVHQ8aWb9gcXA9jftJKArsBDYFKxbrrCdX+eY2RFhIy7dcWbSpCfCbpZ03377DfffP5Kzzx5AzZq1OeaY07n22jEMG3Y7devW57nn7gMSNSw33HA/w4bdTmHhpTz00N/YvHlTZHHHzRWXD+W1V6fSrVt3Hhv/aNThxNLGjRu5YujlXHP11TvVMonsiUcefpSnJzzD6HvGMH78Y8yePSvqkCTmKvNyYXf/wN07uPuh7t7D3de4+yp37+zubd39ZHdfHazr7j7I3Vu7+yHuPrui5w+bmPwMeMfMPg0GSJlnZmWmpu4+Jgi6Q9eufUPuIrmKi7dx//0j6dDhRNq3PxqAunXrk5WVTVZWFh07nsrixZ8AkJNTjdzcugAUFLShUaOmrFhRbhNYldS9W3deffWVqMOIna1btzJ06BV073YGp5zSJepwYik/vwnLly3fsby8qIgm+fnlbFG15QfHpmHDhnQ++WTmzpsXcUTxo/fU91gSp4iFTUxOBVoDJwFnAN2Dv7Hk7owfP4r8/AI6deqxo3zdutU75ufOfYdmzfYDYMOGdZSUFAPw1VfLWbnySxo2bIrA4sWf7ZifOnUqLVu2ii6YGHJ3rr1uBK1ateKiiy6KOpzYatfuEBZ/vpglS5awZcsWXp40iU6dOkUdVixt2rSJjRs37pifMeNt2rZpG3FU8aP3VOYKdVWOuy82s2OBtu7+gJk1BmJbX71o0b+ZNet1mjVrwS23JK4o6datH3PmvMnSpf8DjIYNm9CnT+KS4IULP+Lllx8lOzsn0abbZxC5uXtF+AqicdWwK5k1ayZr166lc+dOXDZoMNOnT+Ozz/6HWRZ777031117fdRhxsqcOXOYOHEi+++/P4VnFwIwdOhQTjj+hIgji5ecnByGDx/BgIGXUlJSQmHh2fqyLcOqVau4/PIhAGwr3ka3bt057rjjIo4qfvSe2lkmXZhg7hVfamxm1wMdgAPcfX8z2xt4yt2PqWjbyZM/2eNrmauSzie1jjqEtJEVbshkEZHIZOdU7omqR/O/Je279rnPr4z0JBu2KacQOBPYCODuXwJVr0pBREREUirsAGtbSo/kFgw/KyIiInGQQTeYCZuYPGlmo0ncfGcAcAkwNnVhiYiISFiZ1MckbOfXv5nZKcB64ADgOnfXdaMiIiKSVKHvlRMkIkpGREREYiaDKkzCJSZm9jXw/R6/64DZwG/dfVGyAxMREZGQMigzCVtjchuJOwo+RmJcuHNIDLg2B7gfODEVwYmIiEjVEjYxOdPdS9+ZbIyZfeDuvzOz36ciMBEREQkngypMQicmm8ysDzAhWO4FbA7mNYCaiIhIhMLcfC9dhL3y+XzgAmAFUBTM/9zMagGDUxSbiIiIVDFhLxdeRNk37XsreeGIiIjIbsugtpywV+U0BgYALUpv4+6XpCYsERERCSuD8pLQfUyeB6YDrwLFqQtHREREqrKwiUltd/9dSiMRERGRPZJJQ9KH7fz6opl1TWkkIiIismeykjhFLGwIV5BITr4xs/Vm9rWZrU9lYCIiIlL1hL0qZy8zawC0BWqmNiQRERHZHZnUlBP2qpxLSdSa7At8ABwFzAA6py40ERERCS
OTEpPdaco5Aljs7p2An5K4iZ+IiIhI0oS9Kmezu282M8yshrv/x8wOSGlkIiIiEorFoNNqsoRNTJaYWT3gOeAVM1sDLE5dWCIiIhJaBjXlhO38WhjM3mBmrwN5wOSURSUiIiJVUtgakx3c/c1UBCIiIiJ7JoMqTHY/MREREZF4sazMyUwyqLuMiIiIpDvVmIiIiKS7DGrLSXlicsrJbVK9i4zw/pylUYeQNg4/bJ+oQxCpkkpKPOoQ0kZ2Je8vg/ISNeWIiIhIfKgpR0REJM1lUudXJSYiIiLpLoPactSUIyIiIrGhGhMREZE0l0EVJkpMRERE0l0m9TFRU46IiIjEhmpMRERE0l3mVJgoMREREUl3lkGdTNSUIyIiIrGhGhMREZE0l0mdX5WYiIiIpLkMaslRU46IiIjEh2pMRERE0l0GVZkoMREREUlzmdTHRE05IiIiEhuqMREREUlzGdSSo8REREQk7WVQZqKmHBEREYkNJSYiIiJpzsySNoXY12dmNs/MPjCz2UFZAzN7xcwWBH/rB+VmZqPMbKGZzTWzwyp6fiUmIiIiac6ykjeF1Mndf+LuHYLlq4HX3L0t8FqwDHA60DaYBgJ3V/TESkxERETkxzoLGBfMjwN6lCp/yBPeBeqZWbPynkiJiYiISLozS9pkZgPNbHapaeD39ubAP83s/VKP5bv7smB+OZAfzO8DfFFq2yVBWZl0VY6IiEiaS+ZFOe4+BhhTzirHuvtSM2sCvGJm//ne9m5mvqf7V2IiIiKS5ipz5Fd3Xxr8XWFmzwJHAkVm1szdlwVNNSuC1ZcCBaU23zcoK5OackRERCQUM8s1s722zwNdgI+AicCFwWoXAs8H8xOBfsHVOUcB60o1+eySakxERETSXeUNsJYPPBtcVpwDPObuk81sFvCkmfUHFgN9gvUnAV2BhcAm4OKKdlClEpPhI4bz5ptv0KBBAyY+/0LU4URu1aoVjB79F9atW4OZ0alTN0499Ww2bFjPnXfeyFdfFdGoUT5DhlxLbu5evPTSE8yYMRWA4uJivvzyc+66awJ16tSN+JVEa/r06Yy8+c8UF5fQq2cvBgwYEHVIsaTjFN7Jp3QmNzeXrKxscnKyeerJCVGHFAsjrh3OtGlv0qBBA557diIAU6ZM5q67/49FixYxfvwTtDu4XcRRRqOy8hJ3XwS030X5KqDzLsodGLQ7+6hSiUlhjx6cf955XH3N1RWvXAVkZ2dz3nm/pEWLtnzzzSauu+5XtGt3ONOmTeHgg3/KGWecywsvjOeFFx7nnHMG0K1bX7p16wvAnDnvMHny01U+KSkuLubGm/7EvWPvIz8/n759+9CpUyfatGkTdWixouO0+x58YBz169ePOoxY6XFWIeedez6/H/7dObxN27bcduso/vDHG6ILTJKqSvUx6dDhCPLy6kUdRmzUq9eQFi3aAlCrVm323rs5q1d/xZw5MzjuuC4AHHdcF95//+0fbPvuu1Pp2LFTpcYbR/PmzaV5QXMKCgqoXr06p3ftytTXp0YdVuzoOEkydOjQgby8vJ3KWrdqTcuWLSOKKD4sy5I2Ra3cxMTMsr9/GZBkppUrl7N48ULatDmQ9evXUK9eQwDy8hqwfv2andb99tvNzJ07myOOOC6KUGOlqGgFTZs13bHcND+fFUVFEUYUTzpOu8fMuHRAf3r17smTTz4ZdTiSDpI4jknUyk1M3L0Y+K+ZNd+dJy09OMvYseVdCi1xsHnzN4wa9QfOP/8yatXK3emxRAennd+o//rXO7Rte3CVb8YRSZVHHn6Upyc8w+h7xjB+/GPMnj0r6pBEKk2YPib1gY/NbCawcXuhu59Z1galB2cp3layx4OsSOpt27aNUaNu4OijO++oAalbtz5r166iXr2GrF27irp1d27+evfdN9SME8jPb8LyZct3LC8vKqJJfn45W1RNOk67Jz84Ng0bNqTzySczd948OnQ4IuKoJM5iUNGRNGH6mFwLdAf+CPy91CRpzt25996/sffe+3H66b12lB92WEemT/8nANOn/5PDDjt6x2ObNm3gP/+Zu1NZVdau3SEs/nwxS5YsYcuWLbw8aRKdOilp+z4dp/A2bdrExo0bd8zPmPE2bdu0jTgqibtM6mNSYY2Ju79ZGYFUhiuv/C0zZ81k7dq1dDrpRAYPGkzPnr0q3C5TffLJR7z99qsUFLRk+PBfANC79yV0734Od955I2++OZlGjZowePC1O7aZPftt2rU7nJo1a0UVdqzk5OQwfPgIBgy8lJKSEgoLz9aXyC7oOIW3atUqLr98CADbirfRrVt3jjtO/bkArhp2JbOCc3jnzp24bNBg8vLyGPnnm1i9ZjWXXfYrDjzwQMaMHht1qPIjWOIS43JWSIzUdgfw/4DqQDaw0d1DdTBQU044788pd4ReKeXww8q9/5OIpEiJTuehVaueXalVD0N6P5a0/5w7njov0mqTMH1M7gTOAZ4COgD9gP1TGZSIiIjshuhbYJIm1Dgm7r4QyHb3Ynd/ADgttWGJiIhIVRSmxmSTmVUHPjCzW4BlVLGB2UREROIsDp1WkyVMgnFBsN5gEpcLFwA9UxmUiIiIhGdmSZuiFuaqnMVmVgto5u5/qISYREREpIqqsMbEzM4APgAmB8s/MbOJqQ5MREREQsqy5E1Rv5QQ69wAHAmsBXD3DwDdMUlERCQmMuhWOaESk63uvu57ZbqYXURERJIuzFU5H5vZeUC2mbUFLgdmpDYsERERCSsOnVaTpcwaEzN7OJj9FDgY+BYYD6wHhqY+NBEREQklg/qYlFdjcriZ7Q30BTqx8437agObUxmYiIiIVD3lJSb3AK8BrYDZpcqNRB+TVimMS0RERELKoJacshMTdx8FjDKzu939V5UYk4iIiOyGKjXyq5ISERERqSxhrsoRERGROMugthwlJiIiImmuSlwuLCIiIlLZVGMiIiKS5iyDqhmUmIiIiKQ5NeWIiIiIpIBqTERERNJdBtWYKDERERFJc5nUxySDXoqIiIikO9WYiIiIpLlM6vyqxERERCTdVaV75YiIiIhUFtWYiIiIpDk15UjSHX7YPlGHkDY6V7sh6hDSwssbRkQdQlqoXkOnwbCyMqi5INNkUF6iphwRERGJD/1UEBERSXcZVJulxERERCTNZVIfEzXliIiISGyoxkRERCTNZVCFiRITERGRtJdBfUzUlCMiIiKxoRoTERGRNJdJnV+VmIiIiKQ5U1OOiIiISPKpxkRERCTdZU6FiRITERGRdJdJfUzUlCMiIiKxoRoTERGRNKfOryIiIhIbZpa0KeT+ss3sX2b2YrDc0szeM7OFZvaEmVUPymsEywuDx1tU9NxKTERERNKdJXEK5wpgfqnlvwC3unsbYA3QPyjvD6wJym8N1iuXEhMREREJzcz2BboB9wbLBpwETAhWGQf0CObPCpYJHu9sFVTLKDERERFJc8lsyjGzgWY2u9Q08Hu7uw0YBpQEyw2Bte6+LVheAuwTzO8DfAEQPL4uWL9M6vwqIiKS5pJ5tbC7jwHG7Ho/1h1Y4e7vm
9mJydvrd5SYiIiISFjHAGeaWVegJlAXuB2oZ2Y5Qa3IvsDSYP2lQAGwxMxygDxgVXk7UFOOiIhImjNL3lQed7/G3fd19xbAOcBUdz8feB3oFax2IfB8MD8xWCZ4fKq7e3n7UI2JiIhImovByK+/Ax43sxuBfwH3BeX3AQ+b2UJgNYlkplxKTERERGS3ufsbwBvB/CLgyF2ssxnovTvPq8REREQkzUVfYZI8SkxERETSXAyacpJGnV9FREQkNlRjIiIikuYyqMKkaiUm3377Lf36XcCWLVvYVryNLl1OZcjgIVGHFUvTp09n5M1/pri4hF49ezFgwICoQ4pUnbyaXHXvWbRs1wR3+Mslz/Hvd78AoM9vjuayv5/GWY1uZt2qTQD85IQWDL7tdLKrZbPuq00MPfH+KMOPxLfffssll17E1i1b2FZczMmdT+GyXw3ivffe5dbb/0FJSQm1a9fmjzfcSPPmzaMONzZ0ngrv5FM6k5ubS1ZWNjk52Tz15ISKN8pQmdSUU6USk+rVq3P//Q+Qm5vL1q1b+fkFP+f4446jffufRB1arBQXF3PjTX/i3rH3kZ+fT9++fejUqRNt2rSJOrTIDL79dGZOXsD1vZ8gp1o2NWtXA6DxvnXp0KUNyxev3bFunbyaDL2rO8NOe5gVX6yjXuPcqMKOVPXq1Rk7+j5q167N1q1bubj/hRx7zLHcNPJGbvvHKFq1asUTTz7O2PtG86c/3BR1uLGh89TuefCBcdSvXz/qMCSJKuxjYmZnmFlG9EUxM3JzE18S27ZtY9u2rZlV/5Uk8+bNpXlBcwoKCqhevTqnd+3K1NenRh1WZHLr1qD98S146b45AGzbWsyGdZsBGHzr6YweNgVKjRfU+bxDmP7MfFZ8sQ6AtSs3Vn7QMWBm1K5dG9j+edu2414cGzduAGDDhg00btQkyjBjR+cp2ROVNcBaZQhTY9IXuM3Mngbud/f/pDimlCouLqZX7158/vnnnHfuubQ/tH3UIcVOUdEKmjZrumO5aX4+c+fOjTCiaDVrWZ+1Kzdy9QOFtG7flE/e/5I7rpjE4Se3ZuXS9Xw6t2in9Qv2b0R2tSxue/1iau1Vg6dvf4d/PvxhRNFHq7i4mHPP78sXX3xO3z7ncMghh3L9tTcw+PLLqFGjBnVy6/DQuEejDjN2dJ4Kx8y4dEB/zIw+vfvSp0+fqEOKjBGDjCJJKqwJcfefAz8FPgUeNLN3gjsP7lXWNqXvTDh27C7vAxSZ7Oxsnn3mWV6f+jrz5s1jwYJPog5JYi47J4v9D2vG83fPYsBhd/PNxi1cdEMnzv/98Txw3Q9rkrJzsjjg8L25utsjDDv1IfpdeyL7ti33ZpoZKzs7mycfn8CUya/y0ccfsXDhAh559GHuHHUX/5z8Gmee2YO//+OvUYcZOzpPhfPIw4/y9IRnGH3PGMaPf4zZs2dFHZIkQagmGndfD0wAHgeaAYXAHDPbZY8sdx/j7h3cvcOAAd+/W3I81K1blyOPPJLpb70VdSixk5/fhOXLlu9YXl5URJP8/AgjitbKJetZuWQ982cuAeDNCf+m7WF706xlPe778DIe/9+vabxvXcbM+SUN8uuwcsl6Zk5ZyOZNW1m3ahMfTvuM1u2bVrCXzFZ3r7oc0eEI3nr7LT5Z8F8OOeRQAE7tchoffvhBxNHFl85T5csPzksNGzak88knM3fevIgjik4mNeWE6WNyppk9S2LY2WrAke5+OtAe+G1qw0uu1atXs379egA2b97MjHfeoVXLlhFHFT/t2h3C4s8Xs2TJErZs2cLLkybRqVOnqMOKzOqiDaz4Yj0F+ydqPQ7v3IoFc76kMP8Wzml5K+e0vJWVS9Yz8LB7WF20gbeen88hx+5HdnYWNWpV46Cf7cvn81dG/Coq3+o1q1n/9Xeft3fffZdWLVuxYcMGFi/+DIB333uHli1bRRhl/Og8Fc6mTZvYuHHjjvkZM96mbZu2EUcVnUxKTML0MekJ3Oru00oXuvsmM+ufmrBSY+XKlVzz+2soKSmmpKSE0049jRNPrLpfuGXJyclh+PARDBh4KSUlJRQWnl2lP/AAo4a8xIhHe5FTPZtli9Zw88XPlrnu5//5ipmTF3Df3MvwEuele+fwv49XVGK08fDVypVce/0ISoqLKXGnyyldOP74E7huxA389qpfk2VZ7FW3Ln+4/o9RhxorOk+Fs2rVKi6/PFFpv614G926dee4446LOCpJBqvg7sOJlczygSOCxZnuHvosW7ytpOIdiOyGztVuiDqEtPDyhhFRh5AWqteoUqMmSCXJzsmq1LqHf9zyZtK+a38z7IRI603CNOX0BmaSuDtgH+A9M+uV6sBEREQknKrWlDMCOGJ7LYmZNQZeJdEZVkRERCRpwiQmWd9rulmFbv4nIiISH3Go6kiSMInJZDObAowPls8BXk5dSCIiIrI7MigvqTgxcferzOxs4Jig6B53fy61YYmIiEhVVGZiYmZvufuxZvY14LBjvNuBZlYCrAb+6u53VUKcIiIiUoYqcXdhdz82+LvLoefNrCEwA1BiIiIiEqEMykv2vBOru68CTkxeKCIiIlLV/aiRhdx9WbICERERkT1TJZpyREREJD1kUF6i8UhEREQkPlRjIiIikuYyqMJEiYmIiEi6y6Q+JmrKERERkdhQjYmIiEiay6AKEyUmIiIi6U5NOSIiIiIpoBoTERGRNJdBFSZKTERERNKdmnJEREREUkA1JiIiImkugypMlJiIiIiku0xKTNSUIyIiIrGhGhMREZE0l0mdX5WYiIiIpLkMykvUlCMiIiLxoRoTSTuvfHt91CGkhaVfros6hLRQsG+9qEMQ+dHUlCMiIiLxkTl5iZpyREREJD5UYyIiIpLm1JQjIiIisaHERERERGIjg/IS9TERERGR+FCNiYiISJpTU46IiIjERgblJWrKERERkfhQjYmIiEiay6SmHNWYiIiIpDkzS9pUwX5qmtlMM/vQzD42sz8E5S3N7D0zW2hmT5hZ9aC8RrC8MHi8RUWvRYmJiIiIhPUtcJK7twd+ApxmZkcBfwFudfc2wBqgf7B+f2BNUH5rsF65lJiIiIikObPkTeXxhA3BYrVgcuAkYEJQPg7oEcyfFSwTPN7ZKqiWUWIiIiKS5pLZlGNmA81sdqlp4Pf2lW1mHwArgFeAT4G17r4tWGUJsE8wvw/wBUDw+DqgYXmvRZ1fRUREZAd3HwOMKefxYuAnZlYPeBY4MJn7V2IiIiKS5iyr8q/Kcfe1ZvY60BGoZ2Y5Qa3IvsDSYLWlQAGwxMxygDxgVXnPq6YcERGRNFdZfUzMrHFQU4KZ1QJOAeYDrwO9gtUuBJ4P5icGywSPT3V3L28fqjERERGRsJoB48wsm0TlxpPu/qKZ/Rt43MxuBP4F3Besfx/wsJktBFYD51S0AyUmIiIiaa6yBlhz97nAT3dRvgg4chflm4Heu7MPJSYiIiJpLoMGflUfExEREYkP1ZiIiIikuUy6V44SExERkTSXSYmJmnJEREQkNlRjIiIikuYyqMJEiYmIiEja
y6DMRE05IiIiEhtVKjFZtmwZF110Id3P6M4ZZ3bn4Ycfijqk2Jo+fTpdu53OqaedytixY6MOJ1ZGXDuc4084lh6FZ/7gsQfHPUC7Qw5izZo1EUQWvb///Ub69OnKwIHn/+CxCRMe49RTO7Ju3VoApk6dwi9/+XN+8YvzGTp0AJ9+uqCyw42l4SOGc+xxx3DmWWdEHUrs6Tz1nWTeXThqVSoxycnJZtiwYbz4wos8Pv4JHhv/GAsXLow6rNgpLi7mxpv+xOh7xvDCxBeYNOklHadSepxVyD13//DGm8uWL2PGjBk0a9YsgqjioUuXbtx0060/KF+xoog5c2bSpEnTHWX5+c3461/vYvToRzn//Eu4/fabKzPU2Crs0YMxo8u8sasEdJ7aWWXdK6cyVKnEpHHjJhx00MEA5Obm0qpVa1asKIo4qviZN28uzQuaU1BQQPXq1StI0YEAACAASURBVDm9a1emvj416rBio0OHDuTl5f2g/JZb/sJvfvPbWPziiMohh/yUvfaq+4Py0aNvp3//QTud9A4++NAd6x544MF89dWKygoz1jp0OIK8vHpRhxF7Ok9lrtCJiZntZ2YnB/O1zGyv1IWVekuXLmX+/Pkcemj7qEOJnaKiFTRt9t0v26b5+awoUgJXnqlTX6NJkyYceMCBUYcSOzNmTKNRo8a0bt22zHUmT36BI47oWIlRSbrTeWpnlmVJm6IWKjExswHABGB0ULQv8Fw56w80s9lmNnvs2PhVSW7cuJErhl7ONVdfTZ06daIOR9LcN998w9h7xzB40JCoQ4mdzZs38/jj4+jXb0CZ63zwwftMmfIC/fsPqsTIRDJLJjXlhL1ceBCJuwa+B+DuC8ysSVkru/sYYAxA8bYS/7FBJtPWrVsZOvQKunc7g1NO6RJ1OLGUn9+E5cuW71heXlREk/z8CCOKty+++IKlS5fSs1chAEVFRfTu05PHxz9Bo0aNI44uWsuWLWH58mX86lcXALBy5UoGDbqIUaPuo0GDhixatJDbbhvJjTf+g7p1f9g8JlIWnacyV9jE5Ft337K97dzMcoBYJRxhuDvXXjeCVq1acdFFF0UdTmy1a3cIiz9fzJIlS2jSpAkvT5rELX/9a9Rhxdb+++/PtDff2rHc5dSTeeLxp6hfv36EUcVDy5ZtePLJSTuW+/Ur5I47HiAvrx4rViznj3+8mquuuo59920eYZSSjnSe2lkm9W0Lm5i8aWa/B2qZ2SnAZcALqQsrNebMmcPEiRPZf//9KTw78et26NChnHD8CRFHFi85OTkMHz6CAQMvpaSkhMLCs2nbpuz+AVXNVcOuZNasmaxdu5bOnTtx2aDB9Dy7Z9RhxcLIkdcxd+4c1q1by/nnn8kFF1zKaaf98LJqgEcfvZ+vv17PnXf+DYDs7GzuvPOBygw3lq688rfMDN5fnU46kcGDBtOzZ6+ow4odnad2lkmJiblXXPFhZllAf6ALYMAU4F4PsXHcmnIk/ZXoLRXK0i/XRR1CWijYV1fASPJl51RuL9KXXvpP0k6M3bodGGmWE7bGpAfwkLtX7RFsREREYiiDKkxCXy58BvCJmT1sZt2DPiYiIiISA1Vu5Fd3vxhoAzwFnAt8amb3pjIwERERqXpC13y4+1Yze5nE1Ti1SDTvXJqqwERERCScONR0JEvYAdZON7MHgQVAT+BeoGm5G4mIiEilqIoDrPUDngB+4e7fpjAeERERqcJCJSbufq6Z5QOnBNVFM91dd9wSERGJgarYlNMbmAn0BvoA75mZRvwRERGJgUy6KidsU84I4IjttSRm1hh4lcSN/URERESSImxikvW9pptVhB8DRURERFIoBhUdSRM2MZlsZlOA8cFyX2BSOeuLiIhIJbHKHQE/pcJ2fr3KzHoCxwRFY9z92dSFJSIiIlXR7gyw9jTwdApjERERkT1QZZpyzOxrEiO9/uAhwN29bkqiEhERkdCMzMlMyk1M3H2vygpERERERHcJFhERSXeZU2GixERERCTdxWFgtGTRWCQiIiISG6oxERERSXMZVGGixERERCTdqSlHREREJAVUYyIiIpLmMqjCRImJiIhIusukphwlJiIiImkug/IS9TERERGR+FCNiYiISJpTU85uKCnZ1T0A5fsy6D2VcllZOlhhFOxbL+oQ0sK57e+IOoS0Mf7DIVGHIGXIpO8QNeWIiIhIbKgpR0REJM1lUo2JEhMREZE0Zxl0e2E15YiIiEhsqMZEREQkzakpR0RERGIjky4XVlOOiIiIhGJmBWb2upn928w+NrMrgvIGZvaKmS0I/tYPys3MRpnZQjOba2aHVbQPJSYiIiJpzix5UwW2Ab9194OAo4BBZnYQcDXwmru3BV4LlgFOB9oG00Dg7op2oMREREQkzZlZ0qbyuPsyd58TzH8NzAf2Ac4CxgWrjQN6BPNnAQ95wrtAPTNrVt4+lJiIiIjIDmY20Mxml5oGlrFeC+CnwHtAvrsvCx5aDuQH8/sAX5TabElQViZ1fhUREUlzyez76u5jgDHl78/qAE8DQ919femaFnd3M9vj+9EoMREREUlzlXlVjplVI5GUPOruzwTFRWbWzN2XBU01K4LypUBBqc33DcrKpKYcERERCcUSGdB9wHx3/0ephyYCFwbzFwLPlyrvF1ydcxSwrlSTzy6pxkRERCTdVV6FyTHABcA8M/sgKPs9cDPwpJn1BxYDfYLHJgFdgYXAJuDiinagxERERCTNVVZTjru/RdlpUOddrO/AoN3Zh5pyREREJDZUYyIiIpLmMmhEeiUmIiIi6U73yhERERFJAdWYiIiIpLnMqS9RYiIiIpL21JQjIiIikgKqMREREUlzGVRhosREREQk3akpR0RERCQFVGMiIiKS5jKowkSJiYiISLrLpMRETTkiIiISGxlfYzLi2uFMm/YmDRo04LlnJwIwZcpk7rr7/1i0aBHjxz9Bu4PbRRxl/Dz88MNMePop3J1evXrT74J+UYcUS8uWLeOaa67mq1WrMIM+vftwgY7VLk2fPp2RN/+Z4uISevXsxYABA6IOKVJ3/vNiNm/cQkmJU7ythGv6Pk7vy35G517tWL/mGwDG3zaDf03/jOxqWQy8vjOtD25CiTsPjnyTf89aGvEriJ7eU9/JpM6vGZ+Y9DirkPPOPZ/fD796R1mbtm257dZR/OGPN0QXWIwtWLCACU8/xePjn6BatWr84pcDOeGEE9iv+X5RhxY7OTnZDBs2jIMOOpiNGzfSq3dPOnY8mjZt2kQdWqwUFxdz401/4t6x95Gfn0/fvn3o1KlTlT9Of7j4ab5eu3mnspce+hcvPDhnp7KTeyV+PF1Z+Ch1G9Ti9/ecxTV9H8e90kKNHb2ndpZBecnuN+WYWX0zOzQVwaRChw4dyMvL26msdavWtGzZMqKI4m/Rok859JBDqVWrFjk5OXTocASvvvpq1GHFUuPGTTjooIMByM3NpVWr1qxYURRxVPEzb95cmhc0p6CggOrVq3N6165MfX1q1GGljX1bN+Cj974AYP3qb9j49RZatcuPOKpo6T2VuUI
lJmb2hpnVNbMGwBxgrJn9I7WhSVTatGnL+3PeZ+3atXzzzTdMnz6N5cuXRR1W7C1dupT58+dz6KHtow4ldoqKVtC0WdMdy03z81lRVMUTOHeGjy3k5ifPoXPv75qTTz2vPX995nx+9aeTya1bA4DP/vsVHTq1IivbaLxPXVod1IRGTfeKKvJY0HtqZ2aWtClqYZty8tx9vZldCjzk7teb2dyyVjazgcBAgLv+724uvbTqtvulo9atW9P/kksZMPBSatWqxYEHHEhWVnbUYcXaxo0buWLo5Vxz9dXUqVMn6nAkDVx7wVOsWbGRug1qMeLeQr5ctJp/PjGPCffMBHf6DulIv6uO4+5rX+X1Zz5m31YNuPnJc1n55Xr++8EySopLon4JIikRNjHJMbNmQB9geEUru/sYYAzA1i3FVbgVNH317NmTnj17AnDbbbeS37RpBVtUXVu3bmXo0Cvo3u0MTjmlS9ThxFJ+fhOWL1u+Y3l5URFN8qt2U8SaFRuBRNPMrFc/pc0hTZn//pc7Hn9twkf87q4zASgpdsb9ZdqOx/70SG++XLy2cgOOGb2nMlfYPiZ/BKYAC919lpm1AhakLiyJ2qpVqwD4ctmXvPraq3Tr2i3iiOLJ3bn2uhG0atWKiy66KOpwYqtdu0NY/PlilixZwpYtW3h50iQ6deoUdViRqVErh5q1q+2YP/To5ny+cBX1GtXesc6RJ7fhiwWJz2H1mjnUqJX4HXlIx+YUFztLP11d+YHHiN5TO6tyTTnu/hTwVKnlRUDPVAWVTFcNu5JZs2aydu1aOnfuxGWDBpOXl8fIP9/E6jWrueyyX3HggQcyZvTYqEONlaG/voK1a9eSk1ONEcNHULdu3ahDiqU5c+YwceJE9t9/fwrPLgRg6NChnHD8CRFHFi85OTkMHz6CAQMvpaSkhMLCs2nbpm3UYUUmr2FtrhzVHYDs7Czeeum/fPjWYgaP7EKLAxvjDiu/XM+YG15LrN+gFsPHFFJS4qxesYE7r54SZfixoPfUzmKQTySNeYjrzczsFuBG4BtgMnAo8Gt3f6SibdWUE04mvalSLQ4ZvWSOc9vfEXUIaWP8h0OiDiFtZOdkVeqJ6tNPVyXtu7Z164aRnmTDNuV0cff1QHfgM6ANcFWqghIREZGqKXTn1+BvN+Apd1+nX60iIiLxkElfyWETkxfN7D8kmnJ+ZWaNgc0VbCMiIiKyW8J2fr066Geyzt2LzWwTcFZqQxMREZEwjMypMgk78mtt4DLg7qBob6BDqoISERGR3WBJnCIWtvPrA8AW4OhgeSmJq3REREREkiZsYtLa3W8BtgK4+yZikVeJiIiIWfKmqIXt/LrFzGoBDmBmrYFvUxaViIiIhJZJfUzCJibXkxhYrcDMHgWOAS5KVVAiIiJSNYW9KucVM5sDHEWiCecKd/8qpZGJiIhIOJlTYRK6xgSgJrAm2OYgM8Pdp1WwjYiIiKRYBuUl4RITM/sL0Bf4GCgJih1QYiIiIiJJE7bGpAdwgLurw6uIiEjMZNJtYsImJouAauhKHBERkfjJnLwkdGKyCfjAzF6jVHLi7penJCoREREJLYPyktCJycRgEhEREUmZsJcLj0t1ICIiIrJnqkwfEzObRzDa6664+6FJj0hERESqrIpqTLoHfwcFfx8O/v6cchIWERERkT1RbmLi7osBzOwUd/9pqYd+F4wEe3UqgxMREZGKZVBLTui7C5uZHVNq4ejd2FZERERSyMySNkUt7FU5/YH7zSyPxFVJa4BLUhaViIiIVElhr8p5H2gfJCa4+7qURiUiIiJVUuib+JlZN+BgoOb2qh53/2OK4hIREZGQYtACkzSh+omY2T0kbuI3hERTTm9gvxTGJSIiIlVQ2A6sR7t7P2CNu/8B6Ajsn7qwREREJCxL4r+ohW3K2Rz83WRmewOrgWZhNszKiv5FpoOSEg0LE1YmVVlK9MZ/OCTqENLG5s1bow4hbeTWqVG5O8yg82LYxOQFM6sH/BWYQ2JwtbEpi0pERESqpLBNOf8Bit39aeD/gHeB51IWlYiIiIRmlryp4n3Z/Wa2wsw+KlXWwMxeMbMFwd/6QbmZ2SgzW2hmc83ssIqeP2xicq27f21mxwInAfcCd4fcVkRERFLIkjiF8CBw2vfKrgZec/e2wGt8NzL86UDbYBpIiNwhbGJSHPztBox195eA6iG3FRERkQzh7tNI9DUt7SxgXDA/DuhRqvwhT3gXqGdm5fZRDZuYLDWz0SQuGZ5kZjV2Y1sRERFJpSS25ZjZQDObXWoaGCKCfHdfFswvB/KD+X2AL0qttyQoK1PYzq99SFTb/M3d1wbZzlUhtxUREZEUSuZFOe4+BhjzI7Z3M9vjS03DDkm/CXim1PIyYFnZW4iIiEgVUmRmzdx9WVB5sSIoXwoUlFpv36CsTGqOERERSXOVeVVOGSYCFwbzFwLPlyrvF1ydcxSwrlSTzy6FvleOiIiIxFQljjxpZuOBE4FGZrYEuB64GXjSzPoDi0l0AQGYBHQFFgKbgIsrfH731I44WrxNQ5qGoZFfw9NowiLR0Miv4eXWqVGpJ6pVX21M2pdIw0a5kZ5kVWMiIiKS5jLp55oSExERkTSXSfcQU+dXERERiQ3VmIiIiKS9zKkyUWIiIiKS5tSUIyIiIpICSkxEREQkNtSUIyIikubUlCMiIiKSAqoxERERSXuZU2WixERERCTNqSlHREREJAWUmIiIiEhsqClHREQk3akpR0RERCT5VGMiIiKS5iyDqkxUYyIiIiKxUeUSk+nTp9O12+mcetqpjB07NupwYmXEtcM5/oRj6VF45o6yO+4YReHZPejZq5ABAy9lxYoVEUYYP8NHDOfY447hzLPOiDqU2NNnL5xvv/2Wvn37UFjYgzPO7M4dd94RdUixU1xczLnn9eHyKwYDMHPme5x3Xh969ynkuuuGs23btogjlB+jSiUmxcXF3HjTnxh9zxhemPgCkya9xMKFC6MOKzZ6nFXIPXeP2ans4osv4dlnnuPpCc9ywgkncPc9d0UUXTwV9ujBmNFjKl6xitNnL7zq1atz//0P8Oyzz/HM08/y1ltv8eGHH0QdVqyMH/8oLVu0BKCkpITrbxjByJG38NSTz9Ks2d68+OLEiCOsfGbJm6JWpRKTefPm0rygOQUFBVSvXp3Tu3Zl6utTow4rNjp06EBeXt5OZXXq1Nkx/80332BxeNfGSIcOR5CXVy/qMGJPn73wzIzc3FwAtm3bxrZtW+PxbRETRUXLmf7WNHr0OBuAdevWUi2nGvvt1wKAnx11FK9NfTXCCOXHCpWYmFm+md1nZi8HyweZWf/UhpZ8RUUraNqs6Y7lpvn5rCgqijCi9HD7qNvofPJJvPTSiwweNCTqcCQN6bO3e4qLiyk8u5BjjzuWozseTftD20cdUmz87e+3cMUVvyErK/H1Va9efbYVF/Pvf38MwGuvvkLR8uVRhig/UtgakweBKcDewfInwNCyVjazgWY228xmjx2rau50d8XlQ3nt1al069adx8Y/GnU4IhkvOzubZ595lt
envs68efNYsOCTqEOKhWnT3qRB/QYc9P8O2lFmZowceQt/+/stXNDvPGrn5pKVnR1hlBHJoLacsJcLN3L3J83sGgB332ZmxWWt7O5jgDEAxdtK/MeHmRz5+U1Yvuy7THp5URFN8vMjjCi9dO/WnV9d9kvVmshu02dvz9StW5cjjzyS6W+9Rdu2+0cdTuQ+/PAD3pz2Bm+9/RZbtnzLxg0bGT7iGm66cST33zcOgHfemcHnixdHHGnliz6dSJ6wNSYbzawh4ABmdhSwLmVRpUi7doew+PPFLFmyhC1btvDypEl06tQp6rBibfHiz3bMT506lZYtW0UXjKQtffbCW716NevXrwdg8+bNzHjnHVq1bBlxVPEwZMgVTH75VV56cTIj/3wLHY44kptuHMnq1asA2LJlCw+Ou5+ePXtHHKn8GGFrTH4DTARam9nbQGOgV8qiSpGcnByGDx/BgIGXUlJSQmHh2bRt0zbqsGLjqmFXMmvWTNauXUvnzp24bNBgpk+fxmef/Q+zLPbee2+uu/b6qMOMlSuv/C0zg2PW6aQTGTxoMD17pt1HI+X02Qtv5cqVXPP7aygpKaakpITTTj2NE09UEleecQ89yPTp03AvoVevPhx55M+iDqnyZVCVibmHa2kxsxzgABIv/7/uvjXMdnFqyomzEh2m0LKyMugTKJJGNm8OddoXILdOjUo9UW3a8G3SvkRqV3Ls3xf2qpzeQC13/xjoATxhZoelNDIRERGpcsL2MbnW3b82s2OBzsB9wN2pC0tERERCy6CrcsImJtuvwOkGjHX3l4DqqQlJREREqqqwiclSMxsN9AUmmVmN3dhWREREJJSwyUUfEgOsnerua4EGwFUpi0pERERCsyROUSv3cmEzq+vu64GawBtBWQPgW2B2yqMTERGRisUho0iSisYxeQzoDrxPYnC10i/dAY22JSIiEjHLoMyk3MTE3btb4nayJ7j755UUk4iIiFRRFfYx8cQIbC9VQiwiIiKyJzKok0nYzq9zzOyIlEYiIiIieySD8pLQ98r5GXC+mS0GNpKI3d390JRFJiIiIlVO2MTk1JRGISIiInsuDlUdSRIqMXH3xcG9cY4lcTXO2+4+J6WRiYiISEiZk5mEvYnfdcA4oCHQCHjAzEakMjARERGpeixx0U0FK5n9F2jv7puD5VrAB+5+QEXbFm8rSdqtmDNZiQ5TaFlZmfPLQCSdbN68NeoQ0kZunRqVeqLasnlb0r5EqtfMifQkG7aPyZckRn/dHCzXAJamJCIRERHZPRn0ey1sYrIO+NjMXiHRx+QUYKaZjQJw98tTFJ+IiIhUIWETk2eDabs3kh+KiIiI7IkMqjCpODExs2ygi7ufXwnxiIiIyO6yzElNwgxJXwzsZ2bVKyEeERERqcLCNuUsAt42s4kkRn4FwN3/kZKoREREpEoKm5h8GkxZwF6pC0dERER2Vwa15IQbx+TH0Dgm4Wgck/A0jolINDSOSXiVPY7Jtq3FSfsSyamWHf9xTMzsdRKXCe/E3U9KekQiIiJSZYVtyrmy1HxNoCewLfnhiIiIyO6ySmzLMbPTgNuBbOBed785qc+/p005ZjbT3Y+saD015YSjppzw1JQjEg015YRX2U05yfyuzc4p+yQbDCHyCYmBVpcAs4Bz3f3fydp/2KacBqUWs4AOQF6yghAREZG0cCSw0N0XAZjZ48BZQOUmJsD7JPqYGLAV+AzoH2bD8jKvqJjZQHcfE3UcpWVHHUAZ4nis4kjHKTwdq3DieJxy69SIOoQfiONxikIyv2vNbCAwsFTRmFLHeB/gi1KPLQF+lqx9Q4gB1gK/A37i7i2Bh0mMZbIpmYFUsoEVryIBHatwdJzC07EKR8cpHB2nJHP3Me7eodRUqYlf2MRkhLuvN7NjgZOAe4G7UxeWiIiIxNBSoKDU8r5BWdKETUyKg7/dgLHu/hKgIepFRESqlllAWzNrGdyq5hxgYjJ3EDYxWWpmo4G+wCQzq7Eb28ZRlW+P3A06VuHoOIWnYxWOjlM4Ok6VyN23AYOBKcB84El3/ziZ+wh1ubCZ1QZOA+a5+wIzawYc4u7/TGYwIiIiUrWlfEh6ERERkbDSuTlGREREMowSkyrMzC43s/lm9mjUsaQTM5sRdQxxZGYboo4hHZlZCzP7KOo4MpGZTTKzelHHIbtHTTkhWeJGBObuJVHHkixm9h/gZHdf8iOeIyfoDCVVnJltcPc6UceRbsysBfCiu7eLOJTYC3u+ycTzdVWS9jUmZvacmb1vZh8Ho9VhZhvM7CYz+9DM3jWz/KC8dbA8z8xuLP0Lz8yuMrNZZjbXzP4QlLUws/+a2UPAR+x87XZaM7N7gFbAy2Y23MzuN7OZZvYvMzsrWKeFmU03sznBdHRQfmJQPpEkDkOcLoL3l5nZX83so+D91Dd47CEz61Fq3Ue3H8+qopxj87iZdSu13oNm1svMsoP1t3/+fhFd9HvOzHLN7KXgvPORmfU1s+uC1/WRmY0JvjAxs8OD9T4EBpV6jovM7Bkzm2xmC8zsllKPdTGzd4LP4lNmVicov9nM/h0cu78FZb2DfX5oZtMq+VBUqIxj9ZmZNQoe72BmbwTzN5jZw2b2NvBwcIyeN7M3gmN0fbDeD87X259zV/sLtjnczN4MvkOmWOLCDomau6f1BDQI/tYi8WZsSGL4/DOC8ltIDBAH8CKJmw0B/BLYEMx3IXHJmZFI1l4EjgdaACXAUVG/zhQdu8+ARsCfgZ8HZfVI3KApF6gN1AzK2wKzg/kTSYz+2zLq1xDRcdtA4g7br5C4m0A+8DnQDDgBeC5YLw/4H5ATdcyVdVyCv2Udm0JgXLBOdRLDWtciMXLn9s9oDWB2Or63gtc9ttRy3vbzU7D8cKnz0lzg+GD+r8BHwfxFwKJg25rAYhI/iBoB04DcYL3fAdcF57v/8l3td73g7zxgn9JlcZrKOFafAY2C5Q7AG8H8DSRui1Kr1DFaFrz27ef9Drs6X5c6x+1qf9WAGUDjoKwvcH/Ux0aTp3+NCXB58KvjXRIf4LbAFhLJBSTe0C2C+Y7AU8H8Y6Weo0sw/QuYAxwYPA/AYnd/N1XBx0QX4Goz+wB4g8QJsTmJD+5YM5tH4rgdVGqbme7+v8oONEaOBca7e7G7FwFvAke4+5skBh9qDJwLPO1Vr6lrl8cGeBnoZIlxkE4Hprn7NyTef/2C9997JL5w2u76qWNtHnCKmf3FzI5z93UkXu97wWfoJOBgS/R5qOfu22syHv7e87zm7uvcfTOJGsn9gKNIfP7eDo7ThUH5OmAzcJ+Znc13twp5G3jQzAYQz1tx7epYlWdi8F7Z7hV3XxWUPUPiPQdln693tb8DgHbAK8ExHUFiFFOJWNib+MWSmZ0InAx0dPdNQdVfTWCrBykwiVFrK3qdBox099Hfe/4WJGoGMp0BPd39vzsVmt0AFAHtSdQkbS71cFU4LnvqIeDnJEZEvDjiWGLD3TcHn9FTSfw6fTx4yIAh7j4lqtiSwd0/MbPDgK7AjWb2Golmmg7u/kXweaoZ4qm+LTW//
fxlJL6Mz/3+ymZ2JNAZ6EVi4KuT3P2XZvYzEqN1v29mh7v7qh/x8pKqjGO1je+6F3z/OH3/fPP9zpFexnrl7e9Z4GN377iHL0NSJN1rTPKANUFSciCJXxXleZdElR4kvjS2mwJcUqrNdh8za5L0aONrCjCkVPv3T4PyPGCZJzqQXUA8f3lFZTrQN+gf0ZhE09/M4LEHgaEA7l7l+uBQ/rF5gkSydhwwOSibAvzKzKoBmNn+ZpZbyTH/aGa2N7DJ3R8h0TxzWPDQV8G5pReAu68F1lri3mMA54d4+neBY8ysTbCv3OA41QHy3H0S8GsSPyIws9bu/p67XwesJGb948o4Vp8Bhwer9Cxj0+1OMbMGZlYL6EGihmh39/dfoLGZdQzWqWZmB+/hS5IkSusaExIntl+a2XwSb7KKmlyGAo+Y2fBg23UA7v5PM/t/wDvBd/MGEr94i8t6ogzzJ+A2YK6ZZZHoF9EduAt42sz6kTheqiVJcBK/tjoCHwbLw9x9OYC7FwXvyeeiCzFSZR4b4J8kmi6ed/ctQdm9JJpb/397d4wSMRAFYPh/ndpYeAArSxv1CB7BwlLtBCuvYLmCIrZaLVhZeADBWmQrDyFsI1gICj6LGTsJG1Q2cf+vTAYymRTz8l5mMqrB8Zgy2fTNKjCIiA/gHdin3Mcj8ET5x8iXXeAyIpIyJo0ycxwRO8BVLYVBKT28ADcRMUfJqhzWc4OIzim/ugAAALdJREFUWKnHbinPoku+G6t5SknqiFJSbnIPXFNKL8PMfKgZ7omvl5lvEbEFnEXEImU+PAV+dXt1tTdTy4WjbK3/mpkZEduUD2FnasWEfiYiloBRZi43tFmg1LTXJqidS2qhBmgbmXkw7b7ob/Q9Y9LWOnBe38qegb0p90c9UtPBd8BxQ5tN4AI4MSiRpPZmKmMiSZK6re8fv0qSpH/EwESSJHWGgYkkSeoMAxNJktQZBiaSJKkzPgFrstqlMbHCggAAAABJRU5ErkJggg==\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "tags": [], + "needs_background": "light" + } + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "QKh_bJxtlhkW" + }, + "source": [ + "From the above plot we can see that the most confused classes are 'joy' and 'love' which seems obivous as these two emotions are really close. We can say the same thing 'surprise' and 'anger' as well. So our model is doing pretty well." + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "16TiclmeX1xE" + }, + "source": [], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "vZ-YLmJyg64T" + }, + "source": [ + "## SWAG\n", + "\n", + "Now lets try a more challenging task and see how it performs.\n", + "\n", + "SWAG is a natural language inference and commonsense reasoning task proposed in this [paper](https://arxiv.org/pdf/1808.05326.pdf).\n", + "\n", + "The basic task is that a model is\n", + "given a context **c = (s, n)**: a complete sentence\n", + "**s** and a noun phrase **n** that begins a second sentence, as well as a list of possible verb phrase sentence endings **V**. The model must then\n", + "select the most appropriate verb phrase **v** in **V**. For example\n", + "\n", + "On stage, a woman takes a seat at the piano. She\n", + "\n", + "a) sits on a bench as her sister plays with the doll.\n", + "\n", + "b) smiles with someone as the music plays.\n", + "\n", + "c) is in the crowd, watching the dancers.\n", + "\n", + "**d) nervously sets her fingers on the keys.**\n", + "\n", + "The correct answer is bolded. Given the above example the model should select **nervously sets her fingers on the keys** as the most appropriate verb phrase\n", + "\n", + "To frame this task in text-2-text setting the example is processed as below.\n", + "\n", + "context: context_text options: 1: option_1 2: option_2 3: option_3 4: option_4\n", + "\n", + "and if the actual label is 1 then the model is asked to predict the text '1'. Here's how the above example will be processed\n", + "\n", + "**Input**\n", + "\n", + "context: On stage, a woman takes a seat at the piano. She options: 1: sits on a bench as her sister plays with the doll. 2: smiles with someone as the music plays. 3: is in the crowd, watching the dancers. 4: nervously sets her fingers on the keys.\n", + "\n", + "**Target**\n", + "\n", + "4\n", + "\n", + "This is just one possible way to process these examples, there are various other ways we can formulate this problem in text-2-text setting but that's for later." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "hOxk-ZoJmamm" + }, + "source": [ + "### Dataset" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "yeHfgOhThLPj" + }, + "source": [ + "import csv\n", + "from dataclasses import dataclass\n", + "\n", + "from enum import Enum\n", + "from typing import List, Optional\n", + "from transformers import PreTrainedTokenizer" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "3DulV7U5hik7", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 386 + }, + "outputId": "880c611b-d11c-4620-9d75-0bcfa423c1ff" + }, + "source": [ + "!wget https://raw.githubusercontent.com/rowanz/swagaf/master/data/train.csv\n", + "!wget https://raw.githubusercontent.com/rowanz/swagaf/master/data/val.csv\n", + "\n", + "!mkdir swag_data\n", + "!mv *.csv swag_data" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "text": [ + "--2020-05-09 15:06:34-- https://raw.githubusercontent.com/rowanz/swagaf/master/data/train.csv\n", + "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 151.101.0.133, 151.101.64.133, 151.101.128.133, ...\n", + "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|151.101.0.133|:443... connected.\n", + "HTTP request sent, awaiting response... 200 OK\n", + "Length: 28243333 (27M) [text/plain]\n", + "Saving to: ‘train.csv’\n", + "\n", + "train.csv 100%[===================>] 26.93M 35.9MB/s in 0.8s \n", + "\n", + "2020-05-09 15:06:35 (35.9 MB/s) - ‘train.csv’ saved [28243333/28243333]\n", + "\n", + "--2020-05-09 15:06:38-- https://raw.githubusercontent.com/rowanz/swagaf/master/data/val.csv\n", + "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 151.101.0.133, 151.101.64.133, 151.101.128.133, ...\n", + "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|151.101.0.133|:443... connected.\n", + "HTTP request sent, awaiting response... 200 OK\n", + "Length: 7893588 (7.5M) [text/plain]\n", + "Saving to: ‘val.csv’\n", + "\n", + "val.csv 100%[===================>] 7.53M 17.5MB/s in 0.4s \n", + "\n", + "2020-05-09 15:06:39 (17.5 MB/s) - ‘val.csv’ saved [7893588/7893588]\n", + "\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "Tllm6irZg8IO" + }, + "source": [ + "# below code is adapted from https://github.com/huggingface/transformers/blob/master/examples/multiple-choice/utils_multiple_choice.py\n", + "\n", + "@dataclass(frozen=True)\n", + "class InputExample:\n", + " \"\"\"\n", + " A single training/test example for multiple choice\n", + " Args:\n", + " example_id: Unique id for the example.\n", + " question: string. The untokenized text of the second sequence (question).\n", + " contexts: list of str. The untokenized text of the first sequence (context of corresponding question).\n", + " endings: list of str. multiple choice's options. Its length must be equal to contexts' length.\n", + " label: (Optional) string. The label of the example. 
This should be\n", + " specified for train and dev examples, but not for test examples.\n", + " \"\"\"\n", + "\n", + " example_id: str\n", + " context: str\n", + " endings: List[str]\n", + " label: Optional[str]\n", + "\n", + "class Split(Enum):\n", + " train = \"train\"\n", + " dev = \"dev\"\n", + " test = \"test\"\n", + "\n", + "class DataProcessor:\n", + " \"\"\"Base class for data converters for multiple choice data sets.\"\"\"\n", + "\n", + " def get_train_examples(self, data_dir):\n", + " \"\"\"Gets a collection of `InputExample`s for the train set.\"\"\"\n", + " raise NotImplementedError()\n", + "\n", + " def get_dev_examples(self, data_dir):\n", + " \"\"\"Gets a collection of `InputExample`s for the dev set.\"\"\"\n", + " raise NotImplementedError()\n", + "\n", + " def get_test_examples(self, data_dir):\n", + " \"\"\"Gets a collection of `InputExample`s for the test set.\"\"\"\n", + " raise NotImplementedError()\n", + "\n", + " def get_labels(self):\n", + " \"\"\"Gets the list of labels for this data set.\"\"\"\n", + " raise NotImplementedError()\n", + "\n", + "class SwagProcessor(DataProcessor):\n", + " \"\"\"Processor for the SWAG data set.\"\"\"\n", + "\n", + " def get_train_examples(self, data_dir):\n", + " \"\"\"See base class.\"\"\"\n", + " logger.info(\"LOOKING AT {} train\".format(data_dir))\n", + " return self._create_examples(self._read_csv(os.path.join(data_dir, \"train.csv\")), \"train\")\n", + "\n", + " def get_dev_examples(self, data_dir):\n", + " \"\"\"See base class.\"\"\"\n", + " logger.info(\"LOOKING AT {} dev\".format(data_dir))\n", + " return self._create_examples(self._read_csv(os.path.join(data_dir, \"val.csv\")), \"dev\")\n", + "\n", + " def get_test_examples(self, data_dir):\n", + " \"\"\"See base class.\"\"\"\n", + " logger.info(\"LOOKING AT {} dev\".format(data_dir))\n", + " raise ValueError(\n", + " \"For swag testing, the input file does not contain a label column. 
It can not be tested in current code\"\n", + " \"setting!\"\n", + " )\n", + " return self._create_examples(self._read_csv(os.path.join(data_dir, \"test.csv\")), \"test\")\n", + "\n", + " def get_labels(self):\n", + " \"\"\"See base class.\"\"\"\n", + " return [\"0\", \"1\", \"2\", \"3\"]\n", + "\n", + " def _read_csv(self, input_file):\n", + " with open(input_file, \"r\", encoding=\"utf-8\") as f:\n", + " return list(csv.reader(f))\n", + "\n", + " def _create_examples(self, lines: List[List[str]], type: str):\n", + " \"\"\"Creates examples for the training and dev sets.\"\"\"\n", + " if type == \"train\" and lines[0][-1] != \"label\":\n", + " raise ValueError(\"For training, the input file must contain a label column.\")\n", + "\n", + " examples = [\n", + " InputExample(\n", + " example_id=line[2],\n", + " # common beginning of each\n", + " # choice is stored in \"sent2\".\n", + " context=line[3],\n", + " endings=[line[7], line[8], line[9], line[10]],\n", + " label=line[11],\n", + " )\n", + " for line in lines[1:] # we skip the line with the column names\n", + " ]\n", + "\n", + " return examples" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "-OXxGvqZjC9L" + }, + "source": [ + "class SwagDataset(Dataset):\n", + " def __init__(self, tokenizer, data_dir, type_path, max_len=512):\n", + " self.data_dir = data_dir\n", + " self.type_path = type_path\n", + " self.max_len = max_len\n", + " self.tokenizer = tokenizer\n", + " self.inputs = []\n", + " self.targets = []\n", + "\n", + " self.proc = SwagProcessor()\n", + "\n", + " self._build()\n", + "\n", + " def __getitem__(self, index):\n", + " source_ids = self.inputs[index][\"input_ids\"].squeeze()\n", + " target_ids = self.targets[index][\"input_ids\"].squeeze()\n", + "\n", + " src_mask = self.inputs[index][\"attention_mask\"].squeeze() # might need to squeeze\n", + " target_mask = self.targets[index][\"attention_mask\"].squeeze() # might need to squeeze\n", + "\n", + " return {\"source_ids\": source_ids, \"source_mask\": src_mask, \"target_ids\": target_ids, \"target_mask\": target_mask}\n", + "\n", + " def __len__(self):\n", + " return len(self.inputs)\n", + "\n", + " def _build(self):\n", + " if self.type_path == 'train':\n", + " examples = self.proc.get_train_examples(self.data_dir)\n", + " else:\n", + " examples = self.proc.get_dev_examples(self.data_dir)\n", + "\n", + " for example in examples:\n", + " self._create_features(example)\n", + "\n", + " def _create_features(self, example):\n", + " input_ = example.context\n", + " options = ['%s: %s' % (i, option) for i, option in zip('1234', example.endings)]\n", + " options = \" \".join(options)\n", + " input_ = \"context: %s options: %s \" % (input_, options)\n", + " target = \"%s \" % str(int(example.label) + 1)\n", + "\n", + " # tokenize inputs\n", + " tokenized_inputs = self.tokenizer.batch_encode_plus(\n", + " [input_], max_length=self.max_len, pad_to_max_length=True, return_tensors=\"pt\"\n", + " )\n", + " # tokenize targets\n", + " tokenized_targets = self.tokenizer.batch_encode_plus(\n", + " [target], max_length=2, pad_to_max_length=True, return_tensors=\"pt\"\n", + " )\n", + "\n", + " self.inputs.append(tokenized_inputs)\n", + " self.targets.append(tokenized_targets)" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "oKqFMTku3sDC", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 186, + "referenced_widgets": [ + "78b1b91a08214461b74fb1e143247d1e", + 
"902a509471004d2691d807c4990fccd2", + "74ec15497e1743a4af6be12e3bc1487d", + "a70b457d9379403f9fac247de68bb8e3", + "28f9d9aa0ece4831b0f9e412d8a88f8d", + "7640680e1006492da75d873726567fed", + "1090e3e017564a2281c60fb53a901c75", + "9df2679ba627444e9b76bd2ff0ddc657" + ] + }, + "outputId": "97ce9f8a-4b75-4d95-ba04-fae101f8db82" + }, + "source": [ + "tokenizer = T5Tokenizer.from_pretrained('t5-base')" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "text": [ + "INFO:filelock:Lock 140245777042344 acquired on /root/.cache/torch/transformers/68f1b8dbca4350743bb54b8c4169fd38cbabaad564f85a9239337a8d0342af9f.9995af32582a1a7062cb3173c118cb7b4636fa03feb967340f20fc37406f021f.lock\n", + "INFO:transformers.file_utils:https://s3.amazonaws.com/models.huggingface.co/bert/t5-spiece.model not found in cache or force_download set to True, downloading to /root/.cache/torch/transformers/tmpv2ybakmg\n" + ], + "name": "stderr" + }, + { + "output_type": "display_data", + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "78b1b91a08214461b74fb1e143247d1e", + "version_minor": 0, + "version_major": 2 + }, + "text/plain": [ + "HBox(children=(FloatProgress(value=0.0, description='Downloading', max=791656.0, style=ProgressStyle(descripti…" + ] + }, + "metadata": { + "tags": [] + } + }, + { + "output_type": "stream", + "text": [ + "INFO:transformers.file_utils:storing https://s3.amazonaws.com/models.huggingface.co/bert/t5-spiece.model in cache at /root/.cache/torch/transformers/68f1b8dbca4350743bb54b8c4169fd38cbabaad564f85a9239337a8d0342af9f.9995af32582a1a7062cb3173c118cb7b4636fa03feb967340f20fc37406f021f\n", + "INFO:transformers.file_utils:creating metadata file for /root/.cache/torch/transformers/68f1b8dbca4350743bb54b8c4169fd38cbabaad564f85a9239337a8d0342af9f.9995af32582a1a7062cb3173c118cb7b4636fa03feb967340f20fc37406f021f\n", + "INFO:filelock:Lock 140245777042344 released on /root/.cache/torch/transformers/68f1b8dbca4350743bb54b8c4169fd38cbabaad564f85a9239337a8d0342af9f.9995af32582a1a7062cb3173c118cb7b4636fa03feb967340f20fc37406f021f.lock\n", + "INFO:transformers.tokenization_utils:loading file https://s3.amazonaws.com/models.huggingface.co/bert/t5-spiece.model from cache at /root/.cache/torch/transformers/68f1b8dbca4350743bb54b8c4169fd38cbabaad564f85a9239337a8d0342af9f.9995af32582a1a7062cb3173c118cb7b4636fa03feb967340f20fc37406f021f\n" + ], + "name": "stderr" + }, + { + "output_type": "stream", + "text": [ + "\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "PIUiU7zSpbb3", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 50 + }, + "outputId": "328b5f15-fe96-43ce-99e9-5d4233a7e97a" + }, + "source": [ + "dataset = SwagDataset(tokenizer, data_dir='swag_data', type_path='val')\n", + "len(dataset)" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "text": [ + "INFO:__main__:LOOKING AT swag_data dev\n" + ], + "name": "stderr" + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "20006" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 14 + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "zxXGbCzB37HG", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 70 + }, + "outputId": "8fbda79c-7be7-4d5f-8d5f-7b986a1c374b" + }, + "source": [ + "data = dataset[69]\n", + "print(tokenizer.decode(data['source_ids']))\n", + "print(tokenizer.decode(data['target_ids']))" + ], + "execution_count": null, + "outputs": [ + { + 
"output_type": "stream", + "text": [ + "context: A little girl plays softly the drums holding two sticks while she is singing on a microphone. The, the girl options: 1: take in the greeting and an asian girl followed by two people standing on stage. 2: holds the microphone up and begins to girl dance an entire time. 3: claps the girls hands anxiously. 4: plays more fast the drums.\n", + "4\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "aVfmE4O3Ku7H" + }, + "source": [ + "### Train" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "DDPxWUY86llx" + }, + "source": [ + "!mkdir -p t5_swag" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "PrWtMjcj6lmA", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 54 + }, + "outputId": "fe4e58ab-6916-45f9-f742-797d87ad1ef4" + }, + "source": [ + "args_dict.update({'data_dir': 'swag_data', 'output_dir': 't5_swag', 'num_train_epochs': 3})\n", + "args = argparse.Namespace(**args_dict)\n", + "print(args_dict)" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "text": [ + "{'data_dir': 'swag_data', 'output_dir': 't5_swag', 'model_name_or_path': 't5-base', 'tokenizer_name_or_path': 't5-base', 'max_seq_length': 512, 'learning_rate': 0.0003, 'weight_decay': 0.0, 'adam_epsilon': 1e-08, 'warmup_steps': 0, 'train_batch_size': 8, 'eval_batch_size': 8, 'num_train_epochs': 3, 'gradient_accumulation_steps': 16, 'n_gpu': 1, 'early_stop_callback': False, 'fp_16': False, 'opt_level': 'O1', 'max_grad_norm': 1.0, 'seed': 42}\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "2Ojz3THj6lmK" + }, + "source": [ + "checkpoint_callback = pl.callbacks.ModelCheckpoint(\n", + " filepath=args.output_dir, prefix=\"checkpoint\", monitor=\"val_loss\", mode=\"min\", save_top_k=5\n", + ")\n", + "\n", + "train_params = dict(\n", + " accumulate_grad_batches=args.gradient_accumulation_steps,\n", + " gpus=args.n_gpu,\n", + " max_epochs=args.num_train_epochs,\n", + " early_stop_callback=False,\n", + " precision= 16 if args.fp_16 else 32,\n", + " amp_level=args.opt_level,\n", + " gradient_clip_val=args.max_grad_norm,\n", + " checkpoint_callback=checkpoint_callback,\n", + " callbacks=[LoggingCallback()],\n", + ")" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "Kk0x0Nql6lmQ" + }, + "source": [ + "def get_dataset(tokenizer, type_path, args):\n", + " return SwagDataset(tokenizer=tokenizer, data_dir=args.data_dir, type_path=type_path, max_len=args.max_seq_length)" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "XDFGzzpQ6lmU", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000, + "referenced_widgets": [ + "5c7427d7db844b9691d30cf2de1efc17", + "bb0df1833ee3489da5c2a9c7b1306cc6", + "3d2817812b6f475a8c838fd14646469a", + "9d0f0c946790477fb8bc8bac64dfd7de", + "8254b8062d5e4280bea46f8bc444c5db", + "ab5f07ab5c574148a0062eb7f1ce5bcd", + "47fdc2009efc443392ecd182996fcca9", + "9b705e83fea84cbf912e33d6342be721", + "e8e8ea6199df43019930ac7b557c46a5", + "0566f29b017f47f399d7579d7929e046", + "932309f0a40b46659c0cac7cc37fdc05", + "da3665141bd44a24a5b5c9f36d4a9c52", + "5c98e3a5b6a6403a936a725f4c30cdd3", + "8da2b560fa9348098a2a7f09967d5f5f", + "7e37cac227014717987922341f8099fe", + "b95f98f98a76434591f90d41b43e39ba" + ] + }, + "outputId": "94aa8d13-9d11-4fa9-979f-e3bbf15bb639" + }, + 
"source": [ + "model = T5FineTuner(args)" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "text": [ + "INFO:filelock:Lock 140242832534944 acquired on /root/.cache/torch/transformers/40578967d1f029acb6162b36db9d8b4307063e885990ccd297c2c5be1cf1b3d7.2995d650f5eba18c8baa4146e210d32d56165e90d374281741fc78b872cd6c9b.lock\n", + "INFO:transformers.file_utils:https://s3.amazonaws.com/models.huggingface.co/bert/t5-base-config.json not found in cache or force_download set to True, downloading to /root/.cache/torch/transformers/tmpwv74k3ig\n" + ], + "name": "stderr" + }, + { + "output_type": "display_data", + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "5c7427d7db844b9691d30cf2de1efc17", + "version_minor": 0, + "version_major": 2 + }, + "text/plain": [ + "HBox(children=(FloatProgress(value=0.0, description='Downloading', max=1199.0, style=ProgressStyle(description…" + ] + }, + "metadata": { + "tags": [] + } + }, + { + "output_type": "stream", + "text": [ + "INFO:transformers.file_utils:storing https://s3.amazonaws.com/models.huggingface.co/bert/t5-base-config.json in cache at /root/.cache/torch/transformers/40578967d1f029acb6162b36db9d8b4307063e885990ccd297c2c5be1cf1b3d7.2995d650f5eba18c8baa4146e210d32d56165e90d374281741fc78b872cd6c9b\n", + "INFO:transformers.file_utils:creating metadata file for /root/.cache/torch/transformers/40578967d1f029acb6162b36db9d8b4307063e885990ccd297c2c5be1cf1b3d7.2995d650f5eba18c8baa4146e210d32d56165e90d374281741fc78b872cd6c9b\n", + "INFO:filelock:Lock 140242832534944 released on /root/.cache/torch/transformers/40578967d1f029acb6162b36db9d8b4307063e885990ccd297c2c5be1cf1b3d7.2995d650f5eba18c8baa4146e210d32d56165e90d374281741fc78b872cd6c9b.lock\n", + "INFO:transformers.configuration_utils:loading configuration file https://s3.amazonaws.com/models.huggingface.co/bert/t5-base-config.json from cache at /root/.cache/torch/transformers/40578967d1f029acb6162b36db9d8b4307063e885990ccd297c2c5be1cf1b3d7.2995d650f5eba18c8baa4146e210d32d56165e90d374281741fc78b872cd6c9b\n", + "INFO:transformers.configuration_utils:Model config T5Config {\n", + " \"architectures\": [\n", + " \"T5WithLMHeadModel\"\n", + " ],\n", + " \"d_ff\": 3072,\n", + " \"d_kv\": 64,\n", + " \"d_model\": 768,\n", + " \"decoder_start_token_id\": 0,\n", + " \"dropout_rate\": 0.1,\n", + " \"eos_token_id\": 1,\n", + " \"initializer_factor\": 1.0,\n", + " \"is_encoder_decoder\": true,\n", + " \"layer_norm_epsilon\": 1e-06,\n", + " \"model_type\": \"t5\",\n", + " \"n_positions\": 512,\n", + " \"num_heads\": 12,\n", + " \"num_layers\": 12,\n", + " \"output_past\": true,\n", + " \"pad_token_id\": 0,\n", + " \"relative_attention_num_buckets\": 32,\n", + " \"task_specific_params\": {\n", + " \"summarization\": {\n", + " \"early_stopping\": true,\n", + " \"length_penalty\": 2.0,\n", + " \"max_length\": 200,\n", + " \"min_length\": 30,\n", + " \"no_repeat_ngram_size\": 3,\n", + " \"num_beams\": 4,\n", + " \"prefix\": \"summarize: \"\n", + " },\n", + " \"translation_en_to_de\": {\n", + " \"early_stopping\": true,\n", + " \"max_length\": 300,\n", + " \"num_beams\": 4,\n", + " \"prefix\": \"translate English to German: \"\n", + " },\n", + " \"translation_en_to_fr\": {\n", + " \"early_stopping\": true,\n", + " \"max_length\": 300,\n", + " \"num_beams\": 4,\n", + " \"prefix\": \"translate English to French: \"\n", + " },\n", + " \"translation_en_to_ro\": {\n", + " \"early_stopping\": true,\n", + " \"max_length\": 300,\n", + " \"num_beams\": 4,\n", + " \"prefix\": \"translate 
English to Romanian: \"\n", + " }\n", + " },\n", + " \"vocab_size\": 32128\n", + "}\n", + "\n" + ], + "name": "stderr" + }, + { + "output_type": "stream", + "text": [ + "\n" + ], + "name": "stdout" + }, + { + "output_type": "stream", + "text": [ + "INFO:filelock:Lock 140242971659568 acquired on /root/.cache/torch/transformers/f6f2fde9fa7611f4eff74620de9cbe734e7a717b5b143bd283cae4c2d6022990.54f906ff53bd09195cfc183a29cadc81b7705f07fcdb796d24163cb632b6bdfa.lock\n", + "INFO:transformers.file_utils:https://cdn.huggingface.co/t5-base-pytorch_model.bin not found in cache or force_download set to True, downloading to /root/.cache/torch/transformers/tmp5pcfx_u3\n" + ], + "name": "stderr" + }, + { + "output_type": "display_data", + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "e8e8ea6199df43019930ac7b557c46a5", + "version_minor": 0, + "version_major": 2 + }, + "text/plain": [ + "HBox(children=(FloatProgress(value=0.0, description='Downloading', max=891691430.0, style=ProgressStyle(descri…" + ] + }, + "metadata": { + "tags": [] + } + }, + { + "output_type": "stream", + "text": [ + "INFO:transformers.file_utils:storing https://cdn.huggingface.co/t5-base-pytorch_model.bin in cache at /root/.cache/torch/transformers/f6f2fde9fa7611f4eff74620de9cbe734e7a717b5b143bd283cae4c2d6022990.54f906ff53bd09195cfc183a29cadc81b7705f07fcdb796d24163cb632b6bdfa\n", + "INFO:transformers.file_utils:creating metadata file for /root/.cache/torch/transformers/f6f2fde9fa7611f4eff74620de9cbe734e7a717b5b143bd283cae4c2d6022990.54f906ff53bd09195cfc183a29cadc81b7705f07fcdb796d24163cb632b6bdfa\n", + "INFO:filelock:Lock 140242971659568 released on /root/.cache/torch/transformers/f6f2fde9fa7611f4eff74620de9cbe734e7a717b5b143bd283cae4c2d6022990.54f906ff53bd09195cfc183a29cadc81b7705f07fcdb796d24163cb632b6bdfa.lock\n", + "INFO:transformers.modeling_utils:loading weights file https://cdn.huggingface.co/t5-base-pytorch_model.bin from cache at /root/.cache/torch/transformers/f6f2fde9fa7611f4eff74620de9cbe734e7a717b5b143bd283cae4c2d6022990.54f906ff53bd09195cfc183a29cadc81b7705f07fcdb796d24163cb632b6bdfa\n" + ], + "name": "stderr" + }, + { + "output_type": "stream", + "text": [ + "\n" + ], + "name": "stdout" + }, + { + "output_type": "stream", + "text": [ + "INFO:transformers.modeling_utils:Weights of T5ForConditionalGeneration not initialized from pretrained model: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight', 'lm_head.weight']\n", + "INFO:transformers.tokenization_utils:loading file https://s3.amazonaws.com/models.huggingface.co/bert/t5-spiece.model from cache at /root/.cache/torch/transformers/68f1b8dbca4350743bb54b8c4169fd38cbabaad564f85a9239337a8d0342af9f.9995af32582a1a7062cb3173c118cb7b4636fa03feb967340f20fc37406f021f\n" + ], + "name": "stderr" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "1sQVILFo63Eb", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 50 + }, + "outputId": "57300f1a-14a8-4e26-8dac-9238e34741c0" + }, + "source": [ + "trainer = pl.Trainer(**train_params)" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "text": [ + "INFO:lightning:GPU available: True, used: True\n", + "INFO:lightning:CUDA_VISIBLE_DEVICES: [0]\n" + ], + "name": "stderr" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "STkqK5nC64YP", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000, + "referenced_widgets": [ + "8e79d03deee94b299431330441bd64c8", + "510043ffee634f86b89ec3fc060a74ea", + 
"e86c5fbd48ce4215a0df353122183982", + "bfc3a5a3cf2e49868053db6f1ef7785d", + "361a2f79ed89495894d0b09a709f8f32", + "f7e53d55f0234627a3b9f2c90eb8682f", + "3584c01b0c5e47dfa373bae29461e94a", + "cfd9db6f31474a8189e741bf8fdad6a9", + "68705cee3df5458fb5145046337d925c", + "4cf1613d58bd450780ac95c994686985", + "3ee5f7cf56394175900ebb14ae0b5f9e", + "9f054dcf926c45459b7aa728493571a0", + "b52599dda9d94c83891d1c42c5f557e0", + "a1cf907a3bcc4177b1d5dd9edbf30c20", + "82b29ceeb21c417782e9e29a81eb47ea", + "886260804ffd4e11bc93fb6e098111ab", + "69f6eb1cb0434128961b5d83529813c5", + "6723d50588a248d0ad7bb118de8c3fd5", + "86d71b8233c14252a897ffa29ea6d9df", + "d01c708e22ab423896271fa79860e7c3", + "0e8da5995754472fac5fba1f8b30d107", + "3dbee77f299f4e14a1698b60d609b8a1", + "8c4c9025aaae44148591ae6f8bb37347", + "29e2f2f0914e4dea8117844675b42be5", + "0cfc8fa73f164b4fa5ddcbc3f115ef9b", + "4559bd35b33f4804b968debaaf316463", + "e403cc7718bf48f1b95150482e083f02", + "f6248a9db7f2466a9ab3a4fbd214f265", + "475e5353d31147d3ab156c0e7835684c", + "c3f65d683c6e4fe18e31ecc305f8d455", + "9b50abad66b44022aa389bc3f312db6b", + "762b2941ff3e47d89b6e6ce4350bc058" + ] + }, + "outputId": "cb613d72-009f-44eb-acd8-b9c3dd44b0cb" + }, + "source": [ + "trainer.fit(model)" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "text": [ + "INFO:lightning:\n", + " | Name | Type | Params\n", + "-----------------------------------------------------------------------------------------------------------------\n", + "0 | model | T5ForConditionalGeneration | 222 M \n", + "1 | model.shared | Embedding | 24 M \n", + "2 | model.encoder | T5Stack | 109 M \n", + "3 | model.encoder.block | ModuleList | 84 M \n", + "4 | model.encoder.block.0 | T5Block | 7 M \n", + "5 | model.encoder.block.0.layer | ModuleList | 7 M \n", + "6 | model.encoder.block.0.layer.0 | T5LayerSelfAttention | 2 M \n", + "7 | model.encoder.block.0.layer.0.SelfAttention | T5Attention | 2 M \n", + "8 | model.encoder.block.0.layer.0.SelfAttention.q | Linear | 589 K \n", + "9 | model.encoder.block.0.layer.0.SelfAttention.k | Linear | 589 K \n", + "10 | model.encoder.block.0.layer.0.SelfAttention.v | Linear | 589 K \n", + "11 | model.encoder.block.0.layer.0.SelfAttention.o | Linear | 589 K \n", + "12 | model.encoder.block.0.layer.0.SelfAttention.relative_attention_bias | Embedding | 384 \n", + "13 | model.encoder.block.0.layer.0.layer_norm | T5LayerNorm | 768 \n", + "14 | model.encoder.block.0.layer.0.dropout | Dropout | 0 \n", + "15 | model.encoder.block.0.layer.1 | T5LayerFF | 4 M \n", + "16 | model.encoder.block.0.layer.1.DenseReluDense | T5DenseReluDense | 4 M \n", + "17 | model.encoder.block.0.layer.1.DenseReluDense.wi | Linear | 2 M \n", + "18 | model.encoder.block.0.layer.1.DenseReluDense.wo | Linear | 2 M \n", + "19 | model.encoder.block.0.layer.1.DenseReluDense.dropout | Dropout | 0 \n", + "20 | model.encoder.block.0.layer.1.layer_norm | T5LayerNorm | 768 \n", + "21 | model.encoder.block.0.layer.1.dropout | Dropout | 0 \n", + "22 | model.encoder.block.1 | T5Block | 7 M \n", + "23 | model.encoder.block.1.layer | ModuleList | 7 M \n", + "24 | model.encoder.block.1.layer.0 | T5LayerSelfAttention | 2 M \n", + "25 | model.encoder.block.1.layer.0.SelfAttention | T5Attention | 2 M \n", + "26 | model.encoder.block.1.layer.0.SelfAttention.q | Linear | 589 K \n", + "27 | model.encoder.block.1.layer.0.SelfAttention.k | Linear | 589 K \n", + "28 | model.encoder.block.1.layer.0.SelfAttention.v | Linear | 589 K \n", + "29 | model.encoder.block.1.layer.0.SelfAttention.o 
| Linear | 589 K \n", + "30 | model.encoder.block.1.layer.0.layer_norm | T5LayerNorm | 768 \n", + "31 | model.encoder.block.1.layer.0.dropout | Dropout | 0 \n", + "32 | model.encoder.block.1.layer.1 | T5LayerFF | 4 M \n", + "33 | model.encoder.block.1.layer.1.DenseReluDense | T5DenseReluDense | 4 M \n", + "34 | model.encoder.block.1.layer.1.DenseReluDense.wi | Linear | 2 M \n", + "35 | model.encoder.block.1.layer.1.DenseReluDense.wo | Linear | 2 M \n", + "36 | model.encoder.block.1.layer.1.DenseReluDense.dropout | Dropout | 0 \n", + "37 | model.encoder.block.1.layer.1.layer_norm | T5LayerNorm | 768 \n", + "38 | model.encoder.block.1.layer.1.dropout | Dropout | 0 \n", + "39 | model.encoder.block.2 | T5Block | 7 M \n", + "40 | model.encoder.block.2.layer | ModuleList | 7 M \n", + "41 | model.encoder.block.2.layer.0 | T5LayerSelfAttention | 2 M \n", + "42 | model.encoder.block.2.layer.0.SelfAttention | T5Attention | 2 M \n", + "43 | model.encoder.block.2.layer.0.SelfAttention.q | Linear | 589 K \n", + "44 | model.encoder.block.2.layer.0.SelfAttention.k | Linear | 589 K \n", + "45 | model.encoder.block.2.layer.0.SelfAttention.v | Linear | 589 K \n", + "46 | model.encoder.block.2.layer.0.SelfAttention.o | Linear | 589 K \n", + "47 | model.encoder.block.2.layer.0.layer_norm | T5LayerNorm | 768 \n", + "48 | model.encoder.block.2.layer.0.dropout | Dropout | 0 \n", + "49 | model.encoder.block.2.layer.1 | T5LayerFF | 4 M \n", + "50 | model.encoder.block.2.layer.1.DenseReluDense | T5DenseReluDense | 4 M \n", + "51 | model.encoder.block.2.layer.1.DenseReluDense.wi | Linear | 2 M \n", + "52 | model.encoder.block.2.layer.1.DenseReluDense.wo | Linear | 2 M \n", + "53 | model.encoder.block.2.layer.1.DenseReluDense.dropout | Dropout | 0 \n", + "54 | model.encoder.block.2.layer.1.layer_norm | T5LayerNorm | 768 \n", + "55 | model.encoder.block.2.layer.1.dropout | Dropout | 0 \n", + "56 | model.encoder.block.3 | T5Block | 7 M \n", + "57 | model.encoder.block.3.layer | ModuleList | 7 M \n", + "58 | model.encoder.block.3.layer.0 | T5LayerSelfAttention | 2 M \n", + "59 | model.encoder.block.3.layer.0.SelfAttention | T5Attention | 2 M \n", + "60 | model.encoder.block.3.layer.0.SelfAttention.q | Linear | 589 K \n", + "61 | model.encoder.block.3.layer.0.SelfAttention.k | Linear | 589 K \n", + "62 | model.encoder.block.3.layer.0.SelfAttention.v | Linear | 589 K \n", + "63 | model.encoder.block.3.layer.0.SelfAttention.o | Linear | 589 K \n", + "64 | model.encoder.block.3.layer.0.layer_norm | T5LayerNorm | 768 \n", + "65 | model.encoder.block.3.layer.0.dropout | Dropout | 0 \n", + "66 | model.encoder.block.3.layer.1 | T5LayerFF | 4 M \n", + "67 | model.encoder.block.3.layer.1.DenseReluDense | T5DenseReluDense | 4 M \n", + "68 | model.encoder.block.3.layer.1.DenseReluDense.wi | Linear | 2 M \n", + "69 | model.encoder.block.3.layer.1.DenseReluDense.wo | Linear | 2 M \n", + "70 | model.encoder.block.3.layer.1.DenseReluDense.dropout | Dropout | 0 \n", + "71 | model.encoder.block.3.layer.1.layer_norm | T5LayerNorm | 768 \n", + "72 | model.encoder.block.3.layer.1.dropout | Dropout | 0 \n", + "73 | model.encoder.block.4 | T5Block | 7 M \n", + "74 | model.encoder.block.4.layer | ModuleList | 7 M \n", + "75 | model.encoder.block.4.layer.0 | T5LayerSelfAttention | 2 M \n", + "76 | model.encoder.block.4.layer.0.SelfAttention | T5Attention | 2 M \n", + "77 | model.encoder.block.4.layer.0.SelfAttention.q | Linear | 589 K \n", + "78 | model.encoder.block.4.layer.0.SelfAttention.k | Linear | 589 K \n", + "79 | 
model.encoder.block.4.layer.0.SelfAttention.v | Linear | 589 K \n", + "80 | model.encoder.block.4.layer.0.SelfAttention.o | Linear | 589 K \n", + "81 | model.encoder.block.4.layer.0.layer_norm | T5LayerNorm | 768 \n", + "82 | model.encoder.block.4.layer.0.dropout | Dropout | 0 \n", + "83 | model.encoder.block.4.layer.1 | T5LayerFF | 4 M \n", + "84 | model.encoder.block.4.layer.1.DenseReluDense | T5DenseReluDense | 4 M \n", + "85 | model.encoder.block.4.layer.1.DenseReluDense.wi | Linear | 2 M \n", + "86 | model.encoder.block.4.layer.1.DenseReluDense.wo | Linear | 2 M \n", + "87 | model.encoder.block.4.layer.1.DenseReluDense.dropout | Dropout | 0 \n", + "88 | model.encoder.block.4.layer.1.layer_norm | T5LayerNorm | 768 \n", + "89 | model.encoder.block.4.layer.1.dropout | Dropout | 0 \n", + "90 | model.encoder.block.5 | T5Block | 7 M \n", + "91 | model.encoder.block.5.layer | ModuleList | 7 M \n", + "92 | model.encoder.block.5.layer.0 | T5LayerSelfAttention | 2 M \n", + "93 | model.encoder.block.5.layer.0.SelfAttention | T5Attention | 2 M \n", + "94 | model.encoder.block.5.layer.0.SelfAttention.q | Linear | 589 K \n", + "95 | model.encoder.block.5.layer.0.SelfAttention.k | Linear | 589 K \n", + "96 | model.encoder.block.5.layer.0.SelfAttention.v | Linear | 589 K \n", + "97 | model.encoder.block.5.layer.0.SelfAttention.o | Linear | 589 K \n", + "98 | model.encoder.block.5.layer.0.layer_norm | T5LayerNorm | 768 \n", + "99 | model.encoder.block.5.layer.0.dropout | Dropout | 0 \n", + "100 | model.encoder.block.5.layer.1 | T5LayerFF | 4 M \n", + "101 | model.encoder.block.5.layer.1.DenseReluDense | T5DenseReluDense | 4 M \n", + "102 | model.encoder.block.5.layer.1.DenseReluDense.wi | Linear | 2 M \n", + "103 | model.encoder.block.5.layer.1.DenseReluDense.wo | Linear | 2 M \n", + "104 | model.encoder.block.5.layer.1.DenseReluDense.dropout | Dropout | 0 \n", + "105 | model.encoder.block.5.layer.1.layer_norm | T5LayerNorm | 768 \n", + "106 | model.encoder.block.5.layer.1.dropout | Dropout | 0 \n", + "107 | model.encoder.block.6 | T5Block | 7 M \n", + "108 | model.encoder.block.6.layer | ModuleList | 7 M \n", + "109 | model.encoder.block.6.layer.0 | T5LayerSelfAttention | 2 M \n", + "110 | model.encoder.block.6.layer.0.SelfAttention | T5Attention | 2 M \n", + "111 | model.encoder.block.6.layer.0.SelfAttention.q | Linear | 589 K \n", + "112 | model.encoder.block.6.layer.0.SelfAttention.k | Linear | 589 K \n", + "113 | model.encoder.block.6.layer.0.SelfAttention.v | Linear | 589 K \n", + "114 | model.encoder.block.6.layer.0.SelfAttention.o | Linear | 589 K \n", + "115 | model.encoder.block.6.layer.0.layer_norm | T5LayerNorm | 768 \n", + "116 | model.encoder.block.6.layer.0.dropout | Dropout | 0 \n", + "117 | model.encoder.block.6.layer.1 | T5LayerFF | 4 M \n", + "118 | model.encoder.block.6.layer.1.DenseReluDense | T5DenseReluDense | 4 M \n", + "119 | model.encoder.block.6.layer.1.DenseReluDense.wi | Linear | 2 M \n", + "120 | model.encoder.block.6.layer.1.DenseReluDense.wo | Linear | 2 M \n", + "121 | model.encoder.block.6.layer.1.DenseReluDense.dropout | Dropout | 0 \n", + "122 | model.encoder.block.6.layer.1.layer_norm | T5LayerNorm | 768 \n", + "123 | model.encoder.block.6.layer.1.dropout | Dropout | 0 \n", + "124 | model.encoder.block.7 | T5Block | 7 M \n", + "125 | model.encoder.block.7.layer | ModuleList | 7 M \n", + "126 | model.encoder.block.7.layer.0 | T5LayerSelfAttention | 2 M \n", + "127 | model.encoder.block.7.layer.0.SelfAttention | T5Attention | 2 M \n", + "128 | 
model.encoder.block.7.layer.0.SelfAttention.q | Linear | 589 K \n", + "129 | model.encoder.block.7.layer.0.SelfAttention.k | Linear | 589 K \n", + "130 | model.encoder.block.7.layer.0.SelfAttention.v | Linear | 589 K \n", + "131 | model.encoder.block.7.layer.0.SelfAttention.o | Linear | 589 K \n", + "132 | model.encoder.block.7.layer.0.layer_norm | T5LayerNorm | 768 \n", + "133 | model.encoder.block.7.layer.0.dropout | Dropout | 0 \n", + "134 | model.encoder.block.7.layer.1 | T5LayerFF | 4 M \n", + "135 | model.encoder.block.7.layer.1.DenseReluDense | T5DenseReluDense | 4 M \n", + "136 | model.encoder.block.7.layer.1.DenseReluDense.wi | Linear | 2 M \n", + "137 | model.encoder.block.7.layer.1.DenseReluDense.wo | Linear | 2 M \n", + "138 | model.encoder.block.7.layer.1.DenseReluDense.dropout | Dropout | 0 \n", + "139 | model.encoder.block.7.layer.1.layer_norm | T5LayerNorm | 768 \n", + "140 | model.encoder.block.7.layer.1.dropout | Dropout | 0 \n", + "141 | model.encoder.block.8 | T5Block | 7 M \n", + "142 | model.encoder.block.8.layer | ModuleList | 7 M \n", + "143 | model.encoder.block.8.layer.0 | T5LayerSelfAttention | 2 M \n", + "144 | model.encoder.block.8.layer.0.SelfAttention | T5Attention | 2 M \n", + "145 | model.encoder.block.8.layer.0.SelfAttention.q | Linear | 589 K \n", + "146 | model.encoder.block.8.layer.0.SelfAttention.k | Linear | 589 K \n", + "147 | model.encoder.block.8.layer.0.SelfAttention.v | Linear | 589 K \n", + "148 | model.encoder.block.8.layer.0.SelfAttention.o | Linear | 589 K \n", + "149 | model.encoder.block.8.layer.0.layer_norm | T5LayerNorm | 768 \n", + "150 | model.encoder.block.8.layer.0.dropout | Dropout | 0 \n", + "151 | model.encoder.block.8.layer.1 | T5LayerFF | 4 M \n", + "152 | model.encoder.block.8.layer.1.DenseReluDense | T5DenseReluDense | 4 M \n", + "153 | model.encoder.block.8.layer.1.DenseReluDense.wi | Linear | 2 M \n", + "154 | model.encoder.block.8.layer.1.DenseReluDense.wo | Linear | 2 M \n", + "155 | model.encoder.block.8.layer.1.DenseReluDense.dropout | Dropout | 0 \n", + "156 | model.encoder.block.8.layer.1.layer_norm | T5LayerNorm | 768 \n", + "157 | model.encoder.block.8.layer.1.dropout | Dropout | 0 \n", + "158 | model.encoder.block.9 | T5Block | 7 M \n", + "159 | model.encoder.block.9.layer | ModuleList | 7 M \n", + "160 | model.encoder.block.9.layer.0 | T5LayerSelfAttention | 2 M \n", + "161 | model.encoder.block.9.layer.0.SelfAttention | T5Attention | 2 M \n", + "162 | model.encoder.block.9.layer.0.SelfAttention.q | Linear | 589 K \n", + "163 | model.encoder.block.9.layer.0.SelfAttention.k | Linear | 589 K \n", + "164 | model.encoder.block.9.layer.0.SelfAttention.v | Linear | 589 K \n", + "165 | model.encoder.block.9.layer.0.SelfAttention.o | Linear | 589 K \n", + "166 | model.encoder.block.9.layer.0.layer_norm | T5LayerNorm | 768 \n", + "167 | model.encoder.block.9.layer.0.dropout | Dropout | 0 \n", + "168 | model.encoder.block.9.layer.1 | T5LayerFF | 4 M \n", + "169 | model.encoder.block.9.layer.1.DenseReluDense | T5DenseReluDense | 4 M \n", + "170 | model.encoder.block.9.layer.1.DenseReluDense.wi | Linear | 2 M \n", + "171 | model.encoder.block.9.layer.1.DenseReluDense.wo | Linear | 2 M \n", + "172 | model.encoder.block.9.layer.1.DenseReluDense.dropout | Dropout | 0 \n", + "173 | model.encoder.block.9.layer.1.layer_norm | T5LayerNorm | 768 \n", + "174 | model.encoder.block.9.layer.1.dropout | Dropout | 0 \n", + "175 | model.encoder.block.10 | T5Block | 7 M \n", + "176 | model.encoder.block.10.layer | ModuleList | 7 M \n", + "177 
| model.encoder.block.10.layer.0 | T5LayerSelfAttention | 2 M \n", + "178 | model.encoder.block.10.layer.0.SelfAttention | T5Attention | 2 M \n", + "179 | model.encoder.block.10.layer.0.SelfAttention.q | Linear | 589 K \n", + "180 | model.encoder.block.10.layer.0.SelfAttention.k | Linear | 589 K \n", + "181 | model.encoder.block.10.layer.0.SelfAttention.v | Linear | 589 K \n", + "182 | model.encoder.block.10.layer.0.SelfAttention.o | Linear | 589 K \n", + "183 | model.encoder.block.10.layer.0.layer_norm | T5LayerNorm | 768 \n", + "184 | model.encoder.block.10.layer.0.dropout | Dropout | 0 \n", + "185 | model.encoder.block.10.layer.1 | T5LayerFF | 4 M \n", + "186 | model.encoder.block.10.layer.1.DenseReluDense | T5DenseReluDense | 4 M \n", + "187 | model.encoder.block.10.layer.1.DenseReluDense.wi | Linear | 2 M \n", + "188 | model.encoder.block.10.layer.1.DenseReluDense.wo | Linear | 2 M \n", + "189 | model.encoder.block.10.layer.1.DenseReluDense.dropout | Dropout | 0 \n", + "190 | model.encoder.block.10.layer.1.layer_norm | T5LayerNorm | 768 \n", + "191 | model.encoder.block.10.layer.1.dropout | Dropout | 0 \n", + "192 | model.encoder.block.11 | T5Block | 7 M \n", + "193 | model.encoder.block.11.layer | ModuleList | 7 M \n", + "194 | model.encoder.block.11.layer.0 | T5LayerSelfAttention | 2 M \n", + "195 | model.encoder.block.11.layer.0.SelfAttention | T5Attention | 2 M \n", + "196 | model.encoder.block.11.layer.0.SelfAttention.q | Linear | 589 K \n", + "197 | model.encoder.block.11.layer.0.SelfAttention.k | Linear | 589 K \n", + "198 | model.encoder.block.11.layer.0.SelfAttention.v | Linear | 589 K \n", + "199 | model.encoder.block.11.layer.0.SelfAttention.o | Linear | 589 K \n", + "200 | model.encoder.block.11.layer.0.layer_norm | T5LayerNorm | 768 \n", + "201 | model.encoder.block.11.layer.0.dropout | Dropout | 0 \n", + "202 | model.encoder.block.11.layer.1 | T5LayerFF | 4 M \n", + "203 | model.encoder.block.11.layer.1.DenseReluDense | T5DenseReluDense | 4 M \n", + "204 | model.encoder.block.11.layer.1.DenseReluDense.wi | Linear | 2 M \n", + "205 | model.encoder.block.11.layer.1.DenseReluDense.wo | Linear | 2 M \n", + "206 | model.encoder.block.11.layer.1.DenseReluDense.dropout | Dropout | 0 \n", + "207 | model.encoder.block.11.layer.1.layer_norm | T5LayerNorm | 768 \n", + "208 | model.encoder.block.11.layer.1.dropout | Dropout | 0 \n", + "209 | model.encoder.final_layer_norm | T5LayerNorm | 768 \n", + "210 | model.encoder.dropout | Dropout | 0 \n", + "211 | model.decoder | T5Stack | 137 M \n", + "212 | model.decoder.block | ModuleList | 113 M \n", + "213 | model.decoder.block.0 | T5Block | 9 M \n", + "214 | model.decoder.block.0.layer | ModuleList | 9 M \n", + "215 | model.decoder.block.0.layer.0 | T5LayerSelfAttention | 2 M \n", + "216 | model.decoder.block.0.layer.0.SelfAttention | T5Attention | 2 M \n", + "217 | model.decoder.block.0.layer.0.SelfAttention.q | Linear | 589 K \n", + "218 | model.decoder.block.0.layer.0.SelfAttention.k | Linear | 589 K \n", + "219 | model.decoder.block.0.layer.0.SelfAttention.v | Linear | 589 K \n", + "220 | model.decoder.block.0.layer.0.SelfAttention.o | Linear | 589 K \n", + "221 | model.decoder.block.0.layer.0.SelfAttention.relative_attention_bias | Embedding | 384 \n", + "222 | model.decoder.block.0.layer.0.layer_norm | T5LayerNorm | 768 \n", + "223 | model.decoder.block.0.layer.0.dropout | Dropout | 0 \n", + "224 | model.decoder.block.0.layer.1 | T5LayerCrossAttention | 2 M \n", + "225 | model.decoder.block.0.layer.1.EncDecAttention | T5Attention 
| 2 M \n", + "226 | model.decoder.block.0.layer.1.EncDecAttention.q | Linear | 589 K \n", + "227 | model.decoder.block.0.layer.1.EncDecAttention.k | Linear | 589 K \n", + "228 | model.decoder.block.0.layer.1.EncDecAttention.v | Linear | 589 K \n", + "229 | model.decoder.block.0.layer.1.EncDecAttention.o | Linear | 589 K \n", + "230 | model.decoder.block.0.layer.1.EncDecAttention.relative_attention_bias | Embedding | 384 \n", + "231 | model.decoder.block.0.layer.1.layer_norm | T5LayerNorm | 768 \n", + "232 | model.decoder.block.0.layer.1.dropout | Dropout | 0 \n", + "233 | model.decoder.block.0.layer.2 | T5LayerFF | 4 M \n", + "234 | model.decoder.block.0.layer.2.DenseReluDense | T5DenseReluDense | 4 M \n", + "235 | model.decoder.block.0.layer.2.DenseReluDense.wi | Linear | 2 M \n", + "236 | model.decoder.block.0.layer.2.DenseReluDense.wo | Linear | 2 M \n", + "237 | model.decoder.block.0.layer.2.DenseReluDense.dropout | Dropout | 0 \n", + "238 | model.decoder.block.0.layer.2.layer_norm | T5LayerNorm | 768 \n", + "239 | model.decoder.block.0.layer.2.dropout | Dropout | 0 \n", + "240 | model.decoder.block.1 | T5Block | 9 M \n", + "241 | model.decoder.block.1.layer | ModuleList | 9 M \n", + "242 | model.decoder.block.1.layer.0 | T5LayerSelfAttention | 2 M \n", + "243 | model.decoder.block.1.layer.0.SelfAttention | T5Attention | 2 M \n", + "244 | model.decoder.block.1.layer.0.SelfAttention.q | Linear | 589 K \n", + "245 | model.decoder.block.1.layer.0.SelfAttention.k | Linear | 589 K \n", + "246 | model.decoder.block.1.layer.0.SelfAttention.v | Linear | 589 K \n", + "247 | model.decoder.block.1.layer.0.SelfAttention.o | Linear | 589 K \n", + "248 | model.decoder.block.1.layer.0.layer_norm | T5LayerNorm | 768 \n", + "249 | model.decoder.block.1.layer.0.dropout | Dropout | 0 \n", + "250 | model.decoder.block.1.layer.1 | T5LayerCrossAttention | 2 M \n", + "251 | model.decoder.block.1.layer.1.EncDecAttention | T5Attention | 2 M \n", + "252 | model.decoder.block.1.layer.1.EncDecAttention.q | Linear | 589 K \n", + "253 | model.decoder.block.1.layer.1.EncDecAttention.k | Linear | 589 K \n", + "254 | model.decoder.block.1.layer.1.EncDecAttention.v | Linear | 589 K \n", + "255 | model.decoder.block.1.layer.1.EncDecAttention.o | Linear | 589 K \n", + "256 | model.decoder.block.1.layer.1.layer_norm | T5LayerNorm | 768 \n", + "257 | model.decoder.block.1.layer.1.dropout | Dropout | 0 \n", + "258 | model.decoder.block.1.layer.2 | T5LayerFF | 4 M \n", + "259 | model.decoder.block.1.layer.2.DenseReluDense | T5DenseReluDense | 4 M \n", + "260 | model.decoder.block.1.layer.2.DenseReluDense.wi | Linear | 2 M \n", + "261 | model.decoder.block.1.layer.2.DenseReluDense.wo | Linear | 2 M \n", + "262 | model.decoder.block.1.layer.2.DenseReluDense.dropout | Dropout | 0 \n", + "263 | model.decoder.block.1.layer.2.layer_norm | T5LayerNorm | 768 \n", + "264 | model.decoder.block.1.layer.2.dropout | Dropout | 0 \n", + "265 | model.decoder.block.2 | T5Block | 9 M \n", + "266 | model.decoder.block.2.layer | ModuleList | 9 M \n", + "267 | model.decoder.block.2.layer.0 | T5LayerSelfAttention | 2 M \n", + "268 | model.decoder.block.2.layer.0.SelfAttention | T5Attention | 2 M \n", + "269 | model.decoder.block.2.layer.0.SelfAttention.q | Linear | 589 K \n", + "270 | model.decoder.block.2.layer.0.SelfAttention.k | Linear | 589 K \n", + "271 | model.decoder.block.2.layer.0.SelfAttention.v | Linear | 589 K \n", + "272 | model.decoder.block.2.layer.0.SelfAttention.o | Linear | 589 K \n", + "273 | 
model.decoder.block.2.layer.0.layer_norm | T5LayerNorm | 768 \n", + "274 | model.decoder.block.2.layer.0.dropout | Dropout | 0 \n", + "275 | model.decoder.block.2.layer.1 | T5LayerCrossAttention | 2 M \n", + "276 | model.decoder.block.2.layer.1.EncDecAttention | T5Attention | 2 M \n", + "277 | model.decoder.block.2.layer.1.EncDecAttention.q | Linear | 589 K \n", + "278 | model.decoder.block.2.layer.1.EncDecAttention.k | Linear | 589 K \n", + "279 | model.decoder.block.2.layer.1.EncDecAttention.v | Linear | 589 K \n", + "280 | model.decoder.block.2.layer.1.EncDecAttention.o | Linear | 589 K \n", + "281 | model.decoder.block.2.layer.1.layer_norm | T5LayerNorm | 768 \n", + "282 | model.decoder.block.2.layer.1.dropout | Dropout | 0 \n", + "283 | model.decoder.block.2.layer.2 | T5LayerFF | 4 M \n", + "284 | model.decoder.block.2.layer.2.DenseReluDense | T5DenseReluDense | 4 M \n", + "285 | model.decoder.block.2.layer.2.DenseReluDense.wi | Linear | 2 M \n", + "286 | model.decoder.block.2.layer.2.DenseReluDense.wo | Linear | 2 M \n", + "287 | model.decoder.block.2.layer.2.DenseReluDense.dropout | Dropout | 0 \n", + "288 | model.decoder.block.2.layer.2.layer_norm | T5LayerNorm | 768 \n", + "289 | model.decoder.block.2.layer.2.dropout | Dropout | 0 \n", + "290 | model.decoder.block.3 | T5Block | 9 M \n", + "291 | model.decoder.block.3.layer | ModuleList | 9 M \n", + "292 | model.decoder.block.3.layer.0 | T5LayerSelfAttention | 2 M \n", + "293 | model.decoder.block.3.layer.0.SelfAttention | T5Attention | 2 M \n", + "294 | model.decoder.block.3.layer.0.SelfAttention.q | Linear | 589 K \n", + "295 | model.decoder.block.3.layer.0.SelfAttention.k | Linear | 589 K \n", + "296 | model.decoder.block.3.layer.0.SelfAttention.v | Linear | 589 K \n", + "297 | model.decoder.block.3.layer.0.SelfAttention.o | Linear | 589 K \n", + "298 | model.decoder.block.3.layer.0.layer_norm | T5LayerNorm | 768 \n", + "299 | model.decoder.block.3.layer.0.dropout | Dropout | 0 \n", + "300 | model.decoder.block.3.layer.1 | T5LayerCrossAttention | 2 M \n", + "301 | model.decoder.block.3.layer.1.EncDecAttention | T5Attention | 2 M \n", + "302 | model.decoder.block.3.layer.1.EncDecAttention.q | Linear | 589 K \n", + "303 | model.decoder.block.3.layer.1.EncDecAttention.k | Linear | 589 K \n", + "304 | model.decoder.block.3.layer.1.EncDecAttention.v | Linear | 589 K \n", + "305 | model.decoder.block.3.layer.1.EncDecAttention.o | Linear | 589 K \n", + "306 | model.decoder.block.3.layer.1.layer_norm | T5LayerNorm | 768 \n", + "307 | model.decoder.block.3.layer.1.dropout | Dropout | 0 \n", + "308 | model.decoder.block.3.layer.2 | T5LayerFF | 4 M \n", + "309 | model.decoder.block.3.layer.2.DenseReluDense | T5DenseReluDense | 4 M \n", + "310 | model.decoder.block.3.layer.2.DenseReluDense.wi | Linear | 2 M \n", + "311 | model.decoder.block.3.layer.2.DenseReluDense.wo | Linear | 2 M \n", + "312 | model.decoder.block.3.layer.2.DenseReluDense.dropout | Dropout | 0 \n", + "313 | model.decoder.block.3.layer.2.layer_norm | T5LayerNorm | 768 \n", + "314 | model.decoder.block.3.layer.2.dropout | Dropout | 0 \n", + "315 | model.decoder.block.4 | T5Block | 9 M \n", + "316 | model.decoder.block.4.layer | ModuleList | 9 M \n", + "317 | model.decoder.block.4.layer.0 | T5LayerSelfAttention | 2 M \n", + "318 | model.decoder.block.4.layer.0.SelfAttention | T5Attention | 2 M \n", + "319 | model.decoder.block.4.layer.0.SelfAttention.q | Linear | 589 K \n", + "320 | model.decoder.block.4.layer.0.SelfAttention.k | Linear | 589 K \n", + "321 | 
model.decoder.block.4.layer.0.SelfAttention.v | Linear | 589 K \n", + "322 | model.decoder.block.4.layer.0.SelfAttention.o | Linear | 589 K \n", + "323 | model.decoder.block.4.layer.0.layer_norm | T5LayerNorm | 768 \n", + "324 | model.decoder.block.4.layer.0.dropout | Dropout | 0 \n", + "325 | model.decoder.block.4.layer.1 | T5LayerCrossAttention | 2 M \n", + "326 | model.decoder.block.4.layer.1.EncDecAttention | T5Attention | 2 M \n", + "327 | model.decoder.block.4.layer.1.EncDecAttention.q | Linear | 589 K \n", + "328 | model.decoder.block.4.layer.1.EncDecAttention.k | Linear | 589 K \n", + "329 | model.decoder.block.4.layer.1.EncDecAttention.v | Linear | 589 K \n", + "330 | model.decoder.block.4.layer.1.EncDecAttention.o | Linear | 589 K \n", + "331 | model.decoder.block.4.layer.1.layer_norm | T5LayerNorm | 768 \n", + "332 | model.decoder.block.4.layer.1.dropout | Dropout | 0 \n", + "333 | model.decoder.block.4.layer.2 | T5LayerFF | 4 M \n", + "334 | model.decoder.block.4.layer.2.DenseReluDense | T5DenseReluDense | 4 M \n", + "335 | model.decoder.block.4.layer.2.DenseReluDense.wi | Linear | 2 M \n", + "336 | model.decoder.block.4.layer.2.DenseReluDense.wo | Linear | 2 M \n", + "337 | model.decoder.block.4.layer.2.DenseReluDense.dropout | Dropout | 0 \n", + "338 | model.decoder.block.4.layer.2.layer_norm | T5LayerNorm | 768 \n", + "339 | model.decoder.block.4.layer.2.dropout | Dropout | 0 \n", + "340 | model.decoder.block.5 | T5Block | 9 M \n", + "341 | model.decoder.block.5.layer | ModuleList | 9 M \n", + "342 | model.decoder.block.5.layer.0 | T5LayerSelfAttention | 2 M \n", + "343 | model.decoder.block.5.layer.0.SelfAttention | T5Attention | 2 M \n", + "344 | model.decoder.block.5.layer.0.SelfAttention.q | Linear | 589 K \n", + "345 | model.decoder.block.5.layer.0.SelfAttention.k | Linear | 589 K \n", + "346 | model.decoder.block.5.layer.0.SelfAttention.v | Linear | 589 K \n", + "347 | model.decoder.block.5.layer.0.SelfAttention.o | Linear | 589 K \n", + "348 | model.decoder.block.5.layer.0.layer_norm | T5LayerNorm | 768 \n", + "349 | model.decoder.block.5.layer.0.dropout | Dropout | 0 \n", + "350 | model.decoder.block.5.layer.1 | T5LayerCrossAttention | 2 M \n", + "351 | model.decoder.block.5.layer.1.EncDecAttention | T5Attention | 2 M \n", + "352 | model.decoder.block.5.layer.1.EncDecAttention.q | Linear | 589 K \n", + "353 | model.decoder.block.5.layer.1.EncDecAttention.k | Linear | 589 K \n", + "354 | model.decoder.block.5.layer.1.EncDecAttention.v | Linear | 589 K \n", + "355 | model.decoder.block.5.layer.1.EncDecAttention.o | Linear | 589 K \n", + "356 | model.decoder.block.5.layer.1.layer_norm | T5LayerNorm | 768 \n", + "357 | model.decoder.block.5.layer.1.dropout | Dropout | 0 \n", + "358 | model.decoder.block.5.layer.2 | T5LayerFF | 4 M \n", + "359 | model.decoder.block.5.layer.2.DenseReluDense | T5DenseReluDense | 4 M \n", + "360 | model.decoder.block.5.layer.2.DenseReluDense.wi | Linear | 2 M \n", + "361 | model.decoder.block.5.layer.2.DenseReluDense.wo | Linear | 2 M \n", + "362 | model.decoder.block.5.layer.2.DenseReluDense.dropout | Dropout | 0 \n", + "363 | model.decoder.block.5.layer.2.layer_norm | T5LayerNorm | 768 \n", + "364 | model.decoder.block.5.layer.2.dropout | Dropout | 0 \n", + "365 | model.decoder.block.6 | T5Block | 9 M \n", + "366 | model.decoder.block.6.layer | ModuleList | 9 M \n", + "367 | model.decoder.block.6.layer.0 | T5LayerSelfAttention | 2 M \n", + "368 | model.decoder.block.6.layer.0.SelfAttention | T5Attention | 2 M \n", + "369 | 
model.decoder.block.6.layer.0.SelfAttention.q | Linear | 589 K \n", + "370 | model.decoder.block.6.layer.0.SelfAttention.k | Linear | 589 K \n", + "371 | model.decoder.block.6.layer.0.SelfAttention.v | Linear | 589 K \n", + "372 | model.decoder.block.6.layer.0.SelfAttention.o | Linear | 589 K \n", + "373 | model.decoder.block.6.layer.0.layer_norm | T5LayerNorm | 768 \n", + "374 | model.decoder.block.6.layer.0.dropout | Dropout | 0 \n", + "375 | model.decoder.block.6.layer.1 | T5LayerCrossAttention | 2 M \n", + "376 | model.decoder.block.6.layer.1.EncDecAttention | T5Attention | 2 M \n", + "377 | model.decoder.block.6.layer.1.EncDecAttention.q | Linear | 589 K \n", + "378 | model.decoder.block.6.layer.1.EncDecAttention.k | Linear | 589 K \n", + "379 | model.decoder.block.6.layer.1.EncDecAttention.v | Linear | 589 K \n", + "380 | model.decoder.block.6.layer.1.EncDecAttention.o | Linear | 589 K \n", + "381 | model.decoder.block.6.layer.1.layer_norm | T5LayerNorm | 768 \n", + "382 | model.decoder.block.6.layer.1.dropout | Dropout | 0 \n", + "383 | model.decoder.block.6.layer.2 | T5LayerFF | 4 M \n", + "384 | model.decoder.block.6.layer.2.DenseReluDense | T5DenseReluDense | 4 M \n", + "385 | model.decoder.block.6.layer.2.DenseReluDense.wi | Linear | 2 M \n", + "386 | model.decoder.block.6.layer.2.DenseReluDense.wo | Linear | 2 M \n", + "387 | model.decoder.block.6.layer.2.DenseReluDense.dropout | Dropout | 0 \n", + "388 | model.decoder.block.6.layer.2.layer_norm | T5LayerNorm | 768 \n", + "389 | model.decoder.block.6.layer.2.dropout | Dropout | 0 \n", + "390 | model.decoder.block.7 | T5Block | 9 M \n", + "391 | model.decoder.block.7.layer | ModuleList | 9 M \n", + "392 | model.decoder.block.7.layer.0 | T5LayerSelfAttention | 2 M \n", + "393 | model.decoder.block.7.layer.0.SelfAttention | T5Attention | 2 M \n", + "394 | model.decoder.block.7.layer.0.SelfAttention.q | Linear | 589 K \n", + "395 | model.decoder.block.7.layer.0.SelfAttention.k | Linear | 589 K \n", + "396 | model.decoder.block.7.layer.0.SelfAttention.v | Linear | 589 K \n", + "397 | model.decoder.block.7.layer.0.SelfAttention.o | Linear | 589 K \n", + "398 | model.decoder.block.7.layer.0.layer_norm | T5LayerNorm | 768 \n", + "399 | model.decoder.block.7.layer.0.dropout | Dropout | 0 \n", + "400 | model.decoder.block.7.layer.1 | T5LayerCrossAttention | 2 M \n", + "401 | model.decoder.block.7.layer.1.EncDecAttention | T5Attention | 2 M \n", + "402 | model.decoder.block.7.layer.1.EncDecAttention.q | Linear | 589 K \n", + "403 | model.decoder.block.7.layer.1.EncDecAttention.k | Linear | 589 K \n", + "404 | model.decoder.block.7.layer.1.EncDecAttention.v | Linear | 589 K \n", + "405 | model.decoder.block.7.layer.1.EncDecAttention.o | Linear | 589 K \n", + "406 | model.decoder.block.7.layer.1.layer_norm | T5LayerNorm | 768 \n", + "407 | model.decoder.block.7.layer.1.dropout | Dropout | 0 \n", + "408 | model.decoder.block.7.layer.2 | T5LayerFF | 4 M \n", + "409 | model.decoder.block.7.layer.2.DenseReluDense | T5DenseReluDense | 4 M \n", + "410 | model.decoder.block.7.layer.2.DenseReluDense.wi | Linear | 2 M \n", + "411 | model.decoder.block.7.layer.2.DenseReluDense.wo | Linear | 2 M \n", + "412 | model.decoder.block.7.layer.2.DenseReluDense.dropout | Dropout | 0 \n", + "413 | model.decoder.block.7.layer.2.layer_norm | T5LayerNorm | 768 \n", + "414 | model.decoder.block.7.layer.2.dropout | Dropout | 0 \n", + "415 | model.decoder.block.8 | T5Block | 9 M \n", + "416 | model.decoder.block.8.layer | ModuleList | 9 M \n", + "417 | 
model.decoder.block.8.layer.0 | T5LayerSelfAttention | 2 M \n", + "418 | model.decoder.block.8.layer.0.SelfAttention | T5Attention | 2 M \n", + "419 | model.decoder.block.8.layer.0.SelfAttention.q | Linear | 589 K \n", + "420 | model.decoder.block.8.layer.0.SelfAttention.k | Linear | 589 K \n", + "421 | model.decoder.block.8.layer.0.SelfAttention.v | Linear | 589 K \n", + "422 | model.decoder.block.8.layer.0.SelfAttention.o | Linear | 589 K \n", + "423 | model.decoder.block.8.layer.0.layer_norm | T5LayerNorm | 768 \n", + "424 | model.decoder.block.8.layer.0.dropout | Dropout | 0 \n", + "425 | model.decoder.block.8.layer.1 | T5LayerCrossAttention | 2 M \n", + "426 | model.decoder.block.8.layer.1.EncDecAttention | T5Attention | 2 M \n", + "427 | model.decoder.block.8.layer.1.EncDecAttention.q | Linear | 589 K \n", + "428 | model.decoder.block.8.layer.1.EncDecAttention.k | Linear | 589 K \n", + "429 | model.decoder.block.8.layer.1.EncDecAttention.v | Linear | 589 K \n", + "430 | model.decoder.block.8.layer.1.EncDecAttention.o | Linear | 589 K \n", + "431 | model.decoder.block.8.layer.1.layer_norm | T5LayerNorm | 768 \n", + "432 | model.decoder.block.8.layer.1.dropout | Dropout | 0 \n", + "433 | model.decoder.block.8.layer.2 | T5LayerFF | 4 M \n", + "434 | model.decoder.block.8.layer.2.DenseReluDense | T5DenseReluDense | 4 M \n", + "435 | model.decoder.block.8.layer.2.DenseReluDense.wi | Linear | 2 M \n", + "436 | model.decoder.block.8.layer.2.DenseReluDense.wo | Linear | 2 M \n", + "437 | model.decoder.block.8.layer.2.DenseReluDense.dropout | Dropout | 0 \n", + "438 | model.decoder.block.8.layer.2.layer_norm | T5LayerNorm | 768 \n", + "439 | model.decoder.block.8.layer.2.dropout | Dropout | 0 \n", + "440 | model.decoder.block.9 | T5Block | 9 M \n", + "441 | model.decoder.block.9.layer | ModuleList | 9 M \n", + "442 | model.decoder.block.9.layer.0 | T5LayerSelfAttention | 2 M \n", + "443 | model.decoder.block.9.layer.0.SelfAttention | T5Attention | 2 M \n", + "444 | model.decoder.block.9.layer.0.SelfAttention.q | Linear | 589 K \n", + "445 | model.decoder.block.9.layer.0.SelfAttention.k | Linear | 589 K \n", + "446 | model.decoder.block.9.layer.0.SelfAttention.v | Linear | 589 K \n", + "447 | model.decoder.block.9.layer.0.SelfAttention.o | Linear | 589 K \n", + "448 | model.decoder.block.9.layer.0.layer_norm | T5LayerNorm | 768 \n", + "449 | model.decoder.block.9.layer.0.dropout | Dropout | 0 \n", + "450 | model.decoder.block.9.layer.1 | T5LayerCrossAttention | 2 M \n", + "451 | model.decoder.block.9.layer.1.EncDecAttention | T5Attention | 2 M \n", + "452 | model.decoder.block.9.layer.1.EncDecAttention.q | Linear | 589 K \n", + "453 | model.decoder.block.9.layer.1.EncDecAttention.k | Linear | 589 K \n", + "454 | model.decoder.block.9.layer.1.EncDecAttention.v | Linear | 589 K \n", + "455 | model.decoder.block.9.layer.1.EncDecAttention.o | Linear | 589 K \n", + "456 | model.decoder.block.9.layer.1.layer_norm | T5LayerNorm | 768 \n", + "457 | model.decoder.block.9.layer.1.dropout | Dropout | 0 \n", + "458 | model.decoder.block.9.layer.2 | T5LayerFF | 4 M \n", + "459 | model.decoder.block.9.layer.2.DenseReluDense | T5DenseReluDense | 4 M \n", + "460 | model.decoder.block.9.layer.2.DenseReluDense.wi | Linear | 2 M \n", + "461 | model.decoder.block.9.layer.2.DenseReluDense.wo | Linear | 2 M \n", + "462 | model.decoder.block.9.layer.2.DenseReluDense.dropout | Dropout | 0 \n", + "463 | model.decoder.block.9.layer.2.layer_norm | T5LayerNorm | 768 \n", + "464 | model.decoder.block.9.layer.2.dropout | 
Dropout | 0 \n", + "465 | model.decoder.block.10 | T5Block | 9 M \n", + "466 | model.decoder.block.10.layer | ModuleList | 9 M \n", + "467 | model.decoder.block.10.layer.0 | T5LayerSelfAttention | 2 M \n", + "468 | model.decoder.block.10.layer.0.SelfAttention | T5Attention | 2 M \n", + "469 | model.decoder.block.10.layer.0.SelfAttention.q | Linear | 589 K \n", + "470 | model.decoder.block.10.layer.0.SelfAttention.k | Linear | 589 K \n", + "471 | model.decoder.block.10.layer.0.SelfAttention.v | Linear | 589 K \n", + "472 | model.decoder.block.10.layer.0.SelfAttention.o | Linear | 589 K \n", + "473 | model.decoder.block.10.layer.0.layer_norm | T5LayerNorm | 768 \n", + "474 | model.decoder.block.10.layer.0.dropout | Dropout | 0 \n", + "475 | model.decoder.block.10.layer.1 | T5LayerCrossAttention | 2 M \n", + "476 | model.decoder.block.10.layer.1.EncDecAttention | T5Attention | 2 M \n", + "477 | model.decoder.block.10.layer.1.EncDecAttention.q | Linear | 589 K \n", + "478 | model.decoder.block.10.layer.1.EncDecAttention.k | Linear | 589 K \n", + "479 | model.decoder.block.10.layer.1.EncDecAttention.v | Linear | 589 K \n", + "480 | model.decoder.block.10.layer.1.EncDecAttention.o | Linear | 589 K \n", + "481 | model.decoder.block.10.layer.1.layer_norm | T5LayerNorm | 768 \n", + "482 | model.decoder.block.10.layer.1.dropout | Dropout | 0 \n", + "483 | model.decoder.block.10.layer.2 | T5LayerFF | 4 M \n", + "484 | model.decoder.block.10.layer.2.DenseReluDense | T5DenseReluDense | 4 M \n", + "485 | model.decoder.block.10.layer.2.DenseReluDense.wi | Linear | 2 M \n", + "486 | model.decoder.block.10.layer.2.DenseReluDense.wo | Linear | 2 M \n", + "487 | model.decoder.block.10.layer.2.DenseReluDense.dropout | Dropout | 0 \n", + "488 | model.decoder.block.10.layer.2.layer_norm | T5LayerNorm | 768 \n", + "489 | model.decoder.block.10.layer.2.dropout | Dropout | 0 \n", + "490 | model.decoder.block.11 | T5Block | 9 M \n", + "491 | model.decoder.block.11.layer | ModuleList | 9 M \n", + "492 | model.decoder.block.11.layer.0 | T5LayerSelfAttention | 2 M \n", + "493 | model.decoder.block.11.layer.0.SelfAttention | T5Attention | 2 M \n", + "494 | model.decoder.block.11.layer.0.SelfAttention.q | Linear | 589 K \n", + "495 | model.decoder.block.11.layer.0.SelfAttention.k | Linear | 589 K \n", + "496 | model.decoder.block.11.layer.0.SelfAttention.v | Linear | 589 K \n", + "497 | model.decoder.block.11.layer.0.SelfAttention.o | Linear | 589 K \n", + "498 | model.decoder.block.11.layer.0.layer_norm | T5LayerNorm | 768 \n", + "499 | model.decoder.block.11.layer.0.dropout | Dropout | 0 \n", + "500 | model.decoder.block.11.layer.1 | T5LayerCrossAttention | 2 M \n", + "501 | model.decoder.block.11.layer.1.EncDecAttention | T5Attention | 2 M \n", + "502 | model.decoder.block.11.layer.1.EncDecAttention.q | Linear | 589 K \n", + "503 | model.decoder.block.11.layer.1.EncDecAttention.k | Linear | 589 K \n", + "504 | model.decoder.block.11.layer.1.EncDecAttention.v | Linear | 589 K \n", + "505 | model.decoder.block.11.layer.1.EncDecAttention.o | Linear | 589 K \n", + "506 | model.decoder.block.11.layer.1.layer_norm | T5LayerNorm | 768 \n", + "507 | model.decoder.block.11.layer.1.dropout | Dropout | 0 \n", + "508 | model.decoder.block.11.layer.2 | T5LayerFF | 4 M \n", + "509 | model.decoder.block.11.layer.2.DenseReluDense | T5DenseReluDense | 4 M \n", + "510 | model.decoder.block.11.layer.2.DenseReluDense.wi | Linear | 2 M \n", + "511 | model.decoder.block.11.layer.2.DenseReluDense.wo | Linear | 2 M \n", + "512 | 
model.decoder.block.11.layer.2.DenseReluDense.dropout | Dropout | 0 \n", + "513 | model.decoder.block.11.layer.2.layer_norm | T5LayerNorm | 768 \n", + "514 | model.decoder.block.11.layer.2.dropout | Dropout | 0 \n", + "515 | model.decoder.final_layer_norm | T5LayerNorm | 768 \n", + "516 | model.decoder.dropout | Dropout | 0 \n", + "517 | model.lm_head | Linear | 24 M \n", + "INFO:__main__:LOOKING AT swag_data dev\n" + ], + "name": "stderr" + }, + { + "output_type": "display_data", + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "8e79d03deee94b299431330441bd64c8", + "version_minor": 0, + "version_major": 2 + }, + "text/plain": [ + "HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validation sanity check', layout=Layout…" + ] + }, + "metadata": { + "tags": [] + } + }, + { + "output_type": "stream", + "text": [ + "INFO:__main__:LOOKING AT swag_data train\n" + ], + "name": "stderr" + }, + { + "output_type": "stream", + "text": [ + "\r" + ], + "name": "stdout" + }, + { + "output_type": "stream", + "text": [ + "INFO:__main__:LOOKING AT swag_data dev\n" + ], + "name": "stderr" + }, + { + "output_type": "display_data", + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "68705cee3df5458fb5145046337d925c", + "version_minor": 0, + "version_major": 2 + }, + "text/plain": [ + "HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Training', layout=Layout(flex='2'), max…" + ] + }, + "metadata": { + "tags": [] + } + }, + { + "output_type": "stream", + "text": [ + "/pytorch/torch/csrc/utils/python_arg_parser.cpp:756: UserWarning: This overload of add_ is deprecated:\n", + "\tadd_(Number alpha, Tensor other)\n", + "Consider using one of the following signatures instead:\n", + "\tadd_(Tensor other, *, Number alpha)\n" + ], + "name": "stderr" + }, + { + "output_type": "display_data", + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "69f6eb1cb0434128961b5d83529813c5", + "version_minor": 0, + "version_major": 2 + }, + "text/plain": [ + "HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…" + ] + }, + "metadata": { + "tags": [] + } + }, + { + "output_type": "stream", + "text": [ + "INFO:__main__:***** Validation results *****\n", + "INFO:__main__:avg_val_loss = tensor(0.3535, device='cuda:0')\n", + "\n", + "INFO:__main__:loss = tensor(0.3080, device='cuda:0')\n", + "\n", + "INFO:__main__:train_loss = tensor(0.3080, device='cuda:0')\n", + "\n", + "INFO:__main__:val_loss = tensor(0.3535, device='cuda:0')\n", + "\n" + ], + "name": "stderr" + }, + { + "output_type": "display_data", + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "0cfc8fa73f164b4fa5ddcbc3f115ef9b", + "version_minor": 0, + "version_major": 2 + }, + "text/plain": [ + "HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…" + ] + }, + "metadata": { + "tags": [] + } + }, + { + "output_type": "stream", + "text": [ + "INFO:__main__:***** Validation results *****\n", + "INFO:__main__:avg_train_loss = tensor(0.5107, device='cuda:0')\n", + "\n", + "INFO:__main__:avg_val_loss = tensor(0.3268, device='cuda:0')\n", + "\n", + "INFO:__main__:epoch = 0\n", + "\n", + "INFO:__main__:loss = tensor(0.5484, device='cuda:0')\n", + "\n", + "INFO:__main__:train_loss = tensor(0.5484, device='cuda:0')\n", + "\n", + "INFO:__main__:val_loss = tensor(0.3268, device='cuda:0')\n", + "\n", + "INFO:lightning:Detected 
KeyboardInterrupt, attempting graceful shutdown...\n" + ], + "name": "stderr" + }, + { + "output_type": "stream", + "text": [ + "\n" + ], + "name": "stdout" + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "1" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 22 + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "o1ZB_6SK7V-3" + }, + "source": [], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "AgNV3TMzqSvj" + }, + "source": [ + "### Eval" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "gFFOwfXyqc4_" + }, + "source": [ + "import textwrap\n", + "from tqdm.auto import tqdm\n", + "from sklearn import metrics" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "rsYCq3Lwqc5Y", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 34 + }, + "outputId": "51f7bd88-2441-42be-e8f3-adc0337a164c" + }, + "source": [ + "dataset = SwagDataset(tokenizer, data_dir='swag_data', type_path='val')\n", + "loader = DataLoader(dataset, batch_size=32, num_workers=4)" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "text": [ + "INFO:__main__:LOOKING AT swag_data dev\n" + ], + "name": "stderr" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "KHwMBQNjqc5h", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 66, + "referenced_widgets": [ + "1597779d89464892885045be715890a8", + "8a42468ed6b945e8bfce1803f3ea4452", + "f87eae824cf1492b9555b78648a9f261", + "6cd0d574b5fd43588b8d492674125218", + "17b25142ac744ba882e2bbd1f42c1db2", + "09185d325ef84c1fad7b07fbd9eeed31", + "ba31765789dc46229493674dab21921d", + "a9dd88fb73374e108482b80993b998eb" + ] + }, + "outputId": "81e7d67d-1d15-4dea-a552-695cfe8ef105" + }, + "source": [ + "model.model.eval()\n", + "outputs = []\n", + "targets = []\n", + "for batch in tqdm(loader):\n", + " outs = model.model.generate(input_ids=batch['source_ids'].cuda(),\n", + " attention_mask=batch['source_mask'].cuda(),\n", + " max_length=2)\n", + "\n", + " dec = [tokenizer.decode(ids) for ids in outs]\n", + " target = [tokenizer.decode(ids) for ids in batch[\"target_ids\"]]\n", + "\n", + " outputs.extend(dec)\n", + " targets.extend(target)" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "display_data", + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "1597779d89464892885045be715890a8", + "version_minor": 0, + "version_major": 2 + }, + "text/plain": [ + "HBox(children=(FloatProgress(value=0.0, max=626.0), HTML(value='')))" + ] + }, + "metadata": { + "tags": [] + } + }, + { + "output_type": "stream", + "text": [ + "\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "ZbTValmYq15r" + }, + "source": [ + "for i, out in enumerate(outputs):\n", + " if out not in \"1234\":\n", + " print(i, 'detected invalid prediction')" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "jN35n2pas-pF", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 34 + }, + "outputId": "be8a3507-8e66-479d-c41c-dd9cb0603742" + }, + "source": [ + "metrics.accuracy_score(targets, outputs)" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "0.7397280815755274" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 28 + } + ] + }, + { + "cell_type": "markdown", + 
"metadata": { + "id": "t_WaMutznvGb" + }, + "source": [ + "This is great! We have achieved almost 74% accuracy with this simple formulation. This is great becuase with BERT like models to make a prediction on single example the model needs to do 4 forward passes, one for each possible endings and then the logits are concatenated together for all 4 passes and then passed through final softmax layer to produce 4 probabilities. This approach needs only a single pass for one example." + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "rFgOHlW_tHPd" + }, + "source": [], + "execution_count": null, + "outputs": [] + } + ] +} \ No newline at end of file