diff --git "a/prediction_sinhala.ipynb" "b/prediction_sinhala.ipynb" new file mode 100644--- /dev/null +++ "b/prediction_sinhala.ipynb" @@ -0,0 +1,3865 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "1edd00396f2d45a7b32079d43bc62634": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_a96994a464df43918566f6cc967e7148", + "IPY_MODEL_92143f1854c44349a3d0f6b7838b6a5c", + "IPY_MODEL_b90793e5e29c435cab6fd7b1e059c992" + ], + "layout": "IPY_MODEL_8ce96d68c1e443b28e1200f106fefb02" + } + }, + "a96994a464df43918566f6cc967e7148": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_dcfa2646664e449c98a00e89b2b7984d", + "placeholder": "​", + "style": "IPY_MODEL_28bcfe01e8a64ba08ce62e9715ad85e4", + "value": "tokenizer_config.json: 100%" + } + }, + "92143f1854c44349a3d0f6b7838b6a5c": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + 
"_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_1b72e7f7e85a49fb8c7a79bce1989647", + "max": 1375, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_76523fc98b644aaaaf6c605544e9fffb", + "value": 1375 + } + }, + "b90793e5e29c435cab6fd7b1e059c992": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_b2d2a9eabbe14cddaa7d0aa39e7a1953", + "placeholder": "​", + "style": "IPY_MODEL_bf4db198f72441b48a5dbff8515a1f91", + "value": " 1.38k/1.38k [00:00<00:00, 36.2kB/s]" + } + }, + "8ce96d68c1e443b28e1200f106fefb02": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + 
"grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "dcfa2646664e449c98a00e89b2b7984d": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "28bcfe01e8a64ba08ce62e9715ad85e4": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": 
"DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "1b72e7f7e85a49fb8c7a79bce1989647": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "76523fc98b644aaaaf6c605544e9fffb": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "b2d2a9eabbe14cddaa7d0aa39e7a1953": { + "model_module": "@jupyter-widgets/base", 
+ "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "bf4db198f72441b48a5dbff8515a1f91": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "48e8e488c27a4948a455835f6caf2ce2": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + 
"_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_2fb5a8ac30ed49df93056bc6802e8ee0", + "IPY_MODEL_c29fcea40de347bf9f274f375b9123a5", + "IPY_MODEL_00cd02215e1f4225a4cf93b46b9a7e15" + ], + "layout": "IPY_MODEL_e1064a6f8bfd4435a6ad15d08ff44699" + } + }, + "2fb5a8ac30ed49df93056bc6802e8ee0": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_956884e6e8bc43f4bc51a2b75c131889", + "placeholder": "​", + "style": "IPY_MODEL_4e23a4eec35f4f8ea3114d9cb0ea1e04", + "value": "vocab.json: 100%" + } + }, + "c29fcea40de347bf9f274f375b9123a5": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_e7a3964adec34bf6b37f52cf1119fa9c", + "max": 1500217, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_22a128f583aa4514a1e71d0f8aaf8e79", + "value": 1500217 + } + }, + "00cd02215e1f4225a4cf93b46b9a7e15": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": 
"HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_3f902b8cb652446c84609cd730a64e35", + "placeholder": "​", + "style": "IPY_MODEL_a78b7664a1e346f181b203bb1645eb9b", + "value": " 1.50M/1.50M [00:00<00:00, 9.80MB/s]" + } + }, + "e1064a6f8bfd4435a6ad15d08ff44699": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "956884e6e8bc43f4bc51a2b75c131889": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + 
"_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "4e23a4eec35f4f8ea3114d9cb0ea1e04": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "e7a3964adec34bf6b37f52cf1119fa9c": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + 
"grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "22a128f583aa4514a1e71d0f8aaf8e79": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "3f902b8cb652446c84609cd730a64e35": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": 
null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "a78b7664a1e346f181b203bb1645eb9b": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "3bf7edeed06a4ef3b1ce28f24201c84b": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_205cbd07f34345c48b1e72bc2cb9a93b", + "IPY_MODEL_a2d328e2313a49aab752cd2ba38220b5", + "IPY_MODEL_a3696eb89c4e434683bb5416d91602db" + ], + "layout": "IPY_MODEL_e04dfc9c2e5f437c8dd9b15f33c04a4a" + } + }, + "205cbd07f34345c48b1e72bc2cb9a93b": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": 
"IPY_MODEL_1f5b368654494327bfc8d1c315f13832", + "placeholder": "​", + "style": "IPY_MODEL_853eb13a56ec4fbf89e25f333798132e", + "value": "merges.txt: 100%" + } + }, + "a2d328e2313a49aab752cd2ba38220b5": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_5713250ff3864029a3668c6a7eb1f3e3", + "max": 1146413, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_944ed217567144459ea5279c34f529f3", + "value": 1146413 + } + }, + "a3696eb89c4e434683bb5416d91602db": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_1e0faaa1a09f42f8964a3203472f50c2", + "placeholder": "​", + "style": "IPY_MODEL_7c3da939876e4a6f8f2969fbf96bbcd0", + "value": " 1.15M/1.15M [00:00<00:00, 14.1MB/s]" + } + }, + "e04dfc9c2e5f437c8dd9b15f33c04a4a": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + 
"align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "1f5b368654494327bfc8d1c315f13832": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": 
null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "853eb13a56ec4fbf89e25f333798132e": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "5713250ff3864029a3668c6a7eb1f3e3": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "944ed217567144459ea5279c34f529f3": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": 
"@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "1e0faaa1a09f42f8964a3203472f50c2": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "7c3da939876e4a6f8f2969fbf96bbcd0": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + 
}, + "aacafd29b7b5403bb8a7df1ebe2a731e": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_ec944b4365c34ab6813af9d925e2a552", + "IPY_MODEL_8d48d360da5945bfbf300ae455043c07", + "IPY_MODEL_36f7f5ae075f4c59a44283e25088eaab" + ], + "layout": "IPY_MODEL_8cfa54ddcf354e0e9f71102656a744cf" + } + }, + "ec944b4365c34ab6813af9d925e2a552": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_edcb22216cac4bcf83a301d975f20d2d", + "placeholder": "​", + "style": "IPY_MODEL_3ace28614fe446f18268578e56b5ec14", + "value": "tokenizer.json: 100%" + } + }, + "8d48d360da5945bfbf300ae455043c07": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_3c8f3c862f744ddba9524079c636124a", + "max": 3529879, + "min": 0, + "orientation": 
"horizontal", + "style": "IPY_MODEL_36eb36dba13d4ad4a73b401e0dc22c42", + "value": 3529879 + } + }, + "36f7f5ae075f4c59a44283e25088eaab": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_236bb6124df1443684b16dd34fba2ed4", + "placeholder": "​", + "style": "IPY_MODEL_3264137a43cc4725948aab030421b24c", + "value": " 3.53M/3.53M [00:00<00:00, 22.8MB/s]" + } + }, + "8cfa54ddcf354e0e9f71102656a744cf": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, 
+ "width": null + } + }, + "edcb22216cac4bcf83a301d975f20d2d": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "3ace28614fe446f18268578e56b5ec14": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "3c8f3c862f744ddba9524079c636124a": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": 
"LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "36eb36dba13d4ad4a73b401e0dc22c42": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "236bb6124df1443684b16dd34fba2ed4": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + 
"flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "3264137a43cc4725948aab030421b24c": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "dd76625672d74095a0f691206646fbd8": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_a10c0e99afb546d79fca304e8a8e6ab1", + "IPY_MODEL_6fa7911781ef42949e56c80dc1f85299", + "IPY_MODEL_832e08b2b4524680bca9d71c363f3232" + ], + "layout": "IPY_MODEL_10ec65b4df86458ba2eee2eeccdc91e6" + } + }, + "a10c0e99afb546d79fca304e8a8e6ab1": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + 
"model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_764921df2d7b438b8a8ad0d7b68b8b6e", + "placeholder": "​", + "style": "IPY_MODEL_12f7b959b26b472099ea4e06b606772a", + "value": "special_tokens_map.json: 100%" + } + }, + "6fa7911781ef42949e56c80dc1f85299": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_32c6cb10b1e946a89a7b08505c1582e9", + "max": 957, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_3ee11622c9a0405abf5f246720d358b2", + "value": 957 + } + }, + "832e08b2b4524680bca9d71c363f3232": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_d78ced95d2134e299573c1e8d712b3f8", + "placeholder": "​", + "style": "IPY_MODEL_6ff7b4b691a74f6ab232c26ee55b9982", + "value": " 957/957 [00:00<00:00, 43.5kB/s]" + } + }, + "10ec65b4df86458ba2eee2eeccdc91e6": 
{ + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "764921df2d7b438b8a8ad0d7b68b8b6e": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": 
null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "12f7b959b26b472099ea4e06b606772a": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "32c6cb10b1e946a89a7b08505c1582e9": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": 
null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "3ee11622c9a0405abf5f246720d358b2": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "d78ced95d2134e299573c1e8d712b3f8": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "6ff7b4b691a74f6ab232c26ee55b9982": { + "model_module": "@jupyter-widgets/controls", + "model_name": 
"DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "2cc542103450405a853945ff07471932": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_5807fb12507f4fc0aaa4d083477aba27", + "IPY_MODEL_11d2ce4ace194bb1825fefacf3cc36f0", + "IPY_MODEL_b22724e628384993aa52c255c8bffc6f" + ], + "layout": "IPY_MODEL_e4dc10b2ef9c4a1f99689ebe9d48886a" + } + }, + "5807fb12507f4fc0aaa4d083477aba27": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_6b580a221e2f48b5b058c6a6189cc99d", + "placeholder": "​", + "style": "IPY_MODEL_e5f30b22e01c4b74a2f20f3b9880d7ae", + "value": "config.json: 100%" + } + }, + "11d2ce4ace194bb1825fefacf3cc36f0": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + 
"_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_e353ffe187d94729a65453fcd3d8a9a3", + "max": 696, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_b056d5ef8b9244828e10f685e38d47bb", + "value": 696 + } + }, + "b22724e628384993aa52c255c8bffc6f": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_32ce45ecf0d64c81bea12692ad52ed45", + "placeholder": "​", + "style": "IPY_MODEL_b930858da1a2407ba49a8a8a17a1fc70", + "value": " 696/696 [00:00<00:00, 35.8kB/s]" + } + }, + "e4dc10b2ef9c4a1f99689ebe9d48886a": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + 
"justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "6b580a221e2f48b5b058c6a6189cc99d": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "e5f30b22e01c4b74a2f20f3b9880d7ae": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + 
"_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "e353ffe187d94729a65453fcd3d8a9a3": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "b056d5ef8b9244828e10f685e38d47bb": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "32ce45ecf0d64c81bea12692ad52ed45": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + 
"_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "b930858da1a2407ba49a8a8a17a1fc70": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "d366271fc98943aa8fe3da314c1e95dc": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", 
+ "children": [ + "IPY_MODEL_364ab880ac304099b1bb83a92d6a7eed", + "IPY_MODEL_4a8d5b1ca09d493c8b93baf92e7fd5ae", + "IPY_MODEL_c6867a46aa064e26831be8a3a4278905" + ], + "layout": "IPY_MODEL_053fc43d2aad4aeeb114b8fd9aa2aef6" + } + }, + "364ab880ac304099b1bb83a92d6a7eed": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_a9583dca84a14e9bb89e58a430c391e3", + "placeholder": "​", + "style": "IPY_MODEL_15b6e4ff685f4f5e8c997e151d2b4007", + "value": "pytorch_model.bin: 100%" + } + }, + "4a8d5b1ca09d493c8b93baf92e7fd5ae": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_d73e215040114141bc60b0f58a1f8646", + "max": 506353257, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_18ab1cf13c494b02af122ee0069e3c92", + "value": 506353257 + } + }, + "c6867a46aa064e26831be8a3a4278905": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": 
"@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_839cabb3e69c46549665769afcc24e08", + "placeholder": "​", + "style": "IPY_MODEL_56fe41cf83454378a9345ee4e9a26192", + "value": " 506M/506M [00:09<00:00, 57.3MB/s]" + } + }, + "053fc43d2aad4aeeb114b8fd9aa2aef6": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "a9583dca84a14e9bb89e58a430c391e3": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + 
"align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "15b6e4ff685f4f5e8c997e151d2b4007": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "d73e215040114141bc60b0f58a1f8646": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + 
"grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "18ab1cf13c494b02af122ee0069e3c92": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "839cabb3e69c46549665769afcc24e08": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": 
null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "56fe41cf83454378a9345ee4e9a26192": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + } + } + } + }, + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "EWlgdx43A_NL", + "outputId": "a2431529-0c26-4076-a6cf-4c9146c4f9b0" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Mounted at /content/drive\n" + ] + } + ], + "source": [ + "from google.colab import drive\n", + "drive.mount('/content/drive')" + ] + }, + { + "cell_type": "code", + "source": [ + "! 
pip install faknow\n" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "7dssDha5BGNL", + "outputId": "5d303d54-6c32-482d-83e7-c46a4506cbce" + }, + "execution_count": 2, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Collecting faknow\n", + " Downloading faknow-0.0.3-py3-none-any.whl (147 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m147.6/147.6 kB\u001b[0m \u001b[31m1.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: transformers>=4.26.1 in /usr/local/lib/python3.10/dist-packages (from faknow) (4.38.2)\n", + "Requirement already satisfied: numpy>=1.23.4 in /usr/local/lib/python3.10/dist-packages (from faknow) (1.25.2)\n", + "Requirement already satisfied: pandas>=1.5.2 in /usr/local/lib/python3.10/dist-packages (from faknow) (1.5.3)\n", + "Requirement already satisfied: scikit-learn>=1.1.3 in /usr/local/lib/python3.10/dist-packages (from faknow) (1.2.2)\n", + "Requirement already satisfied: tensorboard>=2.10.0 in /usr/local/lib/python3.10/dist-packages (from faknow) (2.15.2)\n", + "Requirement already satisfied: tqdm>=4.64.1 in /usr/local/lib/python3.10/dist-packages (from faknow) (4.66.2)\n", + "Requirement already satisfied: jieba>=0.42.1 in /usr/local/lib/python3.10/dist-packages (from faknow) (0.42.1)\n", + "Requirement already satisfied: gensim>=4.2.0 in /usr/local/lib/python3.10/dist-packages (from faknow) (4.3.2)\n", + "Requirement already satisfied: pillow>=9.3.0 in /usr/local/lib/python3.10/dist-packages (from faknow) (9.4.0)\n", + "Requirement already satisfied: nltk>=3.7 in /usr/local/lib/python3.10/dist-packages (from faknow) (3.8.1)\n", + "Collecting sphinx-markdown-tables>=0.0.17 (from faknow)\n", + " Downloading sphinx_markdown_tables-0.0.17-py3-none-any.whl (28 kB)\n", + "Requirement already satisfied: scipy>=1.7.0 in /usr/local/lib/python3.10/dist-packages (from gensim>=4.2.0->faknow) 
(1.11.4)\n", + "Requirement already satisfied: smart-open>=1.8.1 in /usr/local/lib/python3.10/dist-packages (from gensim>=4.2.0->faknow) (6.4.0)\n", + "Requirement already satisfied: click in /usr/local/lib/python3.10/dist-packages (from nltk>=3.7->faknow) (8.1.7)\n", + "Requirement already satisfied: joblib in /usr/local/lib/python3.10/dist-packages (from nltk>=3.7->faknow) (1.3.2)\n", + "Requirement already satisfied: regex>=2021.8.3 in /usr/local/lib/python3.10/dist-packages (from nltk>=3.7->faknow) (2023.12.25)\n", + "Requirement already satisfied: python-dateutil>=2.8.1 in /usr/local/lib/python3.10/dist-packages (from pandas>=1.5.2->faknow) (2.8.2)\n", + "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas>=1.5.2->faknow) (2023.4)\n", + "Requirement already satisfied: threadpoolctl>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from scikit-learn>=1.1.3->faknow) (3.3.0)\n", + "Requirement already satisfied: markdown>=3.4 in /usr/local/lib/python3.10/dist-packages (from sphinx-markdown-tables>=0.0.17->faknow) (3.5.2)\n", + "Requirement already satisfied: absl-py>=0.4 in /usr/local/lib/python3.10/dist-packages (from tensorboard>=2.10.0->faknow) (1.4.0)\n", + "Requirement already satisfied: grpcio>=1.48.2 in /usr/local/lib/python3.10/dist-packages (from tensorboard>=2.10.0->faknow) (1.62.0)\n", + "Requirement already satisfied: google-auth<3,>=1.6.3 in /usr/local/lib/python3.10/dist-packages (from tensorboard>=2.10.0->faknow) (2.27.0)\n", + "Requirement already satisfied: google-auth-oauthlib<2,>=0.5 in /usr/local/lib/python3.10/dist-packages (from tensorboard>=2.10.0->faknow) (1.2.0)\n", + "Requirement already satisfied: protobuf!=4.24.0,>=3.19.6 in /usr/local/lib/python3.10/dist-packages (from tensorboard>=2.10.0->faknow) (3.20.3)\n", + "Requirement already satisfied: requests<3,>=2.21.0 in /usr/local/lib/python3.10/dist-packages (from tensorboard>=2.10.0->faknow) (2.31.0)\n", + "Requirement already satisfied: 
setuptools>=41.0.0 in /usr/local/lib/python3.10/dist-packages (from tensorboard>=2.10.0->faknow) (67.7.2)\n", + "Requirement already satisfied: six>1.9 in /usr/local/lib/python3.10/dist-packages (from tensorboard>=2.10.0->faknow) (1.16.0)\n", + "Requirement already satisfied: tensorboard-data-server<0.8.0,>=0.7.0 in /usr/local/lib/python3.10/dist-packages (from tensorboard>=2.10.0->faknow) (0.7.2)\n", + "Requirement already satisfied: werkzeug>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from tensorboard>=2.10.0->faknow) (3.0.1)\n", + "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from transformers>=4.26.1->faknow) (3.13.1)\n", + "Requirement already satisfied: huggingface-hub<1.0,>=0.19.3 in /usr/local/lib/python3.10/dist-packages (from transformers>=4.26.1->faknow) (0.20.3)\n", + "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from transformers>=4.26.1->faknow) (23.2)\n", + "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from transformers>=4.26.1->faknow) (6.0.1)\n", + "Requirement already satisfied: tokenizers<0.19,>=0.14 in /usr/local/lib/python3.10/dist-packages (from transformers>=4.26.1->faknow) (0.15.2)\n", + "Requirement already satisfied: safetensors>=0.4.1 in /usr/local/lib/python3.10/dist-packages (from transformers>=4.26.1->faknow) (0.4.2)\n", + "Requirement already satisfied: cachetools<6.0,>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from google-auth<3,>=1.6.3->tensorboard>=2.10.0->faknow) (5.3.3)\n", + "Requirement already satisfied: pyasn1-modules>=0.2.1 in /usr/local/lib/python3.10/dist-packages (from google-auth<3,>=1.6.3->tensorboard>=2.10.0->faknow) (0.3.0)\n", + "Requirement already satisfied: rsa<5,>=3.1.4 in /usr/local/lib/python3.10/dist-packages (from google-auth<3,>=1.6.3->tensorboard>=2.10.0->faknow) (4.9)\n", + "Requirement already satisfied: requests-oauthlib>=0.7.0 in 
/usr/local/lib/python3.10/dist-packages (from google-auth-oauthlib<2,>=0.5->tensorboard>=2.10.0->faknow) (1.3.1)\n", + "Requirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub<1.0,>=0.19.3->transformers>=4.26.1->faknow) (2023.6.0)\n", + "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub<1.0,>=0.19.3->transformers>=4.26.1->faknow) (4.10.0)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2.21.0->tensorboard>=2.10.0->faknow) (3.3.2)\n", + "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2.21.0->tensorboard>=2.10.0->faknow) (3.6)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2.21.0->tensorboard>=2.10.0->faknow) (2.0.7)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2.21.0->tensorboard>=2.10.0->faknow) (2024.2.2)\n", + "Requirement already satisfied: MarkupSafe>=2.1.1 in /usr/local/lib/python3.10/dist-packages (from werkzeug>=1.0.1->tensorboard>=2.10.0->faknow) (2.1.5)\n", + "Requirement already satisfied: pyasn1<0.6.0,>=0.4.6 in /usr/local/lib/python3.10/dist-packages (from pyasn1-modules>=0.2.1->google-auth<3,>=1.6.3->tensorboard>=2.10.0->faknow) (0.5.1)\n", + "Requirement already satisfied: oauthlib>=3.0.0 in /usr/local/lib/python3.10/dist-packages (from requests-oauthlib>=0.7.0->google-auth-oauthlib<2,>=0.5->tensorboard>=2.10.0->faknow) (3.2.2)\n", + "Installing collected packages: sphinx-markdown-tables, faknow\n", + "Successfully installed faknow-0.0.3 sphinx-markdown-tables-0.0.17\n" + ] + } + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "id": "Zo3_tdxod_tn" + }, + "outputs": [], + "source": [ + "from typing import List, Optional, 
Tuple\n", + "\n", + "import torch\n", + "from torch import Tensor\n", + "from torch import nn\n", + "from transformers import RobertaModel\n", + "\n", + "from faknow.model.layers.layer import TextCNNLayer\n", + "from faknow.model.model import AbstractModel\n", + "import pandas as pd\n", + "\n", + "\n", + "class _MLP(nn.Module):\n", + " def __init__(self,\n", + " input_dim: int,\n", + " embed_dims: List[int],\n", + " dropout_rate: float,\n", + " output_layer=True):\n", + " super().__init__()\n", + " layers = list()\n", + " for embed_dim in embed_dims:\n", + " layers.append(nn.Linear(input_dim, embed_dim))\n", + " layers.append(nn.BatchNorm1d(embed_dim))\n", + " layers.append(nn.ReLU())\n", + " layers.append(nn.Dropout(p=dropout_rate))\n", + " input_dim = embed_dim\n", + " if output_layer:\n", + " layers.append(torch.nn.Linear(input_dim, 1))\n", + " self.mlp = torch.nn.Sequential(*layers)\n", + "\n", + " def forward(self, x: Tensor) -> Tensor:\n", + " \"\"\"\n", + "\n", + " Args:\n", + " x (Tensor): shared feature from domain and text, shape=(batch_size, embed_dim)\n", + "\n", + " \"\"\"\n", + " return self.mlp(x)\n", + "\n", + "\n", + "class _MaskAttentionLayer(torch.nn.Module):\n", + " \"\"\"\n", + " Compute attention layer\n", + " \"\"\"\n", + " def __init__(self, input_size: int):\n", + " super(_MaskAttentionLayer, self).__init__()\n", + " self.attention_layer = torch.nn.Linear(input_size, 1)\n", + "\n", + " def forward(self,\n", + " inputs: Tensor,\n", + " mask: Optional[Tensor] = None) -> Tuple[Tensor, Tensor]:\n", + " weights = self.attention_layer(inputs).view(-1, inputs.size(1))\n", + " if mask is not None:\n", + " weights = weights.masked_fill(mask == 0, float(\"-inf\"))\n", + " weights = torch.softmax(weights, dim=-1).unsqueeze(1)\n", + " outputs = torch.matmul(weights, inputs).squeeze(1)\n", + " return outputs, weights\n", + "\n", + "\n", + "class MDFEND(AbstractModel):\n", + " r\"\"\"\n", + " MDFEND: Multi-domain Fake News Detection, CIKM 2021\n", + " 
paper: https://dl.acm.org/doi/10.1145/3459637.3482139\n", + " code: https://github.com/kennqiang/MDFEND-Weibo21\n", + " \"\"\"\n", + " def __init__(self,\n", + " pre_trained_bert_name: str,\n", + " domain_num: int,\n", + " mlp_dims: Optional[List[int]] = None,\n", + " dropout_rate=0.2,\n", + " expert_num=5):\n", + " \"\"\"\n", + "\n", + " Args:\n", + " pre_trained_bert_name (str): the name or local path of pre-trained bert model\n", + " domain_num (int): total number of all domains\n", + " mlp_dims (List[int]): a list of the dimensions in MLP layer, if None, [384] is used, default=None\n", + " dropout_rate (float): rate of Dropout layer, default=0.2\n", + " expert_num (int): number of experts also called TextCNNLayer, default=5\n", + " \"\"\"\n", + " super(MDFEND, self).__init__()\n", + " self.domain_num = domain_num\n", + " self.expert_num = expert_num\n", + " self.bert = RobertaModel.from_pretrained(\n", + " pre_trained_bert_name).requires_grad_(False)\n", + " self.embedding_size = self.bert.config.hidden_size\n", + " self.loss_func = nn.BCELoss()\n", + " if mlp_dims is None:\n", + " mlp_dims = [384]\n", + "\n", + " filter_num = 64\n", + " filter_sizes = [1, 2, 3, 5, 10]\n", + " experts = [\n", + " TextCNNLayer(self.embedding_size, filter_num, filter_sizes)\n", + " for _ in range(self.expert_num)\n", + " ]\n", + " self.experts = nn.ModuleList(experts)\n", + "\n", + " self.gate = nn.Sequential(\n", + " nn.Linear(self.embedding_size * 2, mlp_dims[-1]), nn.ReLU(),\n", + " nn.Linear(mlp_dims[-1], self.expert_num), nn.Softmax(dim=1))\n", + "\n", + " self.attention = _MaskAttentionLayer(self.embedding_size)\n", + "\n", + " self.domain_embedder = nn.Embedding(num_embeddings=self.domain_num,\n", + " embedding_dim=self.embedding_size)\n", + " self.classifier = _MLP(320, mlp_dims, dropout_rate)\n", + "\n", + " def forward(self, token_id: Tensor, mask: Tensor,\n", + " domain: Tensor) -> Tensor:\n", + " \"\"\"\n", + "\n", + " Args:\n", + " token_id (Tensor): 
token ids from bert tokenizer, shape=(batch_size, max_len)\n", + " mask (Tensor): mask from bert tokenizer, shape=(batch_size, max_len)\n", + " domain (Tensor): domain id, shape=(batch_size,)\n", + "\n", + " Returns:\n", + " FloatTensor: the prediction of being fake, shape=(batch_size,)\n", + " \"\"\"\n", + " text_embedding = self.bert(token_id,\n", + " attention_mask=mask).last_hidden_state\n", + " attention_feature, _ = self.attention(text_embedding, mask)\n", + "\n", + " domain_embedding = self.domain_embedder(domain.view(-1, 1)).squeeze(1)\n", + "\n", + " gate_input = torch.cat([domain_embedding, attention_feature], dim=-1)\n", + " gate_output = self.gate(gate_input)\n", + "\n", + " shared_feature = 0\n", + " for i in range(self.expert_num):\n", + " expert_feature = self.experts[i](text_embedding)\n", + " shared_feature += (expert_feature * gate_output[:, i].unsqueeze(1))\n", + "\n", + " label_pred = self.classifier(shared_feature)\n", + "\n", + " return torch.sigmoid(label_pred.squeeze(1))\n", + "\n", + " def calculate_loss(self, data) -> Tensor:\n", + " \"\"\"\n", + " calculate loss via BCELoss\n", + "\n", + " Args:\n", + " data (dict): batch data dict\n", + "\n", + " Returns:\n", + " loss (Tensor): loss value\n", + " \"\"\"\n", + "\n", + " token_ids = data['text']['token_id']\n", + " masks = data['text']['mask']\n", + " domains = data['domain']\n", + " labels = data['label']\n", + " output = self.forward(token_ids, masks, domains)\n", + " return self.loss_func(output, labels.float())\n", + "\n", + " def predict(self, data_without_label) -> Tensor:\n", + " \"\"\"\n", + " predict the probability of being fake news\n", + "\n", + " Args:\n", + " data_without_label (Dict[str, Any]): batch data dict\n", + "\n", + " Returns:\n", + " Tensor: probability of being fake, shape=(batch_size,)\n", + " \"\"\"\n", + "\n", + " token_ids = data_without_label['text']['token_id']\n", + " masks = data_without_label['text']['mask']\n", + " domains = data_without_label['domain']\n", + 
"\n", + "\n", + " output_prob = self.forward(token_ids, masks,domains)\n", + "\n", + " return output_prob" + ] + }, + { + "cell_type": "code", + "source": [ + "from faknow.data.dataset.text import TextDataset\n", + "from faknow.data.process.text_process import TokenizerFromPreTrained\n", + "from faknow.evaluate.evaluator import Evaluator\n", + "\n", + "import torch\n", + "from torch.utils.data import DataLoader" + ], + "metadata": { + "id": "Tg2zBjzUBTbt" + }, + "execution_count": 4, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "testing_path = \"/content/drive/MyDrive/sinhala-dataset/test_data.json\"\n" + ], + "metadata": { + "id": "Ls-xo82WBbUg" + }, + "execution_count": 5, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "df = pd.read_json(testing_path)\n", + "df.head()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 206 + }, + "id": "by3bnTMCMh6K", + "outputId": "bdc10951-f15e-4918-b7cd-84dd5535b4e5" + }, + "execution_count": 7, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " text domain label\n", + "0 @USER ඔයාලට වගේ කැත තඩි බඩවල් අපිට නැ 0 1\n", + "1 @USER , @USER @USER ඒක තමයි දේසිකා එහෙනම් මට ක... 0 0\n", + "2 ඒකි ඒම නෑ බං # jaysays 0 0\n", + "3 @USER ඔයා නං හෙන ගොං අප්ප . මෝඩ කමෙන්ට්සමයි දා... 0 1\n", + "4 කවුරුහරි මොනවහරි ගෙනාවම , අපිත් කාල අම්මට ඉතිර... 0 0" + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
textdomainlabel
0@USER ඔයාලට වගේ කැත තඩි බඩවල් අපිට නැ01
1@USER , @USER @USER ඒක තමයි දේසිකා එහෙනම් මට ක...00
2ඒකි ඒම නෑ බං # jaysays00
3@USER ඔයා නං හෙන ගොං අප්ප . මෝඩ කමෙන්ට්සමයි දා...01
4කවුරුහරි මොනවහරි ගෙනාවම , අපිත් කාල අම්මට ඉතිර...00
\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "\n", + "
\n", + "
\n" + ], + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "dataframe", + "variable_name": "df", + "summary": "{\n \"name\": \"df\",\n \"rows\": 5000,\n \"fields\": [\n {\n \"column\": \"text\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 5000,\n \"samples\": [\n \"\\u0d87\\u0dba\\u0dd2 \\u0dba\\u0d9a\\u0ddd \\u0dbd\\u0d82\\u0d9a\\u0dcf\\u0dc0\\u0dda \\u0db6\\u0dd9\\u0dbb\\u0dd2 ! \\u0daf\\u0dd3\\u0db4\\u0dbd\\u0dca\\u0dbd\\u0dcf # \\u0dc0\\u0dd9\\u0dbb\\u0dd2 # \\u0d9c\\u0dbd\\u0dca\\u0db6\\u0ddd\\u0dad\\u0dbd\\u0dca # GenElecSL # SriLanka # TamilNadu # Election2015\",\n \"@USER @USER \\u0d9a\\u0dcf\\u0dbd\\u0d9a\\u0db1\\u0dca\\u0db1\\u0dd2 \\u0d95\\u0d9a\\u0dd4\\u0db1\\u0d9c\\u0dd9 \\u0db4\\u0dd4\\u0d9a \\u0db8\\u0dc4\\u0dbd\\u0dcf \\u0db6\\u0da9\\u0dba\\u0db1\\u0dca\\u0db1 \\u0db6\\u0dd9\\u0dc4\\u0dd9\\u0dad\\u0dca \\u0daf\\u0dd9\\u0db1\\u0dca\\u0db1 \\u0d95\\u0db1\\u0dcf\",\n \"\\u0dc3\\u0dd3\\u0dbd\\u0dcf\\u0dc0\\u0dad\\u0dd4\\u0dbb \\u0db1\\u0dcf / \\u0dc3\\u0dd9\\u0db6\\u0dc5\\u0dd4 \\u0dad\\u0dd2\\u0daf\\u0dd9\\u0db1\\u0d9a\\u0dd4\\u0da7 \\u0db4\\u0dc4\\u0dbb\\u0daf\\u0dd3 \\u0dc3\\u0dd9\\u0db6\\u0dbd\\u0dd9\\u0d9a\\u0dca \\u0dc4\\u0dd2\\u0dbb\\u0d9a\\u0dbb\\u0dba\\u0dd2 \\u0db6\\u0dda\\u0dbb\\u0dcf\\u0d9c\\u0dad\\u0dca\\u0dad\\u0dd9 \\u0db1\\u0dd3\\u0dad\\u0dd2\\u0db8\\u0dba \\u0db4\\u0dd2\\u0dba\\u0dc0\\u0dbb \\u0db1\\u0ddc\\u0d9c\\u0db1\\u0dca\\u0db1 \\u0db4\\u0ddc\\u0dbb\\u0ddc\\u0db1\\u0dca\\u0daf\\u0dd4\\u0dc0 \\u0db8\\u0dad\\u0dba \\u0db1\\u0ddc\\u0daf\\u0d9a\\u0dd2\\u0db1\\u0dca URL via @USER\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"domain\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0,\n \"min\": 0,\n \"max\": 2,\n \"num_unique_values\": 3,\n \"samples\": [\n 0,\n 1,\n 2\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"label\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0,\n \"min\": 0,\n \"max\": 
1,\n \"num_unique_values\": 2,\n \"samples\": [\n 0,\n 1\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" + } + }, + "metadata": {}, + "execution_count": 7 + } + ] + }, + { + "cell_type": "code", + "source": [ + "df =df[:100]" + ], + "metadata": { + "id": "LX0T74ZtM9j9" + }, + "execution_count": 8, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "df[\"label\"] = int(0)" + ], + "metadata": { + "id": "60iL_I8ONCts" + }, + "execution_count": 9, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "df.head()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 206 + }, + "id": "SE4yeguZNIo-", + "outputId": "110eb559-0dd0-4f2e-cb1c-694100365a31" + }, + "execution_count": 10, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " text domain label\n", + "0 @USER ඔයාලට වගේ කැත තඩි බඩවල් අපිට නැ 0 0\n", + "1 @USER , @USER @USER ඒක තමයි දේසිකා එහෙනම් මට ක... 0 0\n", + "2 ඒකි ඒම නෑ බං # jaysays 0 0\n", + "3 @USER ඔයා නං හෙන ගොං අප්ප . මෝඩ කමෙන්ට්සමයි දා... 0 0\n", + "4 කවුරුහරි මොනවහරි ගෙනාවම , අපිත් කාල අම්මට ඉතිර... 0 0" + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
textdomainlabel
0@USER ඔයාලට වගේ කැත තඩි බඩවල් අපිට නැ00
1@USER , @USER @USER ඒක තමයි දේසිකා එහෙනම් මට ක...00
2ඒකි ඒම නෑ බං # jaysays00
3@USER ඔයා නං හෙන ගොං අප්ප . මෝඩ කමෙන්ට්සමයි දා...00
4කවුරුහරි මොනවහරි ගෙනාවම , අපිත් කාල අම්මට ඉතිර...00
\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "\n", + "
\n", + "
\n" + ], + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "dataframe", + "variable_name": "df", + "summary": "{\n \"name\": \"df\",\n \"rows\": 100,\n \"fields\": [\n {\n \"column\": \"text\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 100,\n \"samples\": [\n \"\\u0d89\\u0dc3\\u0dca\\u0dc3\\u0dbb \\u0d8b\\u0db6 \\u0dc0\\u0dbd\\u0dd2\\u0dba\\u0d9a\\u0dca \\u0db1\\u0db8\\u0dca \\u0dbd\\u0ddc\\u0dc0\\u0dd9\\u0dad\\u0dca \\u0d9a\\u0dd2\\u0dba\\u0db1\\u0ddc \\u0d9c\\u0dd2\\u0dc4\\u0dcf\\u0db1\\u0dca \\u0d85\\u0dba\\u0dd2\\u0dba\\u0dcf \\u0d89\\u0d9a\\u0dca\\u0db8\\u0db1\\u0da7 \\u0dc0\\u0dbb\\u0dd9\\u0db1\\u0dca \\u0d9a\\u0dd2\\u0dba\\u0dbd\\u0dcf \\u0d85\\u0da9 \\u0d9c\\u0dc4\\u0dbd\\u0dcf \\u0d9a\\u0dd2\\u0dba\\u0db1\\u0dca\\u0db1\\u0dda \\u0dad\\u0ddc \\u0daf\\u0dd0\\u0db1\\u0dca \\u0dc0\\u0dd9\\u0db1\\u0dc3\\u0dca \\u0d9a\\u0db8\\u0dca \\u0d9a\\u0dbb\\u0db1\\u0dc0\\u0dcf \\u0db8\\u0db1\\u0dca \\u0dad\\u0ddc\\u0da7 \\u0dc0\\u0ddb\\u0dbb\\u0dba\",\n \"\\u0d85\\u0db1\\u0dd4\\u0dbb\\u0d9c\\u0dd9 \\u0dc3\\u0da7\\u0dca\\u0da7\\u0dd0\\u0db9\\u0dd2\\u0dba\\u0db1\\u0dca\\u0d9c\\u0dd9 \\u0d89\\u0dad\\u0dca\\u0dad\\u0dd1\\u0dc0\\u0ddc \\u0daf\\u0dd4\\u0d91\\u0d85\\u0db1 \\u0d91\\u0d9a \\u0d85\\u0dc4\\u0db1\\u0dca\\u0db1\\u0daf\\u0dd9\\u0dba\\u0d9a\\u0dca\\u0daf \\u0dad\\u0dc0 \\u0db6\\u0dd0\\u0db1\\u0db4\\u0dbd\\u0dca\\u0dbd \\u0da7\\u0dca\\u0d9c\\u0dc0 \\u0db8\\u0da9\\u0d9c\\u0dc4\\u0db4\\u0dbd\\u0dca\\u0dbd \\u0d8b\\u0db9\\u0dbd\\u0d9c\\u0dd9 \\u0dc3\\u0d9a\\u0dca\\u0d9a\\u0dd2\\u0dbd\\u0dd2 \\u0db4\\u0dbb \\u0d9c\\u0dad\\u0dd2 \\u0dbd\\u0ddd\\u0d9a\\u0dd9\\u0da7\\u0db8 \\u0db4\\u0dd9\\u0db1\\u0dca\\u0db1\\u0db4\\u0dbd\\u0dca\\u0dbd . 
% \\u0da7 \\u0d87\\u0daf\\u0dbd \\u0daf\\u0dd0\\u0db8\\u0dca\\u0db8\\u0dd9\\u0dad\\u0dca \\u0db8\\u0dda\\u0d9a\\u0dd9 \\u0d87\\u0db8\\u0db1\\u0dd9\\u0db1 \\u0dc3\\u0dd2\\u0db4\\u0dca\\u0db4\\u0dd2\\u0d9a\\u0da7\\u0dd4\",\n \"\\u0db8\\u0dbd\\u0dca\\u0dbd\\u0dd2\\u0d9c\\u0dd9 \\u0d8b\\u0db4\\u0db1\\u0dca\\u0daf\\u0dd2\\u0db1\\u0dda\\u0da7 \\u0dc4\\u0dd9\\u0da7 \\u0daf\\u0dd9\\u0db1\\u0dca\\u0db1 \\u0daf\\u0dd9\\u0dba\\u0d9a\\u0dca \\u0d9a\\u0dbd\\u0dca\\u0db4\\u0db1\\u0dcf \\u0d9a\\u0dbb \\u0d9a\\u0dbb \\u0d89\\u0daf\\u0dca\\u0daf\\u0dd2 \\u0dbd\\u0ddc\\u0d9a\\u0dca\\u0d9a\\u0dcf \\u0daf\\u0dd4\\u0db1\\u0dca\\u0db1 \\u0dc3\\u0db4\\u0dca\\u200d\\u0dbb\\u0dcf\\u0dba\\u0dd2\\u0dc3\\u0dca \\u0d91\\u0d9a \\u0db1\\u0db8\\u0dca \\u0db4\\u0da7\\u0dca\\u0da7 . . \\u0dbb\\u0dad\\u0dd2\\u0da4\\u0dca\\u0da4\\u0dcf \\u0db4\\u0dd9\\u0da7\\u0dca\\u0da7\\u0dd2\\u0dba\\u0d9a\\u0dd2\\u0db1\\u0dca \\u0dc2\\u0dda\\u0db4\\u0dca \\u0d8b\\u0db1\\u0dd1\\u0d9a\\u0dd2 \\u0dc4\\u0dd0\\u0db8\\u0dd2\\u0db1\\u0dda\\u0dc2\\u0db1\\u0dca ourNation HappyBirthday\\u0d92\\u0d85\\u0dc3\\u0dca\\u0dc3\\u0dda\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"domain\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0,\n \"min\": 0,\n \"max\": 2,\n \"num_unique_values\": 3,\n \"samples\": [\n 0,\n 1,\n 2\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"label\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0,\n \"min\": 0,\n \"max\": 0,\n \"num_unique_values\": 1,\n \"samples\": [\n 0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" + } + }, + "metadata": {}, + "execution_count": 10 + } + ] + }, + { + "cell_type": "code", + "source": [ + "print(len(df))" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "zTRfsZ_tNLif", + "outputId": "d0012de3-5298-4be5-b280-dee66208a034" + }, + "execution_count": 11, + "outputs": [ + { + "output_type": "stream", + "name": 
"stdout", + "text": [ + "100\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "path = '/content/drive/MyDrive/sinhala-dataset'\n", + "testing_json = \"/testing.json\"" + ], + "metadata": { + "id": "weZ2_xujNW1b" + }, + "execution_count": 12, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "df.to_json(path + testing_json, orient='records')\n" + ], + "metadata": { + "id": "HzAfca0LNUDx" + }, + "execution_count": 13, + "outputs": [] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 400, + "referenced_widgets": [ + "1edd00396f2d45a7b32079d43bc62634", + "a96994a464df43918566f6cc967e7148", + "92143f1854c44349a3d0f6b7838b6a5c", + "b90793e5e29c435cab6fd7b1e059c992", + "8ce96d68c1e443b28e1200f106fefb02", + "dcfa2646664e449c98a00e89b2b7984d", + "28bcfe01e8a64ba08ce62e9715ad85e4", + "1b72e7f7e85a49fb8c7a79bce1989647", + "76523fc98b644aaaaf6c605544e9fffb", + "b2d2a9eabbe14cddaa7d0aa39e7a1953", + "bf4db198f72441b48a5dbff8515a1f91", + "48e8e488c27a4948a455835f6caf2ce2", + "2fb5a8ac30ed49df93056bc6802e8ee0", + "c29fcea40de347bf9f274f375b9123a5", + "00cd02215e1f4225a4cf93b46b9a7e15", + "e1064a6f8bfd4435a6ad15d08ff44699", + "956884e6e8bc43f4bc51a2b75c131889", + "4e23a4eec35f4f8ea3114d9cb0ea1e04", + "e7a3964adec34bf6b37f52cf1119fa9c", + "22a128f583aa4514a1e71d0f8aaf8e79", + "3f902b8cb652446c84609cd730a64e35", + "a78b7664a1e346f181b203bb1645eb9b", + "3bf7edeed06a4ef3b1ce28f24201c84b", + "205cbd07f34345c48b1e72bc2cb9a93b", + "a2d328e2313a49aab752cd2ba38220b5", + "a3696eb89c4e434683bb5416d91602db", + "e04dfc9c2e5f437c8dd9b15f33c04a4a", + "1f5b368654494327bfc8d1c315f13832", + "853eb13a56ec4fbf89e25f333798132e", + "5713250ff3864029a3668c6a7eb1f3e3", + "944ed217567144459ea5279c34f529f3", + "1e0faaa1a09f42f8964a3203472f50c2", + "7c3da939876e4a6f8f2969fbf96bbcd0", + "aacafd29b7b5403bb8a7df1ebe2a731e", + "ec944b4365c34ab6813af9d925e2a552", + 
"8d48d360da5945bfbf300ae455043c07", + "36f7f5ae075f4c59a44283e25088eaab", + "8cfa54ddcf354e0e9f71102656a744cf", + "edcb22216cac4bcf83a301d975f20d2d", + "3ace28614fe446f18268578e56b5ec14", + "3c8f3c862f744ddba9524079c636124a", + "36eb36dba13d4ad4a73b401e0dc22c42", + "236bb6124df1443684b16dd34fba2ed4", + "3264137a43cc4725948aab030421b24c", + "dd76625672d74095a0f691206646fbd8", + "a10c0e99afb546d79fca304e8a8e6ab1", + "6fa7911781ef42949e56c80dc1f85299", + "832e08b2b4524680bca9d71c363f3232", + "10ec65b4df86458ba2eee2eeccdc91e6", + "764921df2d7b438b8a8ad0d7b68b8b6e", + "12f7b959b26b472099ea4e06b606772a", + "32c6cb10b1e946a89a7b08505c1582e9", + "3ee11622c9a0405abf5f246720d358b2", + "d78ced95d2134e299573c1e8d712b3f8", + "6ff7b4b691a74f6ab232c26ee55b9982", + "2cc542103450405a853945ff07471932", + "5807fb12507f4fc0aaa4d083477aba27", + "11d2ce4ace194bb1825fefacf3cc36f0", + "b22724e628384993aa52c255c8bffc6f", + "e4dc10b2ef9c4a1f99689ebe9d48886a", + "6b580a221e2f48b5b058c6a6189cc99d", + "e5f30b22e01c4b74a2f20f3b9880d7ae", + "e353ffe187d94729a65453fcd3d8a9a3", + "b056d5ef8b9244828e10f685e38d47bb", + "32ce45ecf0d64c81bea12692ad52ed45", + "b930858da1a2407ba49a8a8a17a1fc70", + "d366271fc98943aa8fe3da314c1e95dc", + "364ab880ac304099b1bb83a92d6a7eed", + "4a8d5b1ca09d493c8b93baf92e7fd5ae", + "c6867a46aa064e26831be8a3a4278905", + "053fc43d2aad4aeeb114b8fd9aa2aef6", + "a9583dca84a14e9bb89e58a430c391e3", + "15b6e4ff685f4f5e8c997e151d2b4007", + "d73e215040114141bc60b0f58a1f8646", + "18ab1cf13c494b02af122ee0069e3c92", + "839cabb3e69c46549665769afcc24e08", + "56fe41cf83454378a9345ee4e9a26192" + ] + }, + "id": "ROUE4LV1d_tp", + "outputId": "7f3d865e-97c6-434e-a8ae-f69a3462586a" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "/usr/local/lib/python3.10/dist-packages/huggingface_hub/utils/_token.py:80: UserWarning: \n", + "Access to the secret `HF_TOKEN` has not been granted on this notebook.\n", + "You will not be requested again.\n", + "Please restart the 
session if you want to be prompted again.\n", + " warnings.warn(\n" + ] + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "tokenizer_config.json: 0%| | 0.00/1.38k [00:00" + ] + }, + "metadata": {}, + "execution_count": 14 + } + ], + "source": [ + "MODEL_SAVE_PATH = \"/content/drive/MyDrive/models-path-improvement/last-epoch-model-2024-03-08-15_34_03_6.pth\"\n", + "\n", + "max_len, bert = 160 , 'sinhala-nlp/sinbert-sold-si'\n", + "tokenizer = TokenizerFromPreTrained(max_len, bert)\n", + "\n", + "# dataset\n", + "batch_size = 100\n", + "\n", + "\n", + "testing_path = path + testing_json\n", + "\n", + "testing_set = TextDataset(testing_path, ['text'], tokenizer)\n", + "testing_loader = DataLoader(testing_set, batch_size, shuffle=False)\n", + "\n", + "# prepare model\n", + "domain_num = 3\n", + "\n", + "model = MDFEND(bert, domain_num , expert_num=18 , mlp_dims = [5080 ,4020, 3010, 2024 ,1012 ,606 , 400])\n", + "model.load_state_dict(torch.load(f=MODEL_SAVE_PATH, map_location=torch.device('cpu')))\n", + "\n", + "\n" + ] + }, + { + "cell_type": "code", + "source": [ + "outputs = []\n", + "for batch_data in testing_loader:\n", + " outputs.append(model.predict(batch_data))" + ], + "metadata": { + "id": "nsTmmtm7ENK7" + }, + "execution_count": 15, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "outputs" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "MgJFRW6uOTNK", + "outputId": "37176cb8-b2e6-4c3b-c852-c4e3a17cda30" + }, + "execution_count": 16, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "[tensor([1.3248e-03, 2.0616e-01, 4.5341e-02, 9.3156e-01, 2.3167e-01, 9.9967e-01,\n", + " 6.8980e-02, 8.8265e-01, 4.6962e-01, 1.4711e-01, 3.9079e-01, 1.5254e-02,\n", + " 1.4336e-01, 9.9974e-01, 9.4320e-02, 9.6368e-01, 3.0400e-01, 1.1099e-02,\n", + " 8.6662e-01, 9.0376e-02, 4.0686e-01, 9.9839e-01, 9.9700e-01, 4.9826e-02,\n", + " 9.6036e-01, 3.1445e-02, 7.8756e-01, 
5.2800e-01, 9.4090e-01, 9.9148e-01,\n", + " 9.9725e-01, 1.6041e-02, 2.9223e-01, 1.5572e-01, 7.2350e-02, 8.2344e-02,\n", + " 5.4701e-03, 7.9817e-01, 1.6082e-03, 2.3789e-01, 2.0766e-02, 9.8514e-01,\n", + " 1.4062e-02, 9.8410e-01, 5.0685e-01, 1.0039e-01, 3.5957e-01, 4.6990e-01,\n", + " 6.0348e-01, 5.4888e-01, 9.7326e-02, 1.4647e-03, 2.0198e-02, 9.9995e-01,\n", + " 8.6098e-01, 7.3051e-01, 3.0538e-03, 9.9967e-01, 5.4075e-03, 2.4586e-02,\n", + " 2.1326e-01, 9.9988e-01, 7.7565e-01, 7.3468e-01, 8.2214e-02, 8.3052e-03,\n", + " 3.7278e-01, 3.6124e-01, 2.4839e-01, 2.4560e-01, 3.9281e-02, 9.9611e-01,\n", + " 2.3351e-02, 1.9584e-01, 1.1381e-01, 2.4559e-01, 6.5344e-01, 3.5736e-01,\n", + " 8.6219e-04, 3.8071e-01, 5.6490e-01, 2.5499e-02, 6.1897e-02, 9.0802e-01,\n", + " 8.2842e-02, 8.5183e-04, 6.4453e-01, 6.0612e-01, 3.8544e-01, 4.0832e-02,\n", + " 6.0973e-01, 4.7808e-02, 7.0927e-01, 8.4603e-01, 5.6889e-01, 5.3337e-01,\n", + " 1.2113e-01, 8.6022e-01, 8.5642e-03, 9.9990e-01],\n", + " grad_fn=)]" + ] + }, + "metadata": {}, + "execution_count": 16 + } + ] + }, + { + "cell_type": "code", + "source": [ + "# 1 ====> offensive\n", + "# 0 ====> not offensive\n", + "label = []\n", + "for output in outputs:\n", + " for out in output:\n", + " output_prob = out.item()\n", + " if output_prob >= 0.5:\n", + " label.append(1)\n", + " else:\n", + " label.append(0)\n" + ], + "metadata": { + "id": "ySdut6vMOvQY" + }, + "execution_count": 20, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "label" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "8thgk3ykPTOD", + "outputId": "fb05dd91-01d4-44a8-ef66-caf0eda24831" + }, + "execution_count": 21, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "[0,\n", + " 0,\n", + " 0,\n", + " 1,\n", + " 0,\n", + " 1,\n", + " 0,\n", + " 1,\n", + " 0,\n", + " 0,\n", + " 0,\n", + " 0,\n", + " 0,\n", + " 1,\n", + " 0,\n", + " 1,\n", + " 0,\n", + " 0,\n", + " 1,\n", + " 0,\n", + " 
0,\n", + " 1,\n", + " 1,\n", + " 0,\n", + " 1,\n", + " 0,\n", + " 1,\n", + " 1,\n", + " 1,\n", + " 1,\n", + " 1,\n", + " 0,\n", + " 0,\n", + " 0,\n", + " 0,\n", + " 0,\n", + " 0,\n", + " 1,\n", + " 0,\n", + " 0,\n", + " 0,\n", + " 1,\n", + " 0,\n", + " 1,\n", + " 1,\n", + " 0,\n", + " 0,\n", + " 0,\n", + " 1,\n", + " 1,\n", + " 0,\n", + " 0,\n", + " 0,\n", + " 1,\n", + " 1,\n", + " 1,\n", + " 0,\n", + " 1,\n", + " 0,\n", + " 0,\n", + " 0,\n", + " 1,\n", + " 1,\n", + " 1,\n", + " 0,\n", + " 0,\n", + " 0,\n", + " 0,\n", + " 0,\n", + " 0,\n", + " 0,\n", + " 1,\n", + " 0,\n", + " 0,\n", + " 0,\n", + " 0,\n", + " 1,\n", + " 0,\n", + " 0,\n", + " 0,\n", + " 1,\n", + " 0,\n", + " 0,\n", + " 1,\n", + " 0,\n", + " 0,\n", + " 1,\n", + " 1,\n", + " 0,\n", + " 0,\n", + " 1,\n", + " 0,\n", + " 1,\n", + " 1,\n", + " 1,\n", + " 1,\n", + " 0,\n", + " 1,\n", + " 0,\n", + " 1]" + ] + }, + "metadata": {}, + "execution_count": 21 + } + ] + }, + { + "cell_type": "code", + "source": [], + "metadata": { + "id": "P4eIsw1DPYQG" + }, + "execution_count": null, + "outputs": [] + } + ] +} \ No newline at end of file