diff --git "a/HabibiTranslator.ipynb" "b/HabibiTranslator.ipynb" new file mode 100644--- /dev/null +++ "b/HabibiTranslator.ipynb" @@ -0,0 +1,3497 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "provenance": [], + "gpuType": "T4" + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + }, + "accelerator": "GPU", + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "4936c0a76984492f88275a1500cf394f": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_4f189acddd754998acd4b4f9777469c4", + "IPY_MODEL_ba8cec14e32648f4847b2194618d0757", + "IPY_MODEL_2fa1d931574e4b118e524d3c252cc6e8" + ], + "layout": "IPY_MODEL_ae617cff68f24e09a3c6b0ad96f219dc" + } + }, + "4f189acddd754998acd4b4f9777469c4": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_f84ff4aa43fa4d778611f988215adc43", + "placeholder": "​", + "style": "IPY_MODEL_26ea2db896ef456b92e80f196172efce", + "value": "README.md: 100%" + } + }, + "ba8cec14e32648f4847b2194618d0757": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_5c93f4c064a143b0b954a76194701100", + "max": 12098, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_84229337aea946b1bfb9f4c394276791", + "value": 12098 + } + }, + "2fa1d931574e4b118e524d3c252cc6e8": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_2954dab4bb914153acf9eb5527c8ef58", + "placeholder": "​", + "style": "IPY_MODEL_25dd04b3a0674b539ac56147477f5a64", + "value": " 12.1k/12.1k [00:00<00:00, 412kB/s]" + } + }, + "ae617cff68f24e09a3c6b0ad96f219dc": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "f84ff4aa43fa4d778611f988215adc43": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "26ea2db896ef456b92e80f196172efce": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "5c93f4c064a143b0b954a76194701100": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "84229337aea946b1bfb9f4c394276791": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "2954dab4bb914153acf9eb5527c8ef58": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "25dd04b3a0674b539ac56147477f5a64": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "76944c7c2ec147b29705cf13841891c7": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_b9ed8a9466894003bf1bf35e6923ace2", + "IPY_MODEL_866ce20493ad47048c3cb87aad0e5909", + "IPY_MODEL_a8254a2af6a4431b81352e1eff75fcef" + ], + "layout": "IPY_MODEL_5009ae410a2c4d3b91999ab0e35c6c86" + } + }, + "b9ed8a9466894003bf1bf35e6923ace2": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_9db7fd2603c945d797239e2cba155e9b", + "placeholder": "​", + "style": "IPY_MODEL_cf2612b047574ecc9d209c2689795c6d", + "value": "tatoeba_mt.py: 100%" + } + }, + "866ce20493ad47048c3cb87aad0e5909": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_3120cfab5cd44f08af6944d9cd3d7659", + "max": 15499, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_fd0e167445dd44cc83007e5aeefb6dc8", + "value": 15499 + } + }, + "a8254a2af6a4431b81352e1eff75fcef": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_34332f0ec0054de082528326af5bc4b2", + "placeholder": "​", + "style": "IPY_MODEL_01826f384beb4abf84382add81b931b0", + "value": " 15.5k/15.5k [00:00<00:00, 1.10MB/s]" + } + }, + "5009ae410a2c4d3b91999ab0e35c6c86": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "9db7fd2603c945d797239e2cba155e9b": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "cf2612b047574ecc9d209c2689795c6d": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "3120cfab5cd44f08af6944d9cd3d7659": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "fd0e167445dd44cc83007e5aeefb6dc8": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "34332f0ec0054de082528326af5bc4b2": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "01826f384beb4abf84382add81b931b0": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "eaef0d163ed14989b44a1d3f21d59ce3": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_3ffe76ba7da14e98aca4dfe47b3076c6", + "IPY_MODEL_88168392fcb34d40a7c99f402e730393", + "IPY_MODEL_5ee168f8431249d7900001c84ecf20b7" + ], + "layout": "IPY_MODEL_59b62288d28048ff9c691f79f4ad11b0" + } + }, + "3ffe76ba7da14e98aca4dfe47b3076c6": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_fe6523eb73194f15a2428b542ae868e4", + "placeholder": "​", + "style": "IPY_MODEL_4ccccd3029124a72be368bd724f9072c", + "value": "dataset_infos.json: 100%" + } + }, + "88168392fcb34d40a7c99f402e730393": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_749c1b04cdb14c87b5ab95c86dcdd5f6", + "max": 1958806, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_09cd81148c4845958c087ee8c7b5451c", + "value": 1958806 + } + }, + "5ee168f8431249d7900001c84ecf20b7": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_9847b47b4310469ca2523199b3257e3a", + "placeholder": "​", + "style": "IPY_MODEL_ddc0dcd753834e02b6d4057155dcb0a1", + "value": " 1.96M/1.96M [00:00<00:00, 6.76MB/s]" + } + }, + "59b62288d28048ff9c691f79f4ad11b0": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "fe6523eb73194f15a2428b542ae868e4": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "4ccccd3029124a72be368bd724f9072c": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "749c1b04cdb14c87b5ab95c86dcdd5f6": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "09cd81148c4845958c087ee8c7b5451c": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "9847b47b4310469ca2523199b3257e3a": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "ddc0dcd753834e02b6d4057155dcb0a1": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "bd58c69cf5ad494eaaf12bbf10f66661": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_c948d400029a4ee6b7ac40faebfe17e7", + "IPY_MODEL_64f42dabd3eb48069167da0251f2d8fb", + "IPY_MODEL_504de4c8b10041b3863a9cbe040d5cf6" + ], + "layout": "IPY_MODEL_6568660add1448ac81df0c6a67c47d66" + } + }, + "c948d400029a4ee6b7ac40faebfe17e7": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_5ec8f06d517c4f7bb3ab639f91b792f3", + "placeholder": "​", + "style": "IPY_MODEL_b7efe138f73a4811a14fd91d5f18766c", + "value": "tatoeba-test.ara-eng.tsv: 100%" + } + }, + "64f42dabd3eb48069167da0251f2d8fb": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_99757ac60afb4aea8d77e49e199a74b0", + "max": 938171, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_84da3eb589514c8a8c7fdce342ad6ac8", + "value": 938171 + } + }, + "504de4c8b10041b3863a9cbe040d5cf6": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_8451c69261104dc4bc06924aa6b2b28a", + "placeholder": "​", + "style": "IPY_MODEL_555ff104df5a460f9f459ec5fcffd8be", + "value": " 938k/938k [00:00<00:00, 12.9MB/s]" + } + }, + "6568660add1448ac81df0c6a67c47d66": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "5ec8f06d517c4f7bb3ab639f91b792f3": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "b7efe138f73a4811a14fd91d5f18766c": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "99757ac60afb4aea8d77e49e199a74b0": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "84da3eb589514c8a8c7fdce342ad6ac8": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "8451c69261104dc4bc06924aa6b2b28a": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "555ff104df5a460f9f459ec5fcffd8be": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "1466ecf8d0ad429a97ee8126c03dd78b": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_c836fef6c5f64691988b7294d32ae0ae", + "IPY_MODEL_b7303fdb822b4f559c136fd62781411e", + "IPY_MODEL_c9b3815919124920b9f5cd593f75c4b5" + ], + "layout": "IPY_MODEL_4f462747f6524ab296f43975840b3468" + } + }, + "c836fef6c5f64691988b7294d32ae0ae": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_94ee64b3583d4826abef918380b4fc5a", + "placeholder": "​", + "style": "IPY_MODEL_dd24590438d04d8c94493f81794d73ea", + "value": "tatoeba-dev.ara-eng.tsv: 100%" + } + }, + "b7303fdb822b4f559c136fd62781411e": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_3c04db996c374b5fb58541a10fc20d25", + "max": 1778245, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_b8fecaed88f341ada9c54993fb06eb67", + "value": 1778245 + } + }, + "c9b3815919124920b9f5cd593f75c4b5": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_643834dda8f149559b1a610b610ca37b", + "placeholder": "​", + "style": "IPY_MODEL_09b958f9ce894514970adc140ed97ff4", + "value": " 1.78M/1.78M [00:00<00:00, 27.7MB/s]" + } + }, + "4f462747f6524ab296f43975840b3468": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "94ee64b3583d4826abef918380b4fc5a": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "dd24590438d04d8c94493f81794d73ea": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "3c04db996c374b5fb58541a10fc20d25": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "b8fecaed88f341ada9c54993fb06eb67": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "643834dda8f149559b1a610b610ca37b": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "09b958f9ce894514970adc140ed97ff4": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "49bc9b93252c459b905ddc7a03dfed71": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_145e2e17263f4d798185b561941594e6", + "IPY_MODEL_870fafbef85f4009b86a868b253f5691", + "IPY_MODEL_ac290a5547fc41a28ee6669a6f79e259" + ], + "layout": "IPY_MODEL_71d4d789d5f546d0b2a877cf17c588ec" + } + }, + "145e2e17263f4d798185b561941594e6": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_74186ec4da634d229d9a68af07dd48e0", + "placeholder": "​", + "style": "IPY_MODEL_0eee54a4e2104331a32be3fcd5b1300e", + "value": "Generating test split: 100%" + } + }, + "870fafbef85f4009b86a868b253f5691": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_fdb0faa095b34c59a0875b1d1262dc24", + "max": 10304, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_d97b2fa0c0b341af9d62cfe8e39694c9", + "value": 10304 + } + }, + "ac290a5547fc41a28ee6669a6f79e259": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_d1a219a28ac641819659e51e6dc6f467", + "placeholder": "​", + "style": "IPY_MODEL_47e2aa076a7540deb8e85d2596b9c2cc", + "value": " 10304/10304 [00:00<00:00, 38777.70 examples/s]" + } + }, + "71d4d789d5f546d0b2a877cf17c588ec": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "74186ec4da634d229d9a68af07dd48e0": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "0eee54a4e2104331a32be3fcd5b1300e": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "fdb0faa095b34c59a0875b1d1262dc24": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "d97b2fa0c0b341af9d62cfe8e39694c9": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "d1a219a28ac641819659e51e6dc6f467": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "47e2aa076a7540deb8e85d2596b9c2cc": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "f105c630bd4548ee953bfcb321194118": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_d41c6bb5990f4dd690d44f226d05fd74", + "IPY_MODEL_7660d0e619e14f5b91fc04dee179bb4c", + "IPY_MODEL_f12ba11fb38e4df98b96847d28e0c456" + ], + "layout": "IPY_MODEL_b858da19009a4bb795ccf8e5edd321c5" + } + }, + "d41c6bb5990f4dd690d44f226d05fd74": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_e07084d61862429989a8c1d75b0204c3", + "placeholder": "​", + "style": "IPY_MODEL_4e19113ec8824d7dbbd9ba93dd92cbb6", + "value": "Generating validation split: 100%" + } + }, + "7660d0e619e14f5b91fc04dee179bb4c": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_4f0c1475c59e4880b06023f0c919c320", + "max": 19528, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_703e40ca5e1b4077a9d21a3927ab075b", + "value": 19528 + } + }, + "f12ba11fb38e4df98b96847d28e0c456": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_5f64b257808d462bbabfb53c29bfba4d", + "placeholder": "​", + "style": "IPY_MODEL_ae32cbf6e2544ab4bc61dfc54ba937c4", + "value": " 19528/19528 [00:00<00:00, 46798.95 examples/s]" + } + }, + "b858da19009a4bb795ccf8e5edd321c5": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "e07084d61862429989a8c1d75b0204c3": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "4e19113ec8824d7dbbd9ba93dd92cbb6": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "4f0c1475c59e4880b06023f0c919c320": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "703e40ca5e1b4077a9d21a3927ab075b": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "5f64b257808d462bbabfb53c29bfba4d": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "ae32cbf6e2544ab4bc61dfc54ba937c4": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + } + } + } + }, + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "3aoxODC3qIl7", + "outputId": "77715d63-626b-45ae-f18e-fe13ae97e474" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Collecting gradio\n", + " Downloading gradio-5.20.1-py3-none-any.whl.metadata (16 kB)\n", + "Collecting datasets\n", + " Downloading datasets-3.3.2-py3-none-any.whl.metadata (19 kB)\n", + "Collecting aiofiles<24.0,>=22.0 (from gradio)\n", + " Downloading aiofiles-23.2.1-py3-none-any.whl.metadata (9.7 kB)\n", + "Requirement already satisfied: anyio<5.0,>=3.0 in /usr/local/lib/python3.11/dist-packages (from gradio) (3.7.1)\n", + "Collecting fastapi<1.0,>=0.115.2 (from gradio)\n", + " Downloading fastapi-0.115.11-py3-none-any.whl.metadata (27 kB)\n", + "Collecting ffmpy (from gradio)\n", + " Downloading ffmpy-0.5.0-py3-none-any.whl.metadata (3.0 kB)\n", + "Collecting gradio-client==1.7.2 (from gradio)\n", + " Downloading gradio_client-1.7.2-py3-none-any.whl.metadata (7.1 kB)\n", + "Collecting groovy~=0.1 (from gradio)\n", + " Downloading groovy-0.1.2-py3-none-any.whl.metadata (6.1 kB)\n", + "Requirement already satisfied: httpx>=0.24.1 in /usr/local/lib/python3.11/dist-packages (from gradio) (0.28.1)\n", + "Requirement already satisfied: huggingface-hub>=0.28.1 in /usr/local/lib/python3.11/dist-packages (from gradio) (0.28.1)\n", + "Requirement already satisfied: jinja2<4.0 in /usr/local/lib/python3.11/dist-packages (from gradio) (3.1.6)\n", + "Collecting markupsafe~=2.0 (from gradio)\n", + " Downloading MarkupSafe-2.1.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.0 kB)\n", + "Requirement already satisfied: numpy<3.0,>=1.0 in /usr/local/lib/python3.11/dist-packages (from gradio) (1.26.4)\n", + "Requirement already satisfied: orjson~=3.0 in /usr/local/lib/python3.11/dist-packages (from gradio) (3.10.15)\n", + "Requirement already satisfied: packaging in /usr/local/lib/python3.11/dist-packages (from gradio) (24.2)\n", + "Requirement already satisfied: pandas<3.0,>=1.0 in /usr/local/lib/python3.11/dist-packages (from gradio) (2.2.2)\n", + "Requirement already satisfied: pillow<12.0,>=8.0 in /usr/local/lib/python3.11/dist-packages (from gradio) (11.1.0)\n", + "Requirement already satisfied: pydantic>=2.0 in /usr/local/lib/python3.11/dist-packages (from gradio) (2.10.6)\n", + "Collecting pydub (from gradio)\n", + " Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)\n", + "Collecting python-multipart>=0.0.18 (from gradio)\n", + " Downloading python_multipart-0.0.20-py3-none-any.whl.metadata (1.8 kB)\n", + "Requirement already satisfied: pyyaml<7.0,>=5.0 in /usr/local/lib/python3.11/dist-packages (from gradio) (6.0.2)\n", + "Collecting ruff>=0.9.3 (from gradio)\n", + " Downloading ruff-0.10.0-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (25 kB)\n", + "Collecting safehttpx<0.2.0,>=0.1.6 (from gradio)\n", + " Downloading safehttpx-0.1.6-py3-none-any.whl.metadata (4.2 kB)\n", + "Collecting semantic-version~=2.0 (from gradio)\n", + " Downloading semantic_version-2.10.0-py2.py3-none-any.whl.metadata (9.7 kB)\n", + "Collecting starlette<1.0,>=0.40.0 (from gradio)\n", + " Downloading starlette-0.46.1-py3-none-any.whl.metadata (6.2 kB)\n", + "Collecting tomlkit<0.14.0,>=0.12.0 (from gradio)\n", + " Downloading tomlkit-0.13.2-py3-none-any.whl.metadata (2.7 kB)\n", + "Requirement already satisfied: typer<1.0,>=0.12 in /usr/local/lib/python3.11/dist-packages (from gradio) (0.15.2)\n", + "Requirement already satisfied: typing-extensions~=4.0 in /usr/local/lib/python3.11/dist-packages (from gradio) (4.12.2)\n", + "Collecting uvicorn>=0.14.0 (from gradio)\n", + " Downloading uvicorn-0.34.0-py3-none-any.whl.metadata (6.5 kB)\n", + "Requirement already satisfied: fsspec in /usr/local/lib/python3.11/dist-packages (from gradio-client==1.7.2->gradio) (2024.10.0)\n", + "Requirement already satisfied: websockets<16.0,>=10.0 in /usr/local/lib/python3.11/dist-packages (from gradio-client==1.7.2->gradio) (14.2)\n", + "Requirement already satisfied: filelock in /usr/local/lib/python3.11/dist-packages (from datasets) (3.17.0)\n", + "Requirement already satisfied: pyarrow>=15.0.0 in /usr/local/lib/python3.11/dist-packages (from datasets) (18.1.0)\n", + "Collecting dill<0.3.9,>=0.3.0 (from datasets)\n", + " Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)\n", + "Requirement already satisfied: requests>=2.32.2 in /usr/local/lib/python3.11/dist-packages (from datasets) (2.32.3)\n", + "Requirement already satisfied: tqdm>=4.66.3 in /usr/local/lib/python3.11/dist-packages (from datasets) (4.67.1)\n", + "Collecting xxhash (from datasets)\n", + " Downloading xxhash-3.5.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)\n", + "Collecting multiprocess<0.70.17 (from datasets)\n", + " Downloading multiprocess-0.70.16-py311-none-any.whl.metadata (7.2 kB)\n", + "Requirement already satisfied: aiohttp in /usr/local/lib/python3.11/dist-packages (from datasets) (3.11.13)\n", + "Requirement already satisfied: idna>=2.8 in /usr/local/lib/python3.11/dist-packages (from anyio<5.0,>=3.0->gradio) (3.10)\n", + "Requirement already satisfied: sniffio>=1.1 in /usr/local/lib/python3.11/dist-packages (from anyio<5.0,>=3.0->gradio) (1.3.1)\n", + "Requirement already satisfied: aiohappyeyeballs>=2.3.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp->datasets) (2.5.0)\n", + "Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.11/dist-packages (from aiohttp->datasets) (1.3.2)\n", + "Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp->datasets) (25.1.0)\n", + "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.11/dist-packages (from aiohttp->datasets) (1.5.0)\n", + "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.11/dist-packages (from aiohttp->datasets) (6.1.0)\n", + "Requirement already satisfied: propcache>=0.2.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp->datasets) (0.3.0)\n", + "Requirement already satisfied: yarl<2.0,>=1.17.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp->datasets) (1.18.3)\n", + "Requirement already satisfied: certifi in /usr/local/lib/python3.11/dist-packages (from httpx>=0.24.1->gradio) (2025.1.31)\n", + "Requirement already satisfied: httpcore==1.* in /usr/local/lib/python3.11/dist-packages (from httpx>=0.24.1->gradio) (1.0.7)\n", + "Requirement already satisfied: h11<0.15,>=0.13 in /usr/local/lib/python3.11/dist-packages (from httpcore==1.*->httpx>=0.24.1->gradio) (0.14.0)\n", + "Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.11/dist-packages (from pandas<3.0,>=1.0->gradio) (2.8.2)\n", + "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.11/dist-packages (from pandas<3.0,>=1.0->gradio) (2025.1)\n", + "Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.11/dist-packages (from pandas<3.0,>=1.0->gradio) (2025.1)\n", + "Requirement already satisfied: annotated-types>=0.6.0 in /usr/local/lib/python3.11/dist-packages (from pydantic>=2.0->gradio) (0.7.0)\n", + "Requirement already satisfied: pydantic-core==2.27.2 in /usr/local/lib/python3.11/dist-packages (from pydantic>=2.0->gradio) (2.27.2)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.11/dist-packages (from requests>=2.32.2->datasets) (3.4.1)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.11/dist-packages (from requests>=2.32.2->datasets) (2.3.0)\n", + "Requirement already satisfied: click>=8.0.0 in /usr/local/lib/python3.11/dist-packages (from typer<1.0,>=0.12->gradio) (8.1.8)\n", + "Requirement already satisfied: shellingham>=1.3.0 in /usr/local/lib/python3.11/dist-packages (from typer<1.0,>=0.12->gradio) (1.5.4)\n", + "Requirement already satisfied: rich>=10.11.0 in /usr/local/lib/python3.11/dist-packages (from typer<1.0,>=0.12->gradio) (13.9.4)\n", + "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.11/dist-packages (from python-dateutil>=2.8.2->pandas<3.0,>=1.0->gradio) (1.17.0)\n", + "Requirement already satisfied: markdown-it-py>=2.2.0 in /usr/local/lib/python3.11/dist-packages (from rich>=10.11.0->typer<1.0,>=0.12->gradio) (3.0.0)\n", + "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /usr/local/lib/python3.11/dist-packages (from rich>=10.11.0->typer<1.0,>=0.12->gradio) (2.18.0)\n", + "Requirement already satisfied: mdurl~=0.1 in /usr/local/lib/python3.11/dist-packages (from markdown-it-py>=2.2.0->rich>=10.11.0->typer<1.0,>=0.12->gradio) (0.1.2)\n", + "Downloading gradio-5.20.1-py3-none-any.whl (62.3 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m62.3/62.3 MB\u001b[0m \u001b[31m14.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading gradio_client-1.7.2-py3-none-any.whl (322 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m322.1/322.1 kB\u001b[0m \u001b[31m25.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading datasets-3.3.2-py3-none-any.whl (485 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m485.4/485.4 kB\u001b[0m \u001b[31m37.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading aiofiles-23.2.1-py3-none-any.whl (15 kB)\n", + "Downloading dill-0.3.8-py3-none-any.whl (116 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m116.3/116.3 kB\u001b[0m \u001b[31m12.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading fastapi-0.115.11-py3-none-any.whl (94 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m94.9/94.9 kB\u001b[0m \u001b[31m9.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading groovy-0.1.2-py3-none-any.whl (14 kB)\n", + "Downloading MarkupSafe-2.1.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (28 kB)\n", + "Downloading multiprocess-0.70.16-py311-none-any.whl (143 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m143.5/143.5 kB\u001b[0m \u001b[31m14.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading python_multipart-0.0.20-py3-none-any.whl (24 kB)\n", + "Downloading ruff-0.10.0-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (11.3 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m11.3/11.3 MB\u001b[0m \u001b[31m94.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading safehttpx-0.1.6-py3-none-any.whl (8.7 kB)\n", + "Downloading semantic_version-2.10.0-py2.py3-none-any.whl (15 kB)\n", + "Downloading starlette-0.46.1-py3-none-any.whl (71 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m72.0/72.0 kB\u001b[0m \u001b[31m6.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading tomlkit-0.13.2-py3-none-any.whl (37 kB)\n", + "Downloading uvicorn-0.34.0-py3-none-any.whl (62 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m62.3/62.3 kB\u001b[0m \u001b[31m5.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading ffmpy-0.5.0-py3-none-any.whl (6.0 kB)\n", + "Downloading pydub-0.25.1-py2.py3-none-any.whl (32 kB)\n", + "Downloading xxhash-3.5.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (194 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m194.8/194.8 kB\u001b[0m \u001b[31m17.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hInstalling collected packages: pydub, xxhash, uvicorn, tomlkit, semantic-version, ruff, python-multipart, markupsafe, groovy, ffmpy, dill, aiofiles, starlette, multiprocess, safehttpx, gradio-client, fastapi, gradio, datasets\n", + " Attempting uninstall: markupsafe\n", + " Found existing installation: MarkupSafe 3.0.2\n", + " Uninstalling MarkupSafe-3.0.2:\n", + " Successfully uninstalled MarkupSafe-3.0.2\n", + "Successfully installed aiofiles-23.2.1 datasets-3.3.2 dill-0.3.8 fastapi-0.115.11 ffmpy-0.5.0 gradio-5.20.1 gradio-client-1.7.2 groovy-0.1.2 markupsafe-2.1.5 multiprocess-0.70.16 pydub-0.25.1 python-multipart-0.0.20 ruff-0.10.0 safehttpx-0.1.6 semantic-version-2.10.0 starlette-0.46.1 tomlkit-0.13.2 uvicorn-0.34.0 xxhash-3.5.0\n" + ] + } + ], + "source": [ + "!pip install gradio datasets" + ] + }, + { + "cell_type": "code", + "source": [ + "!pip install torch==2.0.1 torchtext==0.15.2" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "rBDxfZ_NqS27", + "outputId": "9890ee2f-16dd-4383-c55d-efbf8b9c208a" + }, + "execution_count": 2, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Collecting torch==2.0.1\n", + " Downloading torch-2.0.1-cp311-cp311-manylinux1_x86_64.whl.metadata (24 kB)\n", + "Collecting torchtext==0.15.2\n", + " Downloading torchtext-0.15.2-cp311-cp311-manylinux1_x86_64.whl.metadata (7.4 kB)\n", + "Requirement already satisfied: filelock in /usr/local/lib/python3.11/dist-packages (from torch==2.0.1) (3.17.0)\n", + "Requirement already satisfied: typing-extensions in /usr/local/lib/python3.11/dist-packages (from torch==2.0.1) (4.12.2)\n", + "Requirement already satisfied: sympy in /usr/local/lib/python3.11/dist-packages (from torch==2.0.1) (1.13.1)\n", + "Requirement already satisfied: networkx in /usr/local/lib/python3.11/dist-packages (from torch==2.0.1) (3.4.2)\n", + "Requirement already satisfied: jinja2 in /usr/local/lib/python3.11/dist-packages (from torch==2.0.1) (3.1.6)\n", + "Collecting nvidia-cuda-nvrtc-cu11==11.7.99 (from torch==2.0.1)\n", + " Downloading nvidia_cuda_nvrtc_cu11-11.7.99-2-py3-none-manylinux1_x86_64.whl.metadata (1.5 kB)\n", + "Collecting nvidia-cuda-runtime-cu11==11.7.99 (from torch==2.0.1)\n", + " Downloading nvidia_cuda_runtime_cu11-11.7.99-py3-none-manylinux1_x86_64.whl.metadata (1.6 kB)\n", + "Collecting nvidia-cuda-cupti-cu11==11.7.101 (from torch==2.0.1)\n", + " Downloading nvidia_cuda_cupti_cu11-11.7.101-py3-none-manylinux1_x86_64.whl.metadata (1.6 kB)\n", + "Collecting nvidia-cudnn-cu11==8.5.0.96 (from torch==2.0.1)\n", + " Downloading nvidia_cudnn_cu11-8.5.0.96-2-py3-none-manylinux1_x86_64.whl.metadata (1.6 kB)\n", + "Collecting nvidia-cublas-cu11==11.10.3.66 (from torch==2.0.1)\n", + " Downloading nvidia_cublas_cu11-11.10.3.66-py3-none-manylinux1_x86_64.whl.metadata (1.6 kB)\n", + "Collecting nvidia-cufft-cu11==10.9.0.58 (from torch==2.0.1)\n", + " Downloading nvidia_cufft_cu11-10.9.0.58-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)\n", + "Collecting nvidia-curand-cu11==10.2.10.91 (from torch==2.0.1)\n", + " Downloading nvidia_curand_cu11-10.2.10.91-py3-none-manylinux1_x86_64.whl.metadata (1.6 kB)\n", + "Collecting nvidia-cusolver-cu11==11.4.0.1 (from torch==2.0.1)\n", + " Downloading nvidia_cusolver_cu11-11.4.0.1-2-py3-none-manylinux1_x86_64.whl.metadata (1.6 kB)\n", + "Collecting nvidia-cusparse-cu11==11.7.4.91 (from torch==2.0.1)\n", + " Downloading nvidia_cusparse_cu11-11.7.4.91-py3-none-manylinux1_x86_64.whl.metadata (1.6 kB)\n", + "Collecting nvidia-nccl-cu11==2.14.3 (from torch==2.0.1)\n", + " Downloading nvidia_nccl_cu11-2.14.3-py3-none-manylinux1_x86_64.whl.metadata (1.8 kB)\n", + "Collecting nvidia-nvtx-cu11==11.7.91 (from torch==2.0.1)\n", + " Downloading nvidia_nvtx_cu11-11.7.91-py3-none-manylinux1_x86_64.whl.metadata (1.7 kB)\n", + "Collecting triton==2.0.0 (from torch==2.0.1)\n", + " Downloading triton-2.0.0-1-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (1.0 kB)\n", + "Requirement already satisfied: tqdm in /usr/local/lib/python3.11/dist-packages (from torchtext==0.15.2) (4.67.1)\n", + "Requirement already satisfied: requests in /usr/local/lib/python3.11/dist-packages (from torchtext==0.15.2) (2.32.3)\n", + "Requirement already satisfied: numpy in /usr/local/lib/python3.11/dist-packages (from torchtext==0.15.2) (1.26.4)\n", + "Collecting torchdata==0.6.1 (from torchtext==0.15.2)\n", + " Downloading torchdata-0.6.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (13 kB)\n", + "Requirement already satisfied: setuptools in /usr/local/lib/python3.11/dist-packages (from nvidia-cublas-cu11==11.10.3.66->torch==2.0.1) (75.1.0)\n", + "Requirement already satisfied: wheel in /usr/local/lib/python3.11/dist-packages (from nvidia-cublas-cu11==11.10.3.66->torch==2.0.1) (0.45.1)\n", + "Requirement already satisfied: urllib3>=1.25 in /usr/local/lib/python3.11/dist-packages (from torchdata==0.6.1->torchtext==0.15.2) (2.3.0)\n", + "Requirement already satisfied: cmake in /usr/local/lib/python3.11/dist-packages (from triton==2.0.0->torch==2.0.1) (3.31.6)\n", + "Collecting lit (from triton==2.0.0->torch==2.0.1)\n", + " Downloading lit-18.1.8-py3-none-any.whl.metadata (2.5 kB)\n", + "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.11/dist-packages (from jinja2->torch==2.0.1) (2.1.5)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.11/dist-packages (from requests->torchtext==0.15.2) (3.4.1)\n", + "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.11/dist-packages (from requests->torchtext==0.15.2) (3.10)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.11/dist-packages (from requests->torchtext==0.15.2) (2025.1.31)\n", + "Requirement already satisfied: mpmath<1.4,>=1.1.0 in /usr/local/lib/python3.11/dist-packages (from sympy->torch==2.0.1) (1.3.0)\n", + "Downloading torch-2.0.1-cp311-cp311-manylinux1_x86_64.whl (619.9 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m619.9/619.9 MB\u001b[0m \u001b[31m2.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading torchtext-0.15.2-cp311-cp311-manylinux1_x86_64.whl (2.0 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.0/2.0 MB\u001b[0m \u001b[31m47.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading nvidia_cublas_cu11-11.10.3.66-py3-none-manylinux1_x86_64.whl (317.1 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m317.1/317.1 MB\u001b[0m \u001b[31m4.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading nvidia_cuda_cupti_cu11-11.7.101-py3-none-manylinux1_x86_64.whl (11.8 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m11.8/11.8 MB\u001b[0m \u001b[31m80.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading nvidia_cuda_nvrtc_cu11-11.7.99-2-py3-none-manylinux1_x86_64.whl (21.0 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m21.0/21.0 MB\u001b[0m \u001b[31m85.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading nvidia_cuda_runtime_cu11-11.7.99-py3-none-manylinux1_x86_64.whl (849 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m849.3/849.3 kB\u001b[0m \u001b[31m55.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading nvidia_cudnn_cu11-8.5.0.96-2-py3-none-manylinux1_x86_64.whl (557.1 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m557.1/557.1 MB\u001b[0m \u001b[31m1.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading nvidia_cufft_cu11-10.9.0.58-py3-none-manylinux2014_x86_64.whl (168.4 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m168.4/168.4 MB\u001b[0m \u001b[31m6.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading nvidia_curand_cu11-10.2.10.91-py3-none-manylinux1_x86_64.whl (54.6 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m54.6/54.6 MB\u001b[0m \u001b[31m12.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading nvidia_cusolver_cu11-11.4.0.1-2-py3-none-manylinux1_x86_64.whl (102.6 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m102.6/102.6 MB\u001b[0m \u001b[31m8.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading nvidia_cusparse_cu11-11.7.4.91-py3-none-manylinux1_x86_64.whl (173.2 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m173.2/173.2 MB\u001b[0m \u001b[31m6.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading nvidia_nccl_cu11-2.14.3-py3-none-manylinux1_x86_64.whl (177.1 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m177.1/177.1 MB\u001b[0m \u001b[31m6.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading nvidia_nvtx_cu11-11.7.91-py3-none-manylinux1_x86_64.whl (98 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m98.6/98.6 kB\u001b[0m \u001b[31m9.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading torchdata-0.6.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (4.6 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m4.6/4.6 MB\u001b[0m \u001b[31m100.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading triton-2.0.0-1-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (63.3 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m63.3/63.3 MB\u001b[0m \u001b[31m12.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading lit-18.1.8-py3-none-any.whl (96 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m96.4/96.4 kB\u001b[0m \u001b[31m9.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hInstalling collected packages: lit, nvidia-nvtx-cu11, nvidia-nccl-cu11, nvidia-cusparse-cu11, nvidia-curand-cu11, nvidia-cufft-cu11, nvidia-cuda-runtime-cu11, nvidia-cuda-nvrtc-cu11, nvidia-cuda-cupti-cu11, nvidia-cublas-cu11, nvidia-cusolver-cu11, nvidia-cudnn-cu11, triton, torch, torchdata, torchtext\n", + " Attempting uninstall: triton\n", + " Found existing installation: triton 3.1.0\n", + " Uninstalling triton-3.1.0:\n", + " Successfully uninstalled triton-3.1.0\n", + " Attempting uninstall: torch\n", + " Found existing installation: torch 2.5.1+cu124\n", + " Uninstalling torch-2.5.1+cu124:\n", + " Successfully uninstalled torch-2.5.1+cu124\n", + "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n", + "torchvision 0.20.1+cu124 requires torch==2.5.1, but you have torch 2.0.1 which is incompatible.\n", + "torchaudio 2.5.1+cu124 requires torch==2.5.1, but you have torch 2.0.1 which is incompatible.\u001b[0m\u001b[31m\n", + "\u001b[0mSuccessfully installed lit-18.1.8 nvidia-cublas-cu11-11.10.3.66 nvidia-cuda-cupti-cu11-11.7.101 nvidia-cuda-nvrtc-cu11-11.7.99 nvidia-cuda-runtime-cu11-11.7.99 nvidia-cudnn-cu11-8.5.0.96 nvidia-cufft-cu11-10.9.0.58 nvidia-curand-cu11-10.2.10.91 nvidia-cusolver-cu11-11.4.0.1 nvidia-cusparse-cu11-11.7.4.91 nvidia-nccl-cu11-2.14.3 nvidia-nvtx-cu11-11.7.91 torch-2.0.1 torchdata-0.6.1 torchtext-0.15.2 triton-2.0.0\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "import torch\n", + "torch.cuda.is_available()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "vmpc30L4qmAx", + "outputId": "cad2e4fc-b87c-4aa6-d9ad-8bdfeb55cf92" + }, + "execution_count": 3, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "True" + ] + }, + "metadata": {}, + "execution_count": 3 + } + ] + }, + { + "cell_type": "code", + "source": [ + "import torch\n", + "import torch.nn as nn\n", + "import torch.optim as optim\n", + "import math\n", + "from datasets import load_dataset\n", + "import numpy as np\n", + "from collections import Counter\n", + "import gradio as gr\n", + "\n", + "# Seting random seed for reproducibility\n", + "torch.manual_seed(42)\n", + "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n", + "\n", + "dataset = load_dataset('Helsinki-NLP/tatoeba_mt', 'ara-eng')\n", + "\n", + "# tokenization (word-level)\n", + "def tokenize(text):\n", + " return text.split()\n", + "\n", + "# Building vocabulary from dataset\n", + "def build_vocab(data, tokenizer, min_freq=2):\n", + " counter = Counter()\n", + " for example in data:\n", + " counter.update(tokenizer(example['sourceString']))\n", + " counter.update(tokenizer(example['targetString']))\n", + " # Adding special tokens\n", + " specials = ['', '', '', '']\n", + " vocab = specials + [word for word, freq in counter.items() if freq >= min_freq]\n", + " word2idx = {word: idx for idx, word in enumerate(vocab)}\n", + " idx2word = {idx: word for word, idx in word2idx.items()}\n", + " return word2idx, idx2word\n", + "\n", + "# Converting text to tensor (adjusted to fit special tokens within max_len)\n", + "def text_to_tensor(text, vocab, tokenizer, max_len=52):\n", + " tokens = tokenizer(text)[:max_len - 2] # Reserving space for and \n", + " tokens = [''] + tokens + ['']\n", + " tensor = [vocab.get(token, vocab['']) for token in tokens]\n", + " return torch.tensor(tensor, dtype=torch.long)\n", + "\n", + "train_data = dataset['validation'] # Using validation as training data for demo\n", + "test_data = dataset['test']\n", + "\n", + "# Building shared vocabulary (for simplicity, using both languages in one vocab)\n", + "word2idx, idx2word = build_vocab(train_data, tokenize)\n", + "\n", + "# Hyperparameters for data\n", + "max_len = 52 # Increased to account for and \n", + "batch_size = 32\n", + "\n", + "train_data_list = list(train_data) # Convert Dataset to list once\n", + "print(f\"Length of train_data_list: {len(train_data_list)}\")\n", + "\n", + "def get_batches(data_list, batch_size, max_len=52):\n", + " total_batches = len(data_list) // batch_size + (1 if len(data_list) % batch_size else 0)\n", + " print(f\"Total batches to process: {total_batches}\")\n", + " for i in range(0, len(data_list), batch_size):\n", + " batch = data_list[i:i + batch_size]\n", + " src_batch = [text_to_tensor(example['sourceString'], word2idx, tokenize, max_len) for example in batch]\n", + " tgt_batch = [text_to_tensor(example['targetString'], word2idx, tokenize, max_len) for example in batch]\n", + " src_batch = nn.utils.rnn.pad_sequence(src_batch, padding_value=word2idx[''], batch_first=False).to(device)\n", + " tgt_batch = nn.utils.rnn.pad_sequence(tgt_batch, padding_value=word2idx[''], batch_first=False).to(device)\n", + " if src_batch.size(0) > max_len:\n", + " src_batch = src_batch[:max_len, :]\n", + " elif src_batch.size(0) < max_len:\n", + " padding = torch.full((max_len - src_batch.size(0), src_batch.size(1)), word2idx[''], dtype=torch.long).to(device)\n", + " src_batch = torch.cat([src_batch, padding], dim=0)\n", + " if tgt_batch.size(0) > max_len:\n", + " tgt_batch = tgt_batch[:max_len, :]\n", + " elif tgt_batch.size(0) < max_len:\n", + " padding = torch.full((max_len - tgt_batch.size(0), tgt_batch.size(1)), word2idx[''], dtype=torch.long).to(device)\n", + " tgt_batch = torch.cat([tgt_batch, padding], dim=0)\n", + " src_batch = src_batch.transpose(0, 1) # [batch_size, seq_len]\n", + " tgt_batch = tgt_batch.transpose(0, 1) # [batch_size, seq_len]\n", + " yield src_batch, tgt_batch\n", + "\n", + "\n", + "print(\"Revised Chunk 1 (Seventh Iteration) completed: Dataset loaded and preprocessing debugged.\")" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 469, + "referenced_widgets": [ + "4936c0a76984492f88275a1500cf394f", + "4f189acddd754998acd4b4f9777469c4", + "ba8cec14e32648f4847b2194618d0757", + "2fa1d931574e4b118e524d3c252cc6e8", + "ae617cff68f24e09a3c6b0ad96f219dc", + "f84ff4aa43fa4d778611f988215adc43", + "26ea2db896ef456b92e80f196172efce", + "5c93f4c064a143b0b954a76194701100", + "84229337aea946b1bfb9f4c394276791", + "2954dab4bb914153acf9eb5527c8ef58", + "25dd04b3a0674b539ac56147477f5a64", + "76944c7c2ec147b29705cf13841891c7", + "b9ed8a9466894003bf1bf35e6923ace2", + "866ce20493ad47048c3cb87aad0e5909", + "a8254a2af6a4431b81352e1eff75fcef", + "5009ae410a2c4d3b91999ab0e35c6c86", + "9db7fd2603c945d797239e2cba155e9b", + "cf2612b047574ecc9d209c2689795c6d", + "3120cfab5cd44f08af6944d9cd3d7659", + "fd0e167445dd44cc83007e5aeefb6dc8", + "34332f0ec0054de082528326af5bc4b2", + "01826f384beb4abf84382add81b931b0", + "eaef0d163ed14989b44a1d3f21d59ce3", + "3ffe76ba7da14e98aca4dfe47b3076c6", + "88168392fcb34d40a7c99f402e730393", + "5ee168f8431249d7900001c84ecf20b7", + "59b62288d28048ff9c691f79f4ad11b0", + "fe6523eb73194f15a2428b542ae868e4", + "4ccccd3029124a72be368bd724f9072c", + "749c1b04cdb14c87b5ab95c86dcdd5f6", + "09cd81148c4845958c087ee8c7b5451c", + "9847b47b4310469ca2523199b3257e3a", + "ddc0dcd753834e02b6d4057155dcb0a1", + "bd58c69cf5ad494eaaf12bbf10f66661", + "c948d400029a4ee6b7ac40faebfe17e7", + "64f42dabd3eb48069167da0251f2d8fb", + "504de4c8b10041b3863a9cbe040d5cf6", + "6568660add1448ac81df0c6a67c47d66", + "5ec8f06d517c4f7bb3ab639f91b792f3", + "b7efe138f73a4811a14fd91d5f18766c", + "99757ac60afb4aea8d77e49e199a74b0", + "84da3eb589514c8a8c7fdce342ad6ac8", + "8451c69261104dc4bc06924aa6b2b28a", + "555ff104df5a460f9f459ec5fcffd8be", + "1466ecf8d0ad429a97ee8126c03dd78b", + "c836fef6c5f64691988b7294d32ae0ae", + "b7303fdb822b4f559c136fd62781411e", + "c9b3815919124920b9f5cd593f75c4b5", + "4f462747f6524ab296f43975840b3468", + "94ee64b3583d4826abef918380b4fc5a", + "dd24590438d04d8c94493f81794d73ea", + "3c04db996c374b5fb58541a10fc20d25", + "b8fecaed88f341ada9c54993fb06eb67", + "643834dda8f149559b1a610b610ca37b", + "09b958f9ce894514970adc140ed97ff4", + "49bc9b93252c459b905ddc7a03dfed71", + "145e2e17263f4d798185b561941594e6", + "870fafbef85f4009b86a868b253f5691", + "ac290a5547fc41a28ee6669a6f79e259", + "71d4d789d5f546d0b2a877cf17c588ec", + "74186ec4da634d229d9a68af07dd48e0", + "0eee54a4e2104331a32be3fcd5b1300e", + "fdb0faa095b34c59a0875b1d1262dc24", + "d97b2fa0c0b341af9d62cfe8e39694c9", + "d1a219a28ac641819659e51e6dc6f467", + "47e2aa076a7540deb8e85d2596b9c2cc", + "f105c630bd4548ee953bfcb321194118", + "d41c6bb5990f4dd690d44f226d05fd74", + "7660d0e619e14f5b91fc04dee179bb4c", + "f12ba11fb38e4df98b96847d28e0c456", + "b858da19009a4bb795ccf8e5edd321c5", + "e07084d61862429989a8c1d75b0204c3", + "4e19113ec8824d7dbbd9ba93dd92cbb6", + "4f0c1475c59e4880b06023f0c919c320", + "703e40ca5e1b4077a9d21a3927ab075b", + "5f64b257808d462bbabfb53c29bfba4d", + "ae32cbf6e2544ab4bc61dfc54ba937c4" + ] + }, + "id": "pT320YaDqXpA", + "outputId": "b8330b80-0072-40f8-a008-50537c372594" + }, + "execution_count": 4, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "/usr/local/lib/python3.11/dist-packages/huggingface_hub/utils/_auth.py:94: UserWarning: \n", + "The secret `HF_TOKEN` does not exist in your Colab secrets.\n", + "To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.\n", + "You will be able to reuse this secret in all of your notebooks.\n", + "Please note that authentication is recommended but still optional to access public models or datasets.\n", + " warnings.warn(\n" + ] + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "README.md: 0%| | 0.00/12.1k [00:00']).unsqueeze(1).unsqueeze(2)\n", + " tgt_mask = (tgt != word2idx['']).unsqueeze(1).unsqueeze(3)\n", + " seq_len = tgt.size(1)\n", + " nopeak_mask = (1 - torch.triu(torch.ones(1, seq_len, seq_len), diagonal=1)).bool().to(device)\n", + " tgt_mask = tgt_mask & nopeak_mask\n", + " return src_mask, tgt_mask\n", + "\n", + " def forward(self, src, tgt):\n", + " src_mask, tgt_mask = self.generate_mask(src, tgt)\n", + " src_embedded = self.dropout(self.pos_encoding(self.src_embedding(src) * math.sqrt(self.d_model)))\n", + " tgt_embedded = self.dropout(self.pos_encoding(self.tgt_embedding(tgt) * math.sqrt(self.d_model)))\n", + "\n", + " enc_output = src_embedded\n", + " for enc_layer in self.encoder_layers:\n", + " enc_output = enc_layer(enc_output, src_mask)\n", + "\n", + " dec_output = tgt_embedded\n", + " for dec_layer in self.decoder_layers:\n", + " dec_output = dec_layer(dec_output, enc_output, src_mask, tgt_mask)\n", + "\n", + " return self.fc_out(dec_output)\n", + "\n", + "print(\"Revised Chunk 2 (Fourth Iteration) completed: Transformer model fixed with max_len=52.\")" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "gyPdrhT2qYwc", + "outputId": "92531bf9-1692-4df5-9f13-2bd7568a08c8" + }, + "execution_count": 5, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Revised Chunk 2 (Fourth Iteration) completed: Transformer model fixed with max_len=52.\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "vocab_size = len(word2idx)\n", + "model = Transformer(\n", + " src_vocab_size=vocab_size,\n", + " tgt_vocab_size=vocab_size,\n", + " d_model=256,\n", + " num_heads=8,\n", + " num_layers=3,\n", + " d_ff=1024,\n", + " max_len=52,\n", + " dropout=0.1\n", + ").to(device)\n", + "\n", + "# Loss and optimizer\n", + "criterion = nn.CrossEntropyLoss(ignore_index=word2idx[''])\n", + "optimizer = optim.Adam(model.parameters(), lr=0.0001)\n", + "\n", + "# Training loop with progress feedback\n", + "def train(model, data, epochs=20):\n", + " model.train()\n", + " total_batches = len(data) // batch_size + (1 if len(data) % batch_size else 0)\n", + " print(f\"Total batches per epoch: {total_batches}\")\n", + " for epoch in range(epochs):\n", + " total_loss = 0\n", + " for batch_idx, (src_batch, tgt_batch) in enumerate(get_batches(data, batch_size, max_len=52), 1):\n", + " if batch_idx % 100 == 0: # Printing every 100 batches for feedback\n", + " print(f\"Epoch {epoch + 1}, Batch {batch_idx}/{total_batches} \")\n", + " optimizer.zero_grad()\n", + " output = model(src_batch, tgt_batch[:, :-1])\n", + " loss = criterion(output.view(-1, vocab_size), tgt_batch[:, 1:].reshape(-1))\n", + " loss.backward()\n", + " optimizer.step()\n", + " total_loss += loss.item()\n", + " avg_loss = total_loss / total_batches\n", + " print(f\"Epoch {epoch + 1}/{epochs}, Loss: {avg_loss:.4f}\")\n", + "\n", + "# Main function\n", + "def translate(model, sentence, max_len=52):\n", + " model.eval()\n", + " with torch.no_grad():\n", + " src = text_to_tensor(sentence, word2idx, tokenize, max_len).unsqueeze(0).to(device)\n", + " tgt = torch.tensor([word2idx['']], dtype=torch.long).unsqueeze(0).to(device)\n", + " for _ in range(max_len):\n", + " output = model(src, tgt)\n", + " next_token = output[:, -1, :].argmax(dim=-1).item()\n", + " if next_token == word2idx['']:\n", + " break\n", + " tgt = torch.cat([tgt, torch.tensor([[next_token]], dtype=torch.long).to(device)], dim=1)\n", + " translated = [idx2word[idx.item()] for idx in tgt[0] if idx.item() in idx2word]\n", + " return ' '.join(translated[1:])\n", + "\n", + "print(\"Starting training...\")\n", + "train(model, train_data_list)\n", + "print(\"Training completed.\")\n", + "\n", + "# Testing\n", + "test_sentence = \"عمرك رايح المكسيك؟\"\n", + "translated = translate(model, test_sentence)\n", + "print(f\"Input: {test_sentence}\")\n", + "print(f\"Translated: {translated}\")\n", + "\n", + "print(\"Chunk 3 completed: Training and inference implemented.\")" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "collapsed": true, + "id": "xCeVlZJHqcf2", + "outputId": "7affbe88-8c41-44bc-ef2f-ffd09f38f4eb" + }, + "execution_count": 6, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Starting training...\n", + "Total batches per epoch: 611\n", + "Total batches to process: 611\n", + "Epoch 1, Batch 100/611 \n", + "Epoch 1, Batch 200/611 \n", + "Epoch 1, Batch 300/611 \n", + "Epoch 1, Batch 400/611 \n", + "Epoch 1, Batch 500/611 \n", + "Epoch 1, Batch 600/611 \n", + "Epoch 1/20, Loss: 6.1513\n", + "Total batches to process: 611\n", + "Epoch 2, Batch 100/611 \n", + "Epoch 2, Batch 200/611 \n", + "Epoch 2, Batch 300/611 \n", + "Epoch 2, Batch 400/611 \n", + "Epoch 2, Batch 500/611 \n", + "Epoch 2, Batch 600/611 \n", + "Epoch 2/20, Loss: 5.1121\n", + "Total batches to process: 611\n", + "Epoch 3, Batch 100/611 \n", + "Epoch 3, Batch 200/611 \n", + "Epoch 3, Batch 300/611 \n", + "Epoch 3, Batch 400/611 \n", + "Epoch 3, Batch 500/611 \n", + "Epoch 3, Batch 600/611 \n", + "Epoch 3/20, Loss: 4.6931\n", + "Total batches to process: 611\n", + "Epoch 4, Batch 100/611 \n", + "Epoch 4, Batch 200/611 \n", + "Epoch 4, Batch 300/611 \n", + "Epoch 4, Batch 400/611 \n", + "Epoch 4, Batch 500/611 \n", + "Epoch 4, Batch 600/611 \n", + "Epoch 4/20, Loss: 4.3833\n", + "Total batches to process: 611\n", + "Epoch 5, Batch 100/611 \n", + "Epoch 5, Batch 200/611 \n", + "Epoch 5, Batch 300/611 \n", + "Epoch 5, Batch 400/611 \n", + "Epoch 5, Batch 500/611 \n", + "Epoch 5, Batch 600/611 \n", + "Epoch 5/20, Loss: 4.1299\n", + "Total batches to process: 611\n", + "Epoch 6, Batch 100/611 \n", + "Epoch 6, Batch 200/611 \n", + "Epoch 6, Batch 300/611 \n", + "Epoch 6, Batch 400/611 \n", + "Epoch 6, Batch 500/611 \n", + "Epoch 6, Batch 600/611 \n", + "Epoch 6/20, Loss: 3.9104\n", + "Total batches to process: 611\n", + "Epoch 7, Batch 100/611 \n", + "Epoch 7, Batch 200/611 \n", + "Epoch 7, Batch 300/611 \n", + "Epoch 7, Batch 400/611 \n", + "Epoch 7, Batch 500/611 \n", + "Epoch 7, Batch 600/611 \n", + "Epoch 7/20, Loss: 3.7017\n", + "Total batches to process: 611\n", + "Epoch 8, Batch 100/611 \n", + "Epoch 8, Batch 200/611 \n", + "Epoch 8, Batch 300/611 \n", + "Epoch 8, Batch 400/611 \n", + "Epoch 8, Batch 500/611 \n", + "Epoch 8, Batch 600/611 \n", + "Epoch 8/20, Loss: 3.5075\n", + "Total batches to process: 611\n", + "Epoch 9, Batch 100/611 \n", + "Epoch 9, Batch 200/611 \n", + "Epoch 9, Batch 300/611 \n", + "Epoch 9, Batch 400/611 \n", + "Epoch 9, Batch 500/611 \n", + "Epoch 9, Batch 600/611 \n", + "Epoch 9/20, Loss: 3.3238\n", + "Total batches to process: 611\n", + "Epoch 10, Batch 100/611 \n", + "Epoch 10, Batch 200/611 \n", + "Epoch 10, Batch 300/611 \n", + "Epoch 10, Batch 400/611 \n", + "Epoch 10, Batch 500/611 \n", + "Epoch 10, Batch 600/611 \n", + "Epoch 10/20, Loss: 3.1490\n", + "Total batches to process: 611\n", + "Epoch 11, Batch 100/611 \n", + "Epoch 11, Batch 200/611 \n", + "Epoch 11, Batch 300/611 \n", + "Epoch 11, Batch 400/611 \n", + "Epoch 11, Batch 500/611 \n", + "Epoch 11, Batch 600/611 \n", + "Epoch 11/20, Loss: 2.9762\n", + "Total batches to process: 611\n", + "Epoch 12, Batch 100/611 \n", + "Epoch 12, Batch 200/611 \n", + "Epoch 12, Batch 300/611 \n", + "Epoch 12, Batch 400/611 \n", + "Epoch 12, Batch 500/611 \n", + "Epoch 12, Batch 600/611 \n", + "Epoch 12/20, Loss: 2.8136\n", + "Total batches to process: 611\n", + "Epoch 13, Batch 100/611 \n", + "Epoch 13, Batch 200/611 \n", + "Epoch 13, Batch 300/611 \n", + "Epoch 13, Batch 400/611 \n", + "Epoch 13, Batch 500/611 \n", + "Epoch 13, Batch 600/611 \n", + "Epoch 13/20, Loss: 2.6486\n", + "Total batches to process: 611\n", + "Epoch 14, Batch 100/611 \n", + "Epoch 14, Batch 200/611 \n", + "Epoch 14, Batch 300/611 \n", + "Epoch 14, Batch 400/611 \n", + "Epoch 14, Batch 500/611 \n", + "Epoch 14, Batch 600/611 \n", + "Epoch 14/20, Loss: 2.4945\n", + "Total batches to process: 611\n", + "Epoch 15, Batch 100/611 \n", + "Epoch 15, Batch 200/611 \n", + "Epoch 15, Batch 300/611 \n", + "Epoch 15, Batch 400/611 \n", + "Epoch 15, Batch 500/611 \n", + "Epoch 15, Batch 600/611 \n", + "Epoch 15/20, Loss: 2.3394\n", + "Total batches to process: 611\n", + "Epoch 16, Batch 100/611 \n", + "Epoch 16, Batch 200/611 \n", + "Epoch 16, Batch 300/611 \n", + "Epoch 16, Batch 400/611 \n", + "Epoch 16, Batch 500/611 \n", + "Epoch 16, Batch 600/611 \n", + "Epoch 16/20, Loss: 2.1968\n", + "Total batches to process: 611\n", + "Epoch 17, Batch 100/611 \n", + "Epoch 17, Batch 200/611 \n", + "Epoch 17, Batch 300/611 \n", + "Epoch 17, Batch 400/611 \n", + "Epoch 17, Batch 500/611 \n", + "Epoch 17, Batch 600/611 \n", + "Epoch 17/20, Loss: 2.0583\n", + "Total batches to process: 611\n", + "Epoch 18, Batch 100/611 \n", + "Epoch 18, Batch 200/611 \n", + "Epoch 18, Batch 300/611 \n", + "Epoch 18, Batch 400/611 \n", + "Epoch 18, Batch 500/611 \n", + "Epoch 18, Batch 600/611 \n", + "Epoch 18/20, Loss: 1.9222\n", + "Total batches to process: 611\n", + "Epoch 19, Batch 100/611 \n", + "Epoch 19, Batch 200/611 \n", + "Epoch 19, Batch 300/611 \n", + "Epoch 19, Batch 400/611 \n", + "Epoch 19, Batch 500/611 \n", + "Epoch 19, Batch 600/611 \n", + "Epoch 19/20, Loss: 1.7939\n", + "Total batches to process: 611\n", + "Epoch 20, Batch 100/611 \n", + "Epoch 20, Batch 200/611 \n", + "Epoch 20, Batch 300/611 \n", + "Epoch 20, Batch 400/611 \n", + "Epoch 20, Batch 500/611 \n", + "Epoch 20, Batch 600/611 \n", + "Epoch 20/20, Loss: 1.6632\n", + "Training completed.\n", + "Input: عمرك رايح المكسيك؟\n", + "Translated: I'm going to take a \n", + "Chunk 3 completed: Training and inference implemented.\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n", + "\n", + "# Instantiate the model (assuming train_dataset is already defined)\n", + "model = Transformer(\n", + " src_vocab_size=vocab_size,\n", + " tgt_vocab_size=vocab_size\n", + ").to(device)\n", + "\n", + "# Load model checkpoint and set to evaluation mode\n", + "model.load_state_dict(torch.load(\"habibi.pth\", map_location=device))\n", + "model.eval()\n", + "\n", + "def gradio_translate(text):\n", + " return translate(model, text)\n", + "\n", + "interface = gr.Interface(\n", + " fn=gradio_translate,\n", + " inputs=gr.Textbox(lines=2, placeholder=\"Enter Arabic sentence here...\"),\n", + " outputs=\"text\",\n", + " title=\"Arabic to English Translator\",\n", + " description=\"Translate Arabic sentences to English using a Transformer model.\"\n", + ")\n", + "\n", + "interface.launch()\n", + "\n", + "print(\"Chunk 4 completed: Gradio interface deployed.\")" + ], + "metadata": { + "id": "9Nr_qAIFyGMP", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 663 + }, + "outputId": "ac1a4b5b-81d1-42f9-c1c9-4278ffe14285" + }, + "execution_count": 10, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Running Gradio in a Colab notebook requires sharing enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).\n", + "\n", + "Colab notebook detected. To show errors in colab notebook, set debug=True in launch()\n", + "* Running on public URL: https://b4e3cb0ecd69a69020.gradio.live\n", + "\n", + "This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)\n" + ] + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "" + ], + "text/html": [ + "
" + ] + }, + "metadata": {} + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Chunk 4 completed: Gradio interface deployed.\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "torch.save(model.state_dict(), \"habibi.pth\")\n", + "print(\"Model checkpoint saved as transformer_code.pth\")" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "7aRMjC_eb9Cd", + "outputId": "f1b9217f-ca2b-4e88-f375-29aaa2ccf17c" + }, + "execution_count": 9, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Model checkpoint saved as transformer_code.pth\n" + ] + } + ] + } + ] +} \ No newline at end of file