diff --git "a/Assignment_04_Miami_Hotel_Search.ipynb" "b/Assignment_04_Miami_Hotel_Search.ipynb" new file mode 100644--- /dev/null +++ "b/Assignment_04_Miami_Hotel_Search.ipynb" @@ -0,0 +1,2470 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + }, + "accelerator": "TPU", + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "7974ef878bd74038ac047d1f8b04f1c4": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_d2ff26eaaa07434090a57295feb73e19", + "IPY_MODEL_4243917723e643f896fd3dae4f110bce", + "IPY_MODEL_392a859772634a6caff03aac53f7d3d6" + ], + "layout": "IPY_MODEL_b31aabad028947dbb455d5eb977d5ff1" + } + }, + "d2ff26eaaa07434090a57295feb73e19": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_ce01116cccd64fed820e079b7d907df1", + "placeholder": "​", + "style": "IPY_MODEL_47dfac8bc887458b9cf5993e78a85027", + "value": "Batches: 100%" + } + }, + "4243917723e643f896fd3dae4f110bce": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", 
+ "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_db03ae99ca71496f9eb4dafacccc9332", + "max": 1, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_501453a4e7ad49a0ab9f54592b7cb4c1", + "value": 1 + } + }, + "392a859772634a6caff03aac53f7d3d6": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_e23a9d0bcb7d4684ac27273d4bd2954b", + "placeholder": "​", + "style": "IPY_MODEL_9806e9bd7b3e4ffb9cbb30f72821adb6", + "value": " 1/1 [00:00<00:00, 20.99it/s]" + } + }, + "b31aabad028947dbb455d5eb977d5ff1": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": 
null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "ce01116cccd64fed820e079b7d907df1": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "47dfac8bc887458b9cf5993e78a85027": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + 
"_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "db03ae99ca71496f9eb4dafacccc9332": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "501453a4e7ad49a0ab9f54592b7cb4c1": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "e23a9d0bcb7d4684ac27273d4bd2954b": { + "model_module": 
"@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "9806e9bd7b3e4ffb9cbb30f72821adb6": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "faba2b91c175464b8b8f12b4298890e6": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": 
"@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_ac0406e564a8489e9f5aaa3597b8c017", + "IPY_MODEL_20812b1a71fa4b12bd688a315142efe1", + "IPY_MODEL_4a2087e6368043e1894751d4584d42c1" + ], + "layout": "IPY_MODEL_032cd6f158e44929bf6961dd40947801" + } + }, + "ac0406e564a8489e9f5aaa3597b8c017": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_865768bcc5a64812806c3802d2d7d142", + "placeholder": "​", + "style": "IPY_MODEL_7556c598e3ba427ab000194f522b11ac", + "value": "Batches: 100%" + } + }, + "20812b1a71fa4b12bd688a315142efe1": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_6329054f598a4060ab2b959116810006", + "max": 1, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_8ece034822c64bb897026a665f1413ec", + "value": 1 + } + }, + "4a2087e6368043e1894751d4584d42c1": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + 
"_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_ad71bf69758e4edb90da58225003a91b", + "placeholder": "​", + "style": "IPY_MODEL_00c4a0f4ba93474aa0925a8f41ac304d", + "value": " 1/1 [00:00<00:00, 24.43it/s]" + } + }, + "032cd6f158e44929bf6961dd40947801": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "865768bcc5a64812806c3802d2d7d142": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + 
"_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "7556c598e3ba427ab000194f522b11ac": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "6329054f598a4060ab2b959116810006": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + 
"grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "8ece034822c64bb897026a665f1413ec": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "ad71bf69758e4edb90da58225003a91b": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": 
null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "00c4a0f4ba93474aa0925a8f41ac304d": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + } + } + } + }, + "cells": [ + { + "cell_type": "code", + "source": [ + "import pandas as pd\n", + "pd.set_option('display.max_columns', None) # show all columns\n", + "pd.set_option('display.expand_frame_repr', False) # do not wrap row values" + ], + "metadata": { + "id": "8g_mJI1vTOIk" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "## Data Exploration" + ], + "metadata": { + "id": "eRQgdecrU8Lo" + } + }, + { + "cell_type": "code", + "source": [ + "df = pd.read_csv('https://raw.githubusercontent.com/hamzafarooq/maven-mlsystem-design-cohort-1/main/data/miami_hotels.csv')" + ], + "metadata": { + "id": "U8OIj4vOTP2E" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "df.name.value_counts()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "6ZTaN0R5TY3t", + "outputId": "c54ea111-e29b-4506-e0c9-a60416631b5d" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "Faena Miami Beach 45\n", + "Setai Hotel Miami 45\n", + "Best Western Plus Atlantic Beach Resort 45\n", + "1 
Hotel South Beach 45\n", + "Nobu Hotel Miami Beach 45\n", + " ..\n", + "The Julia Hotel, Miami Beach 6\n", + "Beach Park Hotel 4\n", + "Bikini Lodge 4\n", + "Baltic Hotel 2\n", + "Sherry Frontenac Hotel 1\n", + "Name: name, Length: 65, dtype: int64" + ] + }, + "metadata": {}, + "execution_count": 15 + } + ] + }, + { + "cell_type": "code", + "source": [ + "df = df.drop_duplicates()" + ], + "metadata": { + "id": "BqMuCbcvTcP0" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "df.shape" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "i5i1Mu1JTkRh", + "outputId": "1f4e7e6b-9cb1-4b5e-f9be-6d675d7a55b0" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "(2511, 27)" + ] + }, + "metadata": {}, + "execution_count": 17 + } + ] + }, + { + "cell_type": "code", + "source": [ + "df.head()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 833 + }, + "id": "wLtOv6CJTygx", + "outputId": "f38c9eaa-9c6c-4bbe-c92c-d7e3c4ba0b97" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " id type name image awards rankingPosition priceLevel priceRange category rating hotelClass hotelClassAttribution phone address email amenities numberOfRooms prices latitude longitude webUrl website rankingString rankingDenominator numberOfReviews review title\n", + "0 7787044 HOTEL Faena Miami Beach https://media-cdn.tripadvisor.com/media/photo-... [] 5 $$$$ $729 - $1,426 hotel 4.5 0.0 NaN 13055348800 3201 Collins Ave Faena District, Miami Beach, ... reservations-miamibeach@faena.com [] 179 [] 25.807375 -80.12364 https://www.tripadvisor.com/Hotel_Review-g3443... https://www.faena.com/miami-beach #5 of 235 hotels in Miami Beach 235 2123 Hands down my absolute favorite hotel in South... 
My favorite hotel in Miami--Petar is the best!\n", + "1 7787044 HOTEL Faena Miami Beach https://media-cdn.tripadvisor.com/media/photo-... [] 5 $$$$ $729 - $1,426 hotel 4.5 0.0 NaN 13055348800 3201 Collins Ave Faena District, Miami Beach, ... reservations-miamibeach@faena.com [] 179 [] 25.807375 -80.12364 https://www.tripadvisor.com/Hotel_Review-g3443... https://www.faena.com/miami-beach #5 of 235 hotels in Miami Beach 235 2123 There are not enough words to explain the sump... Exquisite Stay\n", + "2 7787044 HOTEL Faena Miami Beach https://media-cdn.tripadvisor.com/media/photo-... [] 5 $$$$ $729 - $1,426 hotel 4.5 0.0 NaN 13055348800 3201 Collins Ave Faena District, Miami Beach, ... reservations-miamibeach@faena.com [] 179 [] 25.807375 -80.12364 https://www.tripadvisor.com/Hotel_Review-g3443... https://www.faena.com/miami-beach #5 of 235 hotels in Miami Beach 235 2123 I went for the Burlesque show which was absolu... Burlesqu Show\n", + "3 7787044 HOTEL Faena Miami Beach https://media-cdn.tripadvisor.com/media/photo-... [] 5 $$$$ $729 - $1,426 hotel 4.5 0.0 NaN 13055348800 3201 Collins Ave Faena District, Miami Beach, ... reservations-miamibeach@faena.com [] 179 [] 25.807375 -80.12364 https://www.tripadvisor.com/Hotel_Review-g3443... https://www.faena.com/miami-beach #5 of 235 hotels in Miami Beach 235 2123 Dana at Gitano made the night magical! He make... Dana at Gitano is a rockstar!\n", + "4 7787044 HOTEL Faena Miami Beach https://media-cdn.tripadvisor.com/media/photo-... [] 5 $$$$ $729 - $1,426 hotel 4.5 0.0 NaN 13055348800 3201 Collins Ave Faena District, Miami Beach, ... reservations-miamibeach@faena.com [] 179 [] 25.807375 -80.12364 https://www.tripadvisor.com/Hotel_Review-g3443... https://www.faena.com/miami-beach #5 of 235 hotels in Miami Beach 235 2123 I would like to give our very very heartfelt t... Perfect Place" + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idtypenameimageawardsrankingPositionpriceLevelpriceRangecategoryratinghotelClasshotelClassAttributionphoneaddressemailamenitiesnumberOfRoomspriceslatitudelongitudewebUrlwebsiterankingStringrankingDenominatornumberOfReviewsreviewtitle
07787044HOTELFaena Miami Beachhttps://media-cdn.tripadvisor.com/media/photo-...[]5$$$$$729 - $1,426hotel4.50.0NaN130553488003201 Collins Ave Faena District, Miami Beach, ...reservations-miamibeach@faena.com[]179[]25.807375-80.12364https://www.tripadvisor.com/Hotel_Review-g3443...https://www.faena.com/miami-beach#5 of 235 hotels in Miami Beach2352123Hands down my absolute favorite hotel in South...My favorite hotel in Miami--Petar is the best!
17787044HOTELFaena Miami Beachhttps://media-cdn.tripadvisor.com/media/photo-...[]5$$$$$729 - $1,426hotel4.50.0NaN130553488003201 Collins Ave Faena District, Miami Beach, ...reservations-miamibeach@faena.com[]179[]25.807375-80.12364https://www.tripadvisor.com/Hotel_Review-g3443...https://www.faena.com/miami-beach#5 of 235 hotels in Miami Beach2352123There are not enough words to explain the sump...Exquisite Stay
27787044HOTELFaena Miami Beachhttps://media-cdn.tripadvisor.com/media/photo-...[]5$$$$$729 - $1,426hotel4.50.0NaN130553488003201 Collins Ave Faena District, Miami Beach, ...reservations-miamibeach@faena.com[]179[]25.807375-80.12364https://www.tripadvisor.com/Hotel_Review-g3443...https://www.faena.com/miami-beach#5 of 235 hotels in Miami Beach2352123I went for the Burlesque show which was absolu...Burlesqu Show
37787044HOTELFaena Miami Beachhttps://media-cdn.tripadvisor.com/media/photo-...[]5$$$$$729 - $1,426hotel4.50.0NaN130553488003201 Collins Ave Faena District, Miami Beach, ...reservations-miamibeach@faena.com[]179[]25.807375-80.12364https://www.tripadvisor.com/Hotel_Review-g3443...https://www.faena.com/miami-beach#5 of 235 hotels in Miami Beach2352123Dana at Gitano made the night magical! He make...Dana at Gitano is a rockstar!
47787044HOTELFaena Miami Beachhttps://media-cdn.tripadvisor.com/media/photo-...[]5$$$$$729 - $1,426hotel4.50.0NaN130553488003201 Collins Ave Faena District, Miami Beach, ...reservations-miamibeach@faena.com[]179[]25.807375-80.12364https://www.tripadvisor.com/Hotel_Review-g3443...https://www.faena.com/miami-beach#5 of 235 hotels in Miami Beach2352123I would like to give our very very heartfelt t...Perfect Place
\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "execution_count": 18 + } + ] + }, + { + "cell_type": "code", + "source": [ + "df.columns" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "-cDtE40yT0xK", + "outputId": "342e12ea-0214-4853-cea1-f29bf94a634a" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "Index(['id', 'type', 'name', 'image', 'awards', 'rankingPosition',\n", + " 'priceLevel', 'priceRange', 'category', 'rating', 'hotelClass',\n", + " 'hotelClassAttribution', 'phone', 'address', 'email', 'amenities',\n", + " 'numberOfRooms', 'prices', 'latitude', 'longitude', 'webUrl', 'website',\n", + " 'rankingString', 'rankingDenominator', 'numberOfReviews', 'review',\n", + " 'title'],\n", + " dtype='object')" + ] + }, + "metadata": {}, + "execution_count": 19 + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "## Create Vectors" + ], + "metadata": { + "id": "EvJOQOr0VAWU" + } + }, + { + "cell_type": "code", + "source": [ + "import re\n", + "\n", + "# combine tile and review columns into one col called combined\n", + "df[\"combined\"] = (\n", + " df.title.str.strip() + ' ' + df.review.str.strip()\n", + ")\n", + "\n", + "df_combined = df.copy()\n", + "\n", + "# remove all non-alphanumeric characters\n", + "df_combined['combined'] = df_combined['combined'].apply(lambda x: re.sub('[^a-zA-z0-9\\s]','',str(x)))\n", + "\n", + "# convert all txt to lower case\n", + "def lower_case(input_str):\n", + " input_str = input_str.lower()\n", + " return input_str\n", + "\n", + "df_combined['combined']= df_combined['combined'].apply(lambda x: lower_case(x))" + ], + "metadata": { + "id": "OixPCLrNVExS" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "df_combined[['name', 'combined']].head()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 206 + }, + "id": "ZVw1msB5XRue", + "outputId": 
"174ed80f-0aac-41c5-aef8-b33fff188479" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " name combined\n", + "0 Faena Miami Beach my favorite hotel in miamipetar is the best ha...\n", + "1 Faena Miami Beach exquisite stay there are not enough words to e...\n", + "2 Faena Miami Beach burlesqu show i went for the burlesque show wh...\n", + "3 Faena Miami Beach dana at gitano is a rockstar dana at gitano ma...\n", + "4 Faena Miami Beach perfect place i would like to give our very ve..." + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
namecombined
0Faena Miami Beachmy favorite hotel in miamipetar is the best ha...
1Faena Miami Beachexquisite stay there are not enough words to e...
2Faena Miami Beachburlesqu show i went for the burlesque show wh...
3Faena Miami Beachdana at gitano is a rockstar dana at gitano ma...
4Faena Miami Beachperfect place i would like to give our very ve...
\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "execution_count": 21 + } + ] + }, + { + "cell_type": "code", + "source": [ + "!pip install -U sentence-transformers rank_bm25 --quiet" + ], + "metadata": { + "id": "_-VkP02zYLeR" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "from sentence_transformers import SentenceTransformer, util\n", + "\n", + "embedder = SentenceTransformer('all-mpnet-base-v2')\n", + "# embedder = SentenceTransformer('wvprevue/e5-mistral-7b-instruct')" + ], + "metadata": { + "id": "ek7XUay6XT91" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "import time\n", + "import torch\n", + "\n", + "if not torch.cuda.is_available():\n", + " print(\"Warning: No GPU found. Please add GPU to your notebook\")\n", + "else:\n", + " print(\"GPU Found!\")\n", + " embedder = embedder.to('cuda')\n", + "\n", + "df_embedding = df_combined.copy()\n", + "\n", + "startTime = time.time()\n", + "df_embedding[\"embedding\"] = df_embedding.combined.apply(lambda x: embedder.encode(x))\n", + "executionTime = (time.time() - startTime)\n", + "\n", + "print('Execution time in seconds: ' + str(executionTime))" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 408 + }, + "id": "azaI56B8YjCO", + "outputId": "b08987a7-db13-4ee6-890b-e2bd8cf083be" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Warning: No GPU found. 
Please add GPU to your notebook\n" + ] + }, + { + "output_type": "error", + "ename": "KeyboardInterrupt", + "evalue": "", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 11\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 12\u001b[0m \u001b[0mstartTime\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtime\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtime\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 13\u001b[0;31m \u001b[0mdf_embedding\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"embedding\"\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdf_embedding\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcombined\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mapply\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;32mlambda\u001b[0m \u001b[0mx\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0membedder\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mencode\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 14\u001b[0m \u001b[0mexecutionTime\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mtime\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtime\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m-\u001b[0m \u001b[0mstartTime\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 15\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/pandas/core/series.py\u001b[0m in \u001b[0;36mapply\u001b[0;34m(self, func, convert_dtype, args, **kwargs)\u001b[0m\n\u001b[1;32m 4769\u001b[0m \u001b[0mdtype\u001b[0m\u001b[0;34m:\u001b[0m 
\u001b[0mfloat64\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4770\u001b[0m \"\"\"\n\u001b[0;32m-> 4771\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mSeriesApply\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfunc\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mconvert_dtype\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mapply\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 4772\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4773\u001b[0m def _reduce(\n", + "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/pandas/core/apply.py\u001b[0m in \u001b[0;36mapply\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 1121\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1122\u001b[0m \u001b[0;31m# self.f is Callable\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1123\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mapply_standard\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1124\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1125\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0magg\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/pandas/core/apply.py\u001b[0m in \u001b[0;36mapply_standard\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 1172\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1173\u001b[0m \u001b[0mvalues\u001b[0m \u001b[0;34m=\u001b[0m 
\u001b[0mobj\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mastype\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mobject\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_values\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1174\u001b[0;31m mapped = lib.map_infer(\n\u001b[0m\u001b[1;32m 1175\u001b[0m \u001b[0mvalues\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1176\u001b[0m \u001b[0mf\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/pandas/_libs/lib.pyx\u001b[0m in \u001b[0;36mpandas._libs.lib.map_infer\u001b[0;34m()\u001b[0m\n", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m(x)\u001b[0m\n\u001b[1;32m 11\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 12\u001b[0m \u001b[0mstartTime\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtime\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtime\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 13\u001b[0;31m \u001b[0mdf_embedding\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"embedding\"\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdf_embedding\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcombined\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mapply\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;32mlambda\u001b[0m \u001b[0mx\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0membedder\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mencode\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 14\u001b[0m \u001b[0mexecutionTime\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mtime\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtime\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m-\u001b[0m 
\u001b[0mstartTime\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 15\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/sentence_transformers/SentenceTransformer.py\u001b[0m in \u001b[0;36mencode\u001b[0;34m(self, sentences, batch_size, show_progress_bar, output_value, convert_to_numpy, convert_to_tensor, device, normalize_embeddings)\u001b[0m\n\u001b[1;32m 282\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 283\u001b[0m \u001b[0;32mwith\u001b[0m \u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mno_grad\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 284\u001b[0;31m \u001b[0mout_features\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mforward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfeatures\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 285\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 286\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0moutput_value\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;34m\"token_embeddings\"\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/torch/nn/modules/container.py\u001b[0m in \u001b[0;36mforward\u001b[0;34m(self, input)\u001b[0m\n\u001b[1;32m 213\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mforward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minput\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 214\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mmodule\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 215\u001b[0;31m \u001b[0minput\u001b[0m 
\u001b[0;34m=\u001b[0m \u001b[0mmodule\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0minput\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 216\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0minput\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 217\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py\u001b[0m in \u001b[0;36m_wrapped_call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1516\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_compiled_call_impl\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;31m# type: ignore[misc]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1517\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1518\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_call_impl\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1519\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1520\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_call_impl\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py\u001b[0m in \u001b[0;36m_call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 
1525\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0m_global_backward_pre_hooks\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0m_global_backward_hooks\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1526\u001b[0m or _global_forward_hooks or _global_forward_pre_hooks):\n\u001b[0;32m-> 1527\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mforward_call\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1528\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1529\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/sentence_transformers/models/Transformer.py\u001b[0m in \u001b[0;36mforward\u001b[0;34m(self, features)\u001b[0m\n\u001b[1;32m 96\u001b[0m \u001b[0mtrans_features\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"token_type_ids\"\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mfeatures\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"token_type_ids\"\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 97\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 98\u001b[0;31m \u001b[0moutput_states\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mauto_model\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m**\u001b[0m\u001b[0mtrans_features\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mreturn_dict\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 99\u001b[0m \u001b[0moutput_tokens\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0moutput_states\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 
100\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py\u001b[0m in \u001b[0;36m_wrapped_call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1516\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_compiled_call_impl\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;31m# type: ignore[misc]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1517\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1518\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_call_impl\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1519\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1520\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_call_impl\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py\u001b[0m in \u001b[0;36m_call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1525\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0m_global_backward_pre_hooks\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0m_global_backward_hooks\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1526\u001b[0m or _global_forward_hooks or _global_forward_pre_hooks):\n\u001b[0;32m-> 1527\u001b[0;31m 
\u001b[0;32mreturn\u001b[0m \u001b[0mforward_call\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1528\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1529\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/transformers/models/mpnet/modeling_mpnet.py\u001b[0m in \u001b[0;36mforward\u001b[0;34m(self, input_ids, attention_mask, position_ids, head_mask, inputs_embeds, output_attentions, output_hidden_states, return_dict, **kwargs)\u001b[0m\n\u001b[1;32m 549\u001b[0m \u001b[0mhead_mask\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_head_mask\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mhead_mask\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mconfig\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnum_hidden_layers\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 550\u001b[0m \u001b[0membedding_output\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0membeddings\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0minput_ids\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0minput_ids\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mposition_ids\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mposition_ids\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minputs_embeds\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0minputs_embeds\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 551\u001b[0;31m encoder_outputs = self.encoder(\n\u001b[0m\u001b[1;32m 552\u001b[0m \u001b[0membedding_output\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 553\u001b[0m 
\u001b[0mattention_mask\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mextended_attention_mask\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py\u001b[0m in \u001b[0;36m_wrapped_call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1516\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_compiled_call_impl\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;31m# type: ignore[misc]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1517\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1518\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_call_impl\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1519\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1520\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_call_impl\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py\u001b[0m in \u001b[0;36m_call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1525\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0m_global_backward_pre_hooks\u001b[0m \u001b[0;32mor\u001b[0m 
\u001b[0m_global_backward_hooks\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1526\u001b[0m or _global_forward_hooks or _global_forward_pre_hooks):\n\u001b[0;32m-> 1527\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mforward_call\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1528\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1529\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/transformers/models/mpnet/modeling_mpnet.py\u001b[0m in \u001b[0;36mforward\u001b[0;34m(self, hidden_states, attention_mask, head_mask, output_attentions, output_hidden_states, return_dict, **kwargs)\u001b[0m\n\u001b[1;32m 339\u001b[0m \u001b[0mall_hidden_states\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mall_hidden_states\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mhidden_states\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 340\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 341\u001b[0;31m layer_outputs = layer_module(\n\u001b[0m\u001b[1;32m 342\u001b[0m \u001b[0mhidden_states\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 343\u001b[0m \u001b[0mattention_mask\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py\u001b[0m in \u001b[0;36m_wrapped_call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1516\u001b[0m \u001b[0;32mreturn\u001b[0m 
\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_compiled_call_impl\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;31m# type: ignore[misc]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1517\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1518\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_call_impl\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1519\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1520\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_call_impl\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py\u001b[0m in \u001b[0;36m_call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1525\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0m_global_backward_pre_hooks\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0m_global_backward_hooks\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1526\u001b[0m or _global_forward_hooks or _global_forward_pre_hooks):\n\u001b[0;32m-> 1527\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mforward_call\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m 
\u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1528\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1529\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/transformers/models/mpnet/modeling_mpnet.py\u001b[0m in \u001b[0;36mforward\u001b[0;34m(self, hidden_states, attention_mask, head_mask, position_bias, output_attentions, **kwargs)\u001b[0m\n\u001b[1;32m 309\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 310\u001b[0m \u001b[0mintermediate_output\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mintermediate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mattention_output\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 311\u001b[0;31m \u001b[0mlayer_output\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0moutput\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mintermediate_output\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mattention_output\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 312\u001b[0m \u001b[0moutputs\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mlayer_output\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0moutputs\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 313\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0moutputs\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py\u001b[0m in \u001b[0;36m_wrapped_call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1516\u001b[0m \u001b[0;32mreturn\u001b[0m 
\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_compiled_call_impl\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;31m# type: ignore[misc]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1517\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1518\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_call_impl\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1519\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1520\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_call_impl\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py\u001b[0m in \u001b[0;36m_call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1525\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0m_global_backward_pre_hooks\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0m_global_backward_hooks\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1526\u001b[0m or _global_forward_hooks or _global_forward_pre_hooks):\n\u001b[0;32m-> 1527\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mforward_call\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m 
\u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1528\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1529\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/transformers/models/mpnet/modeling_mpnet.py\u001b[0m in \u001b[0;36mforward\u001b[0;34m(self, hidden_states, input_tensor)\u001b[0m\n\u001b[1;32m 276\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 277\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mforward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mhidden_states\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mTensor\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minput_tensor\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mTensor\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m->\u001b[0m \u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mTensor\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 278\u001b[0;31m \u001b[0mhidden_states\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdense\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mhidden_states\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 279\u001b[0m \u001b[0mhidden_states\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdropout\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mhidden_states\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 280\u001b[0m \u001b[0mhidden_states\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mLayerNorm\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mhidden_states\u001b[0m \u001b[0;34m+\u001b[0m 
\u001b[0minput_tensor\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py\u001b[0m in \u001b[0;36m_wrapped_call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1516\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_compiled_call_impl\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;31m# type: ignore[misc]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1517\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1518\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_call_impl\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1519\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1520\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_call_impl\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py\u001b[0m in \u001b[0;36m_call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1525\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0m_global_backward_pre_hooks\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0m_global_backward_hooks\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1526\u001b[0m or _global_forward_hooks or 
_global_forward_pre_hooks):\n\u001b[0;32m-> 1527\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mforward_call\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1528\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1529\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/torch/nn/modules/linear.py\u001b[0m in \u001b[0;36mforward\u001b[0;34m(self, input)\u001b[0m\n\u001b[1;32m 112\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 113\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mforward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minput\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mTensor\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m->\u001b[0m \u001b[0mTensor\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 114\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mF\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlinear\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0minput\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mweight\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbias\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 115\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 116\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mextra_repr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m->\u001b[0m \u001b[0mstr\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mKeyboardInterrupt\u001b[0m: " + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "df_embedding = df_combined.copy()\n", + 
"\n", + "\n", + "df_embedding[\"embedding\"] = df_embedding.combined.apply(lambda x: embedder.encode(x))" + ], + "metadata": { + "id": "BKEJpt3xbl-D" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# !pip install umap-learn" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "lu1Y-SJ0g8Jn", + "outputId": "12044563-9ae5-43f3-833a-9727f3a16bf7" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Collecting umap-learn\n", + " Downloading umap-learn-0.5.5.tar.gz (90 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m90.9/90.9 kB\u001b[0m \u001b[31m3.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n", + "Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.10/dist-packages (from umap-learn) (1.23.5)\n", + "Requirement already satisfied: scipy>=1.3.1 in /usr/local/lib/python3.10/dist-packages (from umap-learn) (1.11.4)\n", + "Requirement already satisfied: scikit-learn>=0.22 in /usr/local/lib/python3.10/dist-packages (from umap-learn) (1.2.2)\n", + "Requirement already satisfied: numba>=0.51.2 in /usr/local/lib/python3.10/dist-packages (from umap-learn) (0.58.1)\n", + "Collecting pynndescent>=0.5 (from umap-learn)\n", + " Downloading pynndescent-0.5.11-py3-none-any.whl (55 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m55.8/55.8 kB\u001b[0m \u001b[31m8.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: tqdm in /usr/local/lib/python3.10/dist-packages (from umap-learn) (4.66.1)\n", + "Requirement already satisfied: llvmlite<0.42,>=0.41.0dev0 in /usr/local/lib/python3.10/dist-packages (from numba>=0.51.2->umap-learn) (0.41.1)\n", + "Requirement already satisfied: joblib>=0.11 in 
/usr/local/lib/python3.10/dist-packages (from pynndescent>=0.5->umap-learn) (1.3.2)\n", + "Requirement already satisfied: threadpoolctl>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from scikit-learn>=0.22->umap-learn) (3.2.0)\n", + "Building wheels for collected packages: umap-learn\n", + " Building wheel for umap-learn (setup.py) ... \u001b[?25l\u001b[?25hdone\n", + " Created wheel for umap-learn: filename=umap_learn-0.5.5-py3-none-any.whl size=86832 sha256=887f10c7c74893c7e09fda5a2cb4d0a2d93631b4b9006d546938551ac33c380b\n", + " Stored in directory: /root/.cache/pip/wheels/3a/70/07/428d2b58660a1a3b431db59b806a10da736612ebbc66c1bcc5\n", + "Successfully built umap-learn\n", + "Installing collected packages: pynndescent, umap-learn\n", + "Successfully installed pynndescent-0.5.11 umap-learn-0.5.5\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "# import umap\n", + "# import numpy as np\n", + "# from tqdm import tqdm\n", + "\n", + "# embeddings = df_embedding['embedding'].apply(np.array).apply(np.ravel)\n", + "# umap_transform = umap.UMAP(random_state=0, transform_seed=0).fit(embeddings.values.reshape(-1, embeddings.size))" + ], + "metadata": { + "id": "BAqv8L6sgrNW" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# def project_embeddings(embeddings, umap_transform):\n", + "# umap_embeddings = np.empty((len(embeddings),2))\n", + "# for i, embedding in enumerate(tqdm(embeddings)):\n", + "# umap_embeddings[i] = umap_transform.transform([embedding])\n", + "# return umap_embeddings" + ], + "metadata": { + "id": "EM4566Adg4BS" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# projected_dataset_embeddings = project_embeddings(embeddings, umap_transform)" + ], + "metadata": { + "id": "Vtl4nrdGg56P" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# import matplotlib.pyplot as plt\n", + "\n", + "# 
plt.figure()\n", + "# plt.scatter(projected_dataset_embeddings[:, 0], projected_dataset_embeddings[:, 1], s=10)\n", + "# plt.gca().set_aspect('equal', 'datalim')\n", + "# plt.title('Projected Embeddings')\n", + "# plt.axis('off')" + ], + "metadata": { + "id": "XmSImtgXhrQA" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "#### Save Data to GoogleDrive" + ], + "metadata": { + "id": "p0WWGo6saEGd" + } + }, + { + "cell_type": "code", + "source": [ + "!mkdir Semantic_Search" + ], + "metadata": { + "id": "ALS7cU3faGZD" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# save\n", + "\n", + "import os\n", + "from humanize import naturalsize\n", + "\n", + "file_path = 'Semantic_Search/hotel_review_full_data.pkl'\n", + "\n", + "df_embedding.to_pickle(file_path)\n", + "\n", + "file_size = os.path.getsize(file_path)\n", + "\n", + "print(f\"Size of {file_path}: {naturalsize(file_size)}\")" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 245 + }, + "id": "ZXoQF6IqaHQi", + "outputId": "6fe08937-4f0b-4e61-bb47-ef5001fe7296" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "error", + "ename": "NameError", + "evalue": "name 'df_embedding' is not defined", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 6\u001b[0m \u001b[0mfile_path\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m'Semantic_Search/hotel_review_full_data.pkl'\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 7\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 8\u001b[0;31m 
\u001b[0mdf_embedding\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mto_pickle\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfile_path\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 9\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 10\u001b[0m \u001b[0mfile_size\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mos\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpath\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mgetsize\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfile_path\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mNameError\u001b[0m: name 'df_embedding' is not defined" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "# load\n", + "\n", + "import pandas as pd\n", + "df_hotels = pd.read_pickle('Semantic_Search/hotel_review_full_data.pkl')" + ], + "metadata": { + "id": "p0x7Oej3gjAD" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "## Semantic Search" + ], + "metadata": { + "id": "5yhNdu6IaxsE" + } + }, + { + "cell_type": "code", + "source": [ + "!pip install faiss-gpu" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "ZAvJUFu3cpmD", + "outputId": "a7167ed6-8860-45fd-eef1-11cc5e9db70a" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Collecting faiss-gpu\n", + " Downloading faiss_gpu-1.7.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (85.5 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m85.5/85.5 MB\u001b[0m \u001b[31m9.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hInstalling collected packages: faiss-gpu\n", + "Successfully installed faiss-gpu-1.7.2\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "# Query inputs\n", + "\n", + "k = 25\n", + "query1 = 'close to the beach with free breakfast for a good value'\n", + "\n", + "query_embedding1 = 
embedder.encode(query1,show_progress_bar=True)\n", + "query_embedding1.shape" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 66, + "referenced_widgets": [ + "7974ef878bd74038ac047d1f8b04f1c4", + "d2ff26eaaa07434090a57295feb73e19", + "4243917723e643f896fd3dae4f110bce", + "392a859772634a6caff03aac53f7d3d6", + "b31aabad028947dbb455d5eb977d5ff1", + "ce01116cccd64fed820e079b7d907df1", + "47dfac8bc887458b9cf5993e78a85027", + "db03ae99ca71496f9eb4dafacccc9332", + "501453a4e7ad49a0ab9f54592b7cb4c1", + "e23a9d0bcb7d4684ac27273d4bd2954b", + "9806e9bd7b3e4ffb9cbb30f72821adb6" + ] + }, + "id": "89FG7RKtcXUl", + "outputId": "6307d2ec-f3de-48d3-ca23-24635ff605c0" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "Batches: 0%| | 0/1 [00:00\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0mquery3\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m'Best place to visit in Summer which has a good sea view site and bigger pools'\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 4\u001b[0;31m \u001b[0mquery_embedding3\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0membedder\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mencode\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mquery3\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mshow_progress_bar\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 5\u001b[0m \u001b[0mquery_embedding3\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mNameError\u001b[0m: name 'embedder' is not defined" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "import faiss\n", + "import numpy as np\n", + "\n", + "# Initialize a new index with the same dimension as the query vector\n", + "d = 
query_embedding1.shape[0]\n", + "index = faiss.IndexFlatL2(d)\n", + "\n", + "# Add the embedding column to the index\n", + "startTime = time.time()\n", + "X = np.vstack(df_hotels['embedding'].to_numpy()) # convert the embedding column to a NumPy array and stack it into a 2D array\n", + "index.add(X)\n", + "executionTime = (time.time() - startTime)\n", + "print('Index build time in seconds: ' + str(executionTime))\n", + "\n", + "# Perform similarity search using the query vector\n", + "startTime = time.time()\n", + "distances, indices = index.search(np.array([query_embedding1]), k=k)\n", + "executionTime = (time.time() - startTime)\n", + "print('Search time in seconds: ' + str(executionTime))\n", + "\n", + "# Get the corresponding row indices in the original DataFrame\n", + "# .copy() makes an independent frame so adding a column below does not raise SettingWithCopyWarning\n", + "closest_neighbors1 = df_hotels.iloc[indices[0]].copy() # get the closest neighbors\n", + "# IndexFlatL2 reports squared L2 distances: a smaller 'similarity_score' means a closer match\n", + "closest_neighbors1['similarity_score'] = distances[0] # add a new column with the distances returned by the search\n", + "# sort ascending so the nearest reviews come first and .head() shows the best matches\n", + "closest_neighbors1 = closest_neighbors1.sort_values(by='similarity_score', ascending=True)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Fjag_y0iazmh", + "outputId": "c1788bbf-7b21-411c-b3da-ea1e40979224" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Index build time in seconds: 0.008175849914550781\n", + "Search time in seconds: 0.002689838409423828\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + ":23: SettingWithCopyWarning: \n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", + " closest_neighbors1['similarity_score'] = distances[0] # add a new column with the similarity scores\n" + ] + } + ] + }, + { + "cell_type": "code", +
"source": [ + "import faiss\n", + "import numpy as np\n", + "\n", + "# Initialize a new index with the same dimension as the query vector\n", + "d = query_embedding2.shape[0]\n", + "index = faiss.IndexFlatL2(d)\n", + "\n", + "\n", + "# Add the embedding column to the index\n", + "startTime = time.time()\n", + "X = np.vstack(df_hotels['embedding'].to_numpy()) # convert the embedding column to a NumPy array and stack it into a 2D array\n", + "index.add(X)\n", + "executionTime = (time.time() - startTime)\n", + "print('Index build time in seconds: ' + str(executionTime))\n", + "\n", + "# Perform similarity search using the query vector\n", + "startTime = time.time()\n", + "distances, indices = index.search(np.array([query_embedding2]), k=k)\n", + "executionTime = (time.time() - startTime)\n", + "print('Search time in seconds: ' + str(executionTime))\n", + "\n", + "# Get the corresponding row indices in the original DataFrame\n", + "# .copy() makes an independent frame so adding a column below does not raise SettingWithCopyWarning\n", + "closest_neighbors2 = df_hotels.iloc[indices[0]].copy() # get the closest neighbors\n", + "# IndexFlatL2 reports squared L2 distances: a smaller 'similarity_score' means a closer match\n", + "closest_neighbors2['similarity_score'] = distances[0] # add a new column with the distances returned by the search\n", + "# sort ascending so the nearest reviews come first and .head() shows the best matches\n", + "closest_neighbors2 = closest_neighbors2.sort_values(by='similarity_score', ascending=True)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "JjAkFYQ5i_5O", + "outputId": "b2e6f4f3-d61b-4da7-b5ee-ccebe800a2d7" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Index build time in seconds: 0.008663654327392578\n", + "Search time in seconds: 0.0028831958770751953\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + ":23: SettingWithCopyWarning: \n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", 
+ " closest_neighbors2['similarity_score'] = distances[0] # add a new column with the similarity scores\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "print(closest_neighbors1[['name','similarity_score']].head())" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "b7OAb33Fcnh6", + "outputId": "f6aa4257-f863-41c7-84ab-ad3dbd522b46" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + " name similarity_score\n", + "708 Hilton Grand Vacations Club McAlpin Ocean Plaz... 0.664502\n", + "811 The Meridian Miami 0.662270\n", + "1244 Penguin Hotel 0.658242\n", + "791 The Meridian Miami 0.653557\n", + "1042 The Savoy Hotel & Beach Club 0.651536\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [], + "metadata": { + "id": "LCkTpsKe2P9m" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "import faiss\n", + "import numpy as np\n", + "\n", + "# Initialize a new index with the same dimension as the query vector\n", + "d = query_embedding2.shape[0]\n", + "index = faiss.IndexFlatL2(d)\n", + "\n", + "# NOTE: the five lines below look pasted from a different (LSH) example and are disabled:\n", + "# `%%time` is only valid as the first line of a cell, and `model` / `lsh_index` are not defined in the visible cells.\n", + "# %%time\n", + "# xq = model.encode([\" A person is dancing and enjoying in the rain\"])\n", + "# k=5\n", + "# D, I = lsh_index.search(xq, k) # search\n", + "# print(I)\n", + "# Add the embedding column to the index\n", + "startTime = time.time()\n", + "X = np.vstack(df_hotels['embedding'].to_numpy()) # convert the embedding column to a NumPy array and stack it into a 2D array\n", + "index.add(X)\n", + "executionTime = (time.time() - startTime)\n", + "print('Index build time in seconds: ' + str(executionTime))\n", + "\n", + "# Perform similarity search using the query vector\n", + "startTime = time.time()\n", + "distances, indices = index.search(np.array([query_embedding2]), k=k)\n", + "executionTime = (time.time() - startTime)\n", + "print('Search time in seconds: ' + str(executionTime))\n", + "\n", + "# Get the corresponding row 
indices in the original DataFrame\n", + "# .copy() makes an independent frame so adding a column below does not raise SettingWithCopyWarning\n", + "closest_neighbors2 = df_hotels.iloc[indices[0]].copy() # get the closest neighbors\n", + "# IndexFlatL2 reports squared L2 distances: a smaller 'similarity_score' means a closer match\n", + "closest_neighbors2['similarity_score'] = distances[0] # add a new column with the distances returned by the search\n", + "# sort ascending so the nearest reviews come first and .head() shows the best matches\n", + "closest_neighbors2 = closest_neighbors2.sort_values(by='similarity_score', ascending=True)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "b2e6f4f3-d61b-4da7-b5ee-ccebe800a2d7", + "id": "UMwn8KG92Q7d" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Index build time in seconds: 0.008663654327392578\n", + "Search time in seconds: 0.0028831958770751953\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + ":23: SettingWithCopyWarning: \n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", + " closest_neighbors2['similarity_score'] = distances[0] # add a new column with the similarity scores\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "print('Query:', query1, '\\n')\n", + "\n", + "# `_` instead of `index` so the loop does not overwrite the FAISS `index` object\n", + "for _, row in closest_neighbors1.head().iterrows():\n", + " print(\"HOTEL NAME:\\t\"+row['name'],'\\n',\n", + " \"SIMILARITY SCORE:\\t\",row['similarity_score'],\n", + " '\\n',\"TEXT IDENTIFIED:\\t\" ,row['combined'],'\\n')\n", + "# print() returns None, so keep the table in df_new1 and print it separately;\n", + "# wrapping the print() call in pd.DataFrame() would write an empty sheet to Excel\n", + "df_new1 = closest_neighbors1[['name','combined','similarity_score']].head()\n", + "print(df_new1)\n", + "q_name_1 = 'qyery1.xlsx'\n", + "\n", + "# saving the excel\n", + "df_new1.to_excel(q_name_1)\n", + "print('DataFrame is written to Excel File successfully.')" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "gFLxp568c7-3", + "outputId": "6a30f8c0-2884-41b3-e35b-1ba477e63548" + }, + "execution_count": null, + 
"outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Query: close to the beach with free breakfast for a good value \n", + "\n", + "HOTEL NAME:\tHilton Grand Vacations Club McAlpin Ocean Plaza Miami \n", + " SIMILARITY SCORE:\t 0.6645020842552185 \n", + " TEXT IDENTIFIED:\t fantastic spot on south beach stayed a week here and thoroughly enjoyed it the staff were very friendly and attentive hotel is located in the perfect spot right in the heart of south beach close to restaurants nightlife starbucks and the beach its far enough down ocean blvd however to avoid the noise highly recommend we will be back soon \n", + "\n", + "HOTEL NAME:\tThe Meridian Miami \n", + " SIMILARITY SCORE:\t 0.662269651889801 \n", + " TEXT IDENTIFIED:\t great would recommend clean walking distance from beach nice pool and sitting area luis was very friendly and helpful coffee and snacks are available luis recommended close by places to eat and gave directions nice \n", + "\n", + "HOTEL NAME:\tPenguin Hotel \n", + " SIMILARITY SCORE:\t 0.6582424640655518 \n", + " TEXT IDENTIFIED:\t great location for the price i love this quaint little place directly across from the beach the welcome drinks and breakfast make it a great value david was the best at the front desk he was friendly and helpful and truly the bombcom i will be back again \n", + "\n", + "HOTEL NAME:\tThe Meridian Miami \n", + " SIMILARITY SCORE:\t 0.6535568833351135 \n", + " TEXT IDENTIFIED:\t well placed hotel we stayed one night in this hotel before joining a cruise the hotel is in a good spot and the rooms are clean there is a coffee shop and a small bar in this hotel the hotel reception staff are very helpful and welcoming you can walk to nearby restaurants and shops from here \n", + "\n", + "HOTEL NAME:\tThe Savoy Hotel & Beach Club \n", + " SIMILARITY SCORE:\t 0.6515356302261353 \n", + " TEXT IDENTIFIED:\t best place awesome service pool staff at the bar and food was amazing had a great time by the 
beach everything extremely clean \n", + "everyone there was so nice very family friendly place \n", + "we spend most the time at the beach and pool and the managers at bar went out of their way to accommodate every need we had \n", + "\n", + " name combined similarity_score\n", + "708 Hilton Grand Vacations Club McAlpin Ocean Plaz... fantastic spot on south beach stayed a week he... 0.664502\n", + "811 The Meridian Miami great would recommend clean walking distance f... 0.662270\n", + "1244 Penguin Hotel great location for the price i love this quain... 0.658242\n", + "791 The Meridian Miami well placed hotel we stayed one night in this ... 0.653557\n", + "1042 The Savoy Hotel & Beach Club best place awesome service pool staff at the ... 0.651536\n", + "DataFrame is written to Excel File successfully.\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "print('Query:', query2, '\\n')\n", + "\n", + "# `_` instead of `index` so the loop does not overwrite the FAISS `index` object\n", + "for _, row in closest_neighbors2.head().iterrows():\n", + " print(\"HOTEL NAME:\\t\"+row['name'],'\\n',\n", + " \"SIMILARITY SCORE:\\t\",row['similarity_score'],\n", + " '\\n',\"TEXT IDENTIFIED:\\t\" ,row['combined'],'\\n')\n", + "# print() returns None, so keep the table in df_new2 and print it separately;\n", + "# wrapping the print() call in pd.DataFrame() would leave df_new2 empty\n", + "df_new2 = closest_neighbors2[['name','combined','similarity_score']].head()\n", + "print(df_new2)" + ], + "metadata": { + "id": "j_JLBlZWdttZ", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "f2351c82-a57a-4cac-b282-43b9aaeed0b5" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Query: Best place to visit in Summer which has a good sea view site and bigger pools \n", + "\n", + "HOTEL NAME:\tHotel Breakwater South Beach \n", + " SIMILARITY SCORE:\t 0.7800235152244568 \n", + " TEXT IDENTIFIED:\t love great spot on ocean drive staff lilykaielza were amazing very politehelpful answered all our questions we are heading back in june for another week we were there first week in march nice pool nice option if you dont want the beach \n", + "\n", + "HOTEL 
NAME:\tThe Marlin Hotel \n", + " SIMILARITY SCORE:\t 0.7773655652999878 \n", + " TEXT IDENTIFIED:\t best place that we have stayed at in south beach hotels on south beach can be so hit and miss due to old buildings and being a noisy area but we were super happy with the marlin \n", + "\n", + "great reception staff quiet rooms at night beach towels and chairs supplied valet parking for 35 per night perfect location comfortable bedding and amazing soft robes to use \n", + "\n", + "we will be using this hotel again when we return \n", + "\n", + "HOTEL NAME:\tNautilus by Arlo \n", + " SIMILARITY SCORE:\t 0.7743663191795349 \n", + " TEXT IDENTIFIED:\t my extensive hotel research paid off staff helpful warm fivestar service oscar lana arianna melody tea giuliano and jackie take pride in this retro boutique hotel and provided worldclass service\n", + "\n", + "room we booked a king ocean front with a pullout sofa bed it was fantastic and huge the bathroom was massive especially for south beach the minibar was epically stocked and there was an espresso machine too we watched the glorious sunset and sunrise from our huge window it was so quiet i thought maybe no one was on either side of us until i saw our neighbors the next morning heading to the rooftop terrace with coffee the rooftop terrace was incredible oscar even made sure we knew how to access the terrace\n", + "\n", + "pool the pool was topnotch the water was the perfect temperature crystal clear not crowded tranquil quality guests and very polished staff i love that they have a delightful young man dedicated to escorting you to available padded loungers im sure this would be especially helpful at times when its near capacity no awkward weaving through the chairs and scooting them around to fit your group \n", + "\n", + "cabana we were so fortunate to have a cabana our first day as we wanted to heal our sunburns from our cruise svetlana took excellent care of us and anticipated our every need the cabana had a large 
cushioned couch cushioned ottoman coffee table smeg mini fridge standing retro cooler fan large tv towels storage and a super rad orange safe the pia coladas were boozy and not too sugary protip go with a double lunch review below\n", + "\n", + "beach direct easy access the private beach is steps from the hotel just stroll through the bamboo tunnel which runs parallel to the pool exit the secure door you need a room key to get in a few steps across the bikewalking trail to a cute little sandy trail and youll see the navy and white striped nautilus umbrellas and loungers \n", + "\n", + "food the cappuccinos at the coffee bar in the lobby were perfect the lunch from the cabana club was delish we ordered the cubano tuna togarashi salad and kids hamburger this was our 4th cuban sandwich on our vacation and it was the best \n", + "\n", + "location i preferred the location of nautilus 18th and collins on south beach over the other hotel 9th street and collins we stayed at before our cruise it is quieter and more upscale its a short walk or uber to espaola way the shops and restaurants on lincoln road and the port of miami was only about 15 minutes \n", + "\n", + "final thoughts our 8 year old kid said this resort was her favorite part of our vacation beating atlantis and the cruise ship we were married at four seasons the biltmore santa barbara and the service level at nautilus was beyond any four seasons weve stayed at it was truly paradise and were already planning a return trip my only regret is that we didnt stay longer \n", + "\n", + "HOTEL NAME:\tKimpton Angler's Hotel \n", + " SIMILARITY SCORE:\t 0.7728005051612854 \n", + " TEXT IDENTIFIED:\t kiloton anglers south beach great location two blocks from ocean ave the rooms are large and modern very very clean hotel parking is valet only but with inout privileges at the front of the hotel did not try the pool as it was cold during our stay but the bar is nice and has a generous menu \n", + "\n", + "HOTEL NAME:\tHilton 
Grand Vacations Club McAlpin Ocean Plaza Miami \n", + " SIMILARITY SCORE:\t 0.7708964943885803 \n", + " TEXT IDENTIFIED:\t fantastic location cant beat staying on ocean drivenow pedestrian steps from the beach rooms are clean with great amenities ive stayed here 6 times and have always had an amazing experience if you need help or advice be sure to ask tall paul \n", + "\n", + " name combined similarity_score\n", + "2067 Hotel Breakwater South Beach love great spot on ocean drive staff lilykaiel... 0.780024\n", + "2258 The Marlin Hotel best place that we have stayed at in south bea... 0.777366\n", + "380 Nautilus by Arlo my extensive hotel research paid off staff hel... 0.774366\n", + "1001 Kimpton Angler's Hotel kiloton anglers south beach great location two... 0.772801\n", + "735 Hilton Grand Vacations Club McAlpin Ocean Plaz... fantastic location cant beat staying on ocean ... 0.770896\n" + ] + } + ] + } + ] +} \ No newline at end of file