Spaces:
Sleeping
Sleeping
Upload 6 files
Browse files- .env +3 -0
- Data_Collection.ipynb +326 -0
- all_hotels.txt +0 -0
- default.sqlite +0 -0
- travel.py +103 -0
- vectorstore.ipynb +102 -0
.env
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
GOOGLE_API_KEY=<your-google-api-key>
|
2 |
+
ANTHROPIC_API_KEY=<your-anthropic-api-key>
|
3 |
+
AURA_API_KEY=<your-deepgram-api-key>
|
Data_Collection.ipynb
ADDED
@@ -0,0 +1,326 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"cells": [
|
3 |
+
{
|
4 |
+
"cell_type": "code",
|
5 |
+
"execution_count": null,
|
6 |
+
"metadata": {
|
7 |
+
"colab": {
|
8 |
+
"base_uri": "https://localhost:8080/"
|
9 |
+
},
|
10 |
+
"id": "GdqJu0iA0Qkm",
|
11 |
+
"outputId": "a1d4fdd0-36f5-42de-f4a3-31fa124a0fb3"
|
12 |
+
},
|
13 |
+
"outputs": [
|
14 |
+
{
|
15 |
+
"output_type": "stream",
|
16 |
+
"name": "stdout",
|
17 |
+
"text": [
|
18 |
+
" Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
|
19 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m850.5/850.5 kB\u001b[0m \u001b[31m10.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
20 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m75.6/75.6 kB\u001b[0m \u001b[31m8.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
21 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m77.8/77.8 kB\u001b[0m \u001b[31m7.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
22 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m58.3/58.3 kB\u001b[0m \u001b[31m6.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
23 |
+
"\u001b[?25h Building wheel for google-search-results (setup.py) ... \u001b[?25l\u001b[?25hdone\n"
|
24 |
+
]
|
25 |
+
}
|
26 |
+
],
|
27 |
+
"source": [
|
28 |
+
"!pip install --upgrade google-search-results --quiet"
|
29 |
+
]
|
30 |
+
},
|
31 |
+
{
|
32 |
+
"cell_type": "code",
|
33 |
+
"execution_count": null,
|
34 |
+
"metadata": {
|
35 |
+
"id": "XEzTZ14602JJ"
|
36 |
+
},
|
37 |
+
"outputs": [],
|
38 |
+
"source": [
|
39 |
+
"import serpapi\n",
|
40 |
+
"import time\n",
|
41 |
+
"serpapi_key=\"your key\""
|
42 |
+
]
|
43 |
+
},
|
44 |
+
{
|
45 |
+
"cell_type": "code",
|
46 |
+
"source": [
|
47 |
+
"most_visited_countries = [\n",
|
48 |
+
" \"France\", \"Spain\", \"United States\", \"China\", \"Italy\", \"United Kingdom\", \"Germany\", \"Mexico\", \"Thailand\", \"Turkey\",\n",
|
49 |
+
" \"Austria\", \"Malaysia\", \"Russia\", \"Greece\", \"Japan\", \"Canada\", \"Portugal\", \"Saudi Arabia\", \"Australia\", \"Netherlands\",\n",
|
50 |
+
" \"South Korea\", \"Egypt\", \"United Arab Emirates\", \"Singapore\", \"Indonesia\", \"India\", \"Vietnam\", \"Croatia\", \"Switzerland\",\n",
|
51 |
+
" \"Poland\", \"South Africa\", \"Sweden\", \"Morocco\", \"Philippines\", \"Argentina\", \"Brazil\", \"Belgium\", \"Denmark\", \"Norway\",\n",
|
52 |
+
" \"Peru\", \"Czech Republic\", \"Hungary\", \"Finland\", \"Ireland\", \"New Zealand\", \"Kenya\", \"Tanzania\", \"Israel\", \"Colombia\",\n",
|
53 |
+
" \"Chile\", \"Iran\", \"Qatar\", \"Oman\", \"Ukraine\", \"Jordan\", \"Romania\", \"Kazakhstan\", \"Pakistan\", \"Bangladesh\", \"Nepal\",\n",
|
54 |
+
" \"Bhutan\", \"Sri Lanka\", \"Myanmar\", \"Cambodia\", \"Laos\", \"Ethiopia\", \"Nigeria\", \"Kenya\", \"Uganda\", \"Rwanda\", \"Botswana\",\n",
|
55 |
+
" \"Zimbabwe\", \"Namibia\", \"Zambia\", \"Tunisia\", \"Algeria\", \"Mauritius\", \"Mauritania\", \"Senegal\", \"Ghana\", \"Ivory Coast\",\n",
|
56 |
+
" \"Cameroon\", \"Gabon\", \"Madagascar\", \"Seychelles\", \"Fiji\", \"Solomon Islands\", \"Vanuatu\", \"Samoa\", \"Tonga\", \"Kiribati\",\n",
|
57 |
+
" \"Tuvalu\", \"Marshall Islands\", \"Palau\", \"Micronesia\", \"Nauru\", \"Cook Islands\", \"Niue\", \"Tokelau\", \"Comoros\", \"Maldives\",\n",
|
58 |
+
" \"Cape Verde\", \"São Tomé and Príncipe\", \"Saint Kitts and Nevis\", \"Saint Lucia\", \"Saint Vincent and the Grenadines\",\n",
|
59 |
+
" \"Dominica\", \"Antigua and Barbuda\", \"Barbados\", \"Grenada\", \"Belize\", \"Suriname\", \"Guyana\", \"Paraguay\"\n",
|
60 |
+
"]"
|
61 |
+
],
|
62 |
+
"metadata": {
|
63 |
+
"id": "1AvPO_JMlwEo"
|
64 |
+
},
|
65 |
+
"execution_count": null,
|
66 |
+
"outputs": []
|
67 |
+
},
|
68 |
+
{
|
69 |
+
"cell_type": "code",
|
70 |
+
"source": [
|
71 |
+
"hotels = []"
|
72 |
+
],
|
73 |
+
"metadata": {
|
74 |
+
"id": "BUvNcOk8pGLu"
|
75 |
+
},
|
76 |
+
"execution_count": null,
|
77 |
+
"outputs": []
|
78 |
+
},
|
79 |
+
{
|
80 |
+
"cell_type": "code",
|
81 |
+
"execution_count": null,
|
82 |
+
"metadata": {
|
83 |
+
"id": "oma2S1ai0QhM",
|
84 |
+
"colab": {
|
85 |
+
"base_uri": "https://localhost:8080/"
|
86 |
+
},
|
87 |
+
"outputId": "a860bfc8-9949-489f-b6dd-7b2fa5ebb8da"
|
88 |
+
},
|
89 |
+
"outputs": [
|
90 |
+
{
|
91 |
+
"output_type": "stream",
|
92 |
+
"name": "stdout",
|
93 |
+
"text": [
|
94 |
+
"Total time for country: South Africa is 3.1462161540985107\n",
|
95 |
+
"Total time for country: Sweden is 4.135618448257446\n",
|
96 |
+
"Total time for country: Morocco is 3.870520830154419\n",
|
97 |
+
"Total time for country: Philippines is 3.1940598487854004\n",
|
98 |
+
"Total time for country: Argentina is 2.7493534088134766\n",
|
99 |
+
"Total time for country: Brazil is 3.4907991886138916\n",
|
100 |
+
"Total time for country: Belgium is 3.385352373123169\n",
|
101 |
+
"Total time for country: Denmark is 5.446274518966675\n",
|
102 |
+
"Total time for country: Norway is 2.5857763290405273\n",
|
103 |
+
"Total time for country: Peru is 3.4917497634887695\n",
|
104 |
+
"Total time for country: Czech Republic is 3.0693607330322266\n",
|
105 |
+
"Total time for country: Hungary is 3.3840672969818115\n",
|
106 |
+
"Total time for country: Finland is 3.126199722290039\n",
|
107 |
+
"Total time for country: Ireland is 7.195910692214966\n",
|
108 |
+
"Total time for country: New Zealand is 2.894592523574829\n",
|
109 |
+
"Total time for country: Kenya is 3.255056858062744\n",
|
110 |
+
"Total time for country: Tanzania is 2.524628162384033\n",
|
111 |
+
"Total time for country: Israel is 5.516220808029175\n",
|
112 |
+
"Total time for country: Colombia is 6.803558111190796\n",
|
113 |
+
"Total time for country: Chile is 5.247052907943726\n",
|
114 |
+
"Total time for country: Qatar is 6.248284339904785\n",
|
115 |
+
"Total time for country: Oman is 2.459817886352539\n",
|
116 |
+
"Total time for country: Ukraine is 2.5123510360717773\n",
|
117 |
+
"Total time for country: Jordan is 3.0355286598205566\n",
|
118 |
+
"Total time for country: Romania is 3.6406311988830566\n",
|
119 |
+
"Total time for country: Kazakhstan is 3.4437084197998047\n",
|
120 |
+
"Total time for country: Pakistan is 3.1847915649414062\n",
|
121 |
+
"Total time for country: Bangladesh is 4.030826807022095\n",
|
122 |
+
"Total time for country: Nepal is 2.4323649406433105\n",
|
123 |
+
"Total time for country: Bhutan is 2.3996434211730957\n",
|
124 |
+
"Total time for country: Sri Lanka is 4.682724952697754\n",
|
125 |
+
"Total time for country: Myanmar is 2.983024835586548\n",
|
126 |
+
"Total time for country: Cambodia is 2.5116500854492188\n",
|
127 |
+
"Total time for country: Laos is 2.3876218795776367\n",
|
128 |
+
"Total time for country: Ethiopia is 2.619466543197632\n",
|
129 |
+
"Total time for country: Nigeria is 3.833418846130371\n",
|
130 |
+
"Total time for country: Kenya is 0.1747727394104004\n",
|
131 |
+
"Total time for country: Uganda is 3.2861359119415283\n",
|
132 |
+
"Total time for country: Rwanda is 2.4624242782592773\n",
|
133 |
+
"Total time for country: Botswana is 2.4395322799682617\n",
|
134 |
+
"Total time for country: Zimbabwe is 2.4967572689056396\n",
|
135 |
+
"Total time for country: Namibia is 3.7246780395507812\n",
|
136 |
+
"Total time for country: Zambia is 2.8809072971343994\n",
|
137 |
+
"Total time for country: Tunisia is -0.0001838207244873047\n",
|
138 |
+
"Total time for country: Algeria is 3.2204713821411133\n",
|
139 |
+
"Total time for country: Mauritius is 4.015950918197632\n",
|
140 |
+
"Total time for country: Mauritania is 2.07582426071167\n",
|
141 |
+
"Total time for country: Senegal is 2.4191691875457764\n",
|
142 |
+
"Total time for country: Ghana is 1.9542012214660645\n",
|
143 |
+
"Total time for country: Ivory Coast is 3.2751688957214355\n",
|
144 |
+
"Total time for country: Cameroon is 2.822939872741699\n",
|
145 |
+
"Total time for country: Gabon is 2.2693305015563965\n",
|
146 |
+
"Total time for country: Madagascar is 3.0742127895355225\n",
|
147 |
+
"Total time for country: Seychelles is 2.3126015663146973\n",
|
148 |
+
"Total time for country: Fiji is 5.4941325187683105\n",
|
149 |
+
"Total time for country: Solomon Islands is 2.9408013820648193\n",
|
150 |
+
"Total time for country: Vanuatu is 3.2782294750213623\n",
|
151 |
+
"Total time for country: Samoa is 2.7137160301208496\n",
|
152 |
+
"Total time for country: Tonga is -0.0009427070617675781\n",
|
153 |
+
"Total time for country: Kiribati is -2.0623419284820557\n",
|
154 |
+
"Total time for country: Tuvalu is -9.399558782577515\n",
|
155 |
+
"Total time for country: Marshall Islands is -10.583814144134521\n",
|
156 |
+
"Total time for country: Palau is 4.033239841461182\n",
|
157 |
+
"Total time for country: Micronesia is -0.0008416175842285156\n",
|
158 |
+
"Total time for country: Nauru is -2.9765377044677734\n",
|
159 |
+
"Total time for country: Cook Islands is 2.2189862728118896\n",
|
160 |
+
"Total time for country: Niue is -0.0008838176727294922\n",
|
161 |
+
"Total time for country: Comoros is 2.1896681785583496\n",
|
162 |
+
"Total time for country: Maldives is 2.1256864070892334\n",
|
163 |
+
"Total time for country: Cape Verde is 2.577415943145752\n",
|
164 |
+
"Total time for country: São Tomé and Príncipe is 2.6751351356506348\n",
|
165 |
+
"Total time for country: Saint Kitts and Nevis is 2.8594794273376465\n",
|
166 |
+
"Total time for country: Saint Lucia is 2.7586190700531006\n",
|
167 |
+
"Total time for country: Saint Vincent and the Grenadines is 1.8319523334503174\n",
|
168 |
+
"Total time for country: Dominica is 2.2509264945983887\n",
|
169 |
+
"Total time for country: Antigua and Barbuda is 2.793400526046753\n",
|
170 |
+
"Total time for country: Barbados is 2.134657859802246\n",
|
171 |
+
"Total time for country: Grenada is 1.9456777572631836\n",
|
172 |
+
"Total time for country: Belize is 3.2146828174591064\n",
|
173 |
+
"Total time for country: Suriname is 1.9713115692138672\n",
|
174 |
+
"Total time for country: Guyana is 3.29117751121521\n",
|
175 |
+
"Total time for country: Paraguay is 3.483541250228882\n"
|
176 |
+
]
|
177 |
+
}
|
178 |
+
],
|
179 |
+
"source": [
|
180 |
+
"for country in most_visited_countries[:]:\n",
|
181 |
+
" start = time.time()\n",
|
182 |
+
" params = {\n",
|
183 |
+
" \"engine\": \"google_hotels\",\n",
|
184 |
+
" \"q\": f\"{country} Hotels\",\n",
|
185 |
+
" \"check_in_date\": \"2024-03-16\",\n",
|
186 |
+
" \"check_out_date\": \"2024-03-17\",\n",
|
187 |
+
" \"adults\": \"2\",\n",
|
188 |
+
" \"currency\": \"USD\",\n",
|
189 |
+
" \"gl\": \"us\",\n",
|
190 |
+
" \"hl\": \"en\",\n",
|
191 |
+
" \"api_key\": serpapi_key\n",
|
192 |
+
" }\n",
|
193 |
+
" try:\n",
|
194 |
+
" result = serpapi.search(params)\n",
|
195 |
+
" except:\n",
|
196 |
+
" result = serpapi.GoogleSearch(params)\n",
|
197 |
+
" try:\n",
|
198 |
+
" all_hotels = [] # append hotel deails\n",
|
199 |
+
" # change this code to deep dive into dictionaries\n",
|
200 |
+
" for idx in result.get_json()['properties']:\n",
|
201 |
+
" if idx['type'] == \"hotel\":\n",
|
202 |
+
" # Extract required data\n",
|
203 |
+
" try:\n",
|
204 |
+
" name = idx['name']\n",
|
205 |
+
" check_in_time = idx['check_in_time']\n",
|
206 |
+
" check_out_time = idx['check_out_time']\n",
|
207 |
+
" lowest_rate_before_tax = idx['rate_per_night']['before_taxes_fees']\n",
|
208 |
+
" lowest_rate_after_tax = idx['total_rate']['lowest']\n",
|
209 |
+
" extracted_hotel_class = idx['extracted_hotel_class']\n",
|
210 |
+
" overall_rating = idx['overall_rating']\n",
|
211 |
+
" nearby_places_list = []\n",
|
212 |
+
" nearby_places = idx['nearby_places']\n",
|
213 |
+
" for place in nearby_places:\n",
|
214 |
+
" place_name = place['name']\n",
|
215 |
+
" nearby_places_list.append([place_name])\n",
|
216 |
+
" for trans_med in place['transportations']:\n",
|
217 |
+
" type_ = trans_med['type']\n",
|
218 |
+
" duration = trans_med['duration']\n",
|
219 |
+
" nearby_places_list[-1].append([type_, duration])\n",
|
220 |
+
" ratings = []\n",
|
221 |
+
" for dic in idx['ratings']:\n",
|
222 |
+
" ratings.append(dic['count'])\n",
|
223 |
+
" location_rating = idx['location_rating']\n",
|
224 |
+
" reviews_breakdown = idx['reviews_breakdown']\n",
|
225 |
+
" reviews_breakdown = []\n",
|
226 |
+
" for dic in idx['reviews_breakdown']:\n",
|
227 |
+
" if dic['name'] in [\"Service\", \"Nature\", \"Property\"]:\n",
|
228 |
+
" positive_reviews = round((dic['positive'] / dic['total_mentioned'])*100, 0)\n",
|
229 |
+
" negative_reviews = round((dic['negative'] / dic['total_mentioned'])*100, 0)\n",
|
230 |
+
" reviews_breakdown.append((dic['name'], positive_reviews, negative_reviews))\n",
|
231 |
+
" reviews_str = \"\"\n",
|
232 |
+
" for review in reviews_breakdown:\n",
|
233 |
+
" reviews_str += f\"{review[0]}: Positive Reviews -> {review[1]}% Negative Reviews -> {review[2]} \\n\"\n",
|
234 |
+
" nearby_places_str = \"\"\n",
|
235 |
+
" for place in nearby_places_list:\n",
|
236 |
+
" # print(place)\n",
|
237 |
+
" for idx, temp in enumerate(place):\n",
|
238 |
+
" if idx == 0:\n",
|
239 |
+
" temp_str = temp + \" Distance:\"\n",
|
240 |
+
" continue\n",
|
241 |
+
" temp_str += f\" {place[idx][-1]} by {place[idx][0]}\"\n",
|
242 |
+
" nearby_places_str += temp_str + \"\\n\"\n",
|
243 |
+
" hotel_details = f\"\"\"\n",
|
244 |
+
" Country : {country}\n",
|
245 |
+
" Name: {name},\n",
|
246 |
+
" Check in time: {check_in_time},\n",
|
247 |
+
" Check out time : {check_out_time},\n",
|
248 |
+
" Lowest rate (with tax) : {lowest_rate_after_tax},\n",
|
249 |
+
" Lowest rate (without tax) : {lowest_rate_before_tax},\n",
|
250 |
+
" Hotel Class : {extracted_hotel_class},\n",
|
251 |
+
" Overall rating: {overall_rating}\n",
|
252 |
+
" Reviews:\n",
|
253 |
+
" 5-start: {ratings[0]}, 4-start: {ratings[1]}, 3-start: {ratings[2]}, 2-start: {ratings[3]}, 1-start: {ratings[4]},\n",
|
254 |
+
" Location Rating: {location_rating},\n",
|
255 |
+
" Reviews Breakdown:\n",
|
256 |
+
" {reviews_str}\n",
|
257 |
+
" Nearby Places:\n",
|
258 |
+
" {nearby_places_str}\n",
|
259 |
+
" \"\"\"\n",
|
260 |
+
" all_hotels.append(hotel_details)\n",
|
261 |
+
" end = time.time()\n",
|
262 |
+
" except Exception as e:\n",
|
263 |
+
" continue\n",
|
264 |
+
" hotels.append(all_hotels)\n",
|
265 |
+
" print(f\"Total time for country: {country} is {end-start}\")\n",
|
266 |
+
" except:\n",
|
267 |
+
" continue\n",
|
268 |
+
"\n",
|
269 |
+
"\n"
|
270 |
+
]
|
271 |
+
},
|
272 |
+
{
|
273 |
+
"cell_type": "code",
|
274 |
+
"source": [
|
275 |
+
"hotels_str = \"\"\n",
|
276 |
+
"for country in hotels:\n",
|
277 |
+
" for hotel in country:\n",
|
278 |
+
" hotels_str += hotel.strip() + \"\\n\\n\"\n"
|
279 |
+
],
|
280 |
+
"metadata": {
|
281 |
+
"id": "oEKIsk5qqq54"
|
282 |
+
},
|
283 |
+
"execution_count": null,
|
284 |
+
"outputs": []
|
285 |
+
},
|
286 |
+
{
|
287 |
+
"cell_type": "code",
|
288 |
+
"source": [
|
289 |
+
"import pickle\n",
|
290 |
+
"with open(\"all_hotels_list.pkl\", \"wb\") as file:\n",
|
291 |
+
" pickle.dump(hotels, file)"
|
292 |
+
],
|
293 |
+
"metadata": {
|
294 |
+
"id": "m8Yg0Cneshej"
|
295 |
+
},
|
296 |
+
"execution_count": null,
|
297 |
+
"outputs": []
|
298 |
+
},
|
299 |
+
{
|
300 |
+
"cell_type": "code",
|
301 |
+
"source": [
|
302 |
+
"with open(\"all_hotels.txt\", \"w\") as file:\n",
|
303 |
+
" file.write(hotels_str)"
|
304 |
+
],
|
305 |
+
"metadata": {
|
306 |
+
"id": "k_VEcbCrr2z_"
|
307 |
+
},
|
308 |
+
"execution_count": null,
|
309 |
+
"outputs": []
|
310 |
+
}
|
311 |
+
],
|
312 |
+
"metadata": {
|
313 |
+
"colab": {
|
314 |
+
"provenance": []
|
315 |
+
},
|
316 |
+
"kernelspec": {
|
317 |
+
"display_name": "Python 3",
|
318 |
+
"name": "python3"
|
319 |
+
},
|
320 |
+
"language_info": {
|
321 |
+
"name": "python"
|
322 |
+
}
|
323 |
+
},
|
324 |
+
"nbformat": 4,
|
325 |
+
"nbformat_minor": 0
|
326 |
+
}
|
all_hotels.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
default.sqlite
ADDED
Binary file (123 kB). View file
|
|
travel.py
ADDED
@@ -0,0 +1,103 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# import all necessary libraries
|
2 |
+
import os
|
3 |
+
import requests
|
4 |
+
import streamlit as st
|
5 |
+
from dotenv import load_dotenv
|
6 |
+
import google.generativeai as genai
|
7 |
+
from langchain_anthropic import ChatAnthropic
|
8 |
+
from langchain_core.prompts import ChatPromptTemplate
|
9 |
+
from langchain_community.vectorstores.faiss import FAISS
|
10 |
+
from langchain_google_genai import ChatGoogleGenerativeAI
|
11 |
+
from langchain_google_genai import GoogleGenerativeAIEmbeddings
|
12 |
+
|
13 |
+
# Read the variables defined in the local .env file into the environment.
load_dotenv()

# Configure the Google Generative AI SDK and build the Claude chat model
# from the keys found in the environment.
genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))
chat = ChatAnthropic(
    temperature=0,
    anthropic_api_key=os.getenv("ANTHROPIC_API_KEY"),
    model_name="claude-3-opus-20240229",
)

# Deepgram "speak" endpoint; the voice model is selected in the query string.
url = "https://api.deepgram.com/v1/speak?model=aura-asteria-en"

# Token used for the Deepgram request, plus the headers sent with it.
# NOTE(review): os.getenv returns None when AURA_API_KEY is unset, which
# would yield the header value "Token None" - confirm the deployment sets it.
api_key = os.getenv("AURA_API_KEY")
headers = {
    "Authorization": f"Token {api_key}",
    "Content-Type": "application/json",
}
|
33 |
+
|
34 |
+
|
35 |
+
def get_embeddings(user_query):
    """Return the entries of the saved FAISS index most similar to a query.

    The index stored under ``"faiss_index"`` is loaded on every call, using
    the ``models/embedding-001`` Google embedding model, and then searched.

    Args:
        user_query: Text passed to ``similarity_search``.

    Returns:
        Whatever ``similarity_search`` returns for ``user_query``.
    """
    # NOTE(review): newer langchain_community releases appear to require
    # allow_dangerous_deserialization=True for load_local - confirm against
    # the installed version before upgrading.
    index = FAISS.load_local(
        "faiss_index",
        GoogleGenerativeAIEmbeddings(model="models/embedding-001"),
    )
    return index.similarity_search(user_query)
|
40 |
+
|
41 |
+
|
42 |
+
# Function to generate response based on user query
|
43 |
+
def get_response(chat, prompt, user_query):
    """Answer ``user_query`` with ``chat``, grounded in retrieved passages.

    Args:
        chat: Model object piped after the prompt template (``template | chat``)
            and invoked with the template variables.
        prompt: Text of the human message template. It is invoked with the
            variables ``context`` and ``question``.
        user_query: The user's question. It drives the similarity search and
            is also supplied as the ``question`` variable.

    Returns:
        The ``content`` attribute of the object returned by the chain.
    """
    system = (
        "You are world best travel advisor. Advice the user in best possible"
    )
    # Keep the caller's template text under its own name instead of
    # rebinding the ``prompt`` parameter to a different kind of object.
    template = ChatPromptTemplate.from_messages(
        [("system", system), ("human", prompt)]
    )

    docs = get_embeddings(user_query)
    # Hand the model the passage text itself. Formatting the raw list into
    # the template would insert the list's repr (object wrappers, escaped
    # newlines) rather than readable context.
    context = "\n\n".join(
        getattr(doc, "page_content", str(doc)) for doc in docs
    )

    chain = template | chat
    output = chain.invoke({"context": context, "question": user_query})
    return output.content
|
58 |
+
|
59 |
+
# Streamlit app layout
|
60 |
+
def main():
    """Render the Claudestay page: answer a travel query and play it as audio.

    On "Submit" the query is answered through ``get_response`` and shown as
    markdown. The answer text is then posted to the text-to-speech endpoint
    in ``url``; on HTTP 200 the returned bytes are written to
    ``your_output_file.mp3`` and played in the page.

    Relies on the module-level ``chat``, ``url`` and ``headers`` objects.
    """
    st.title("Claudestay")

    prompt = """
    Answer the question as detailed as possible from the provided context, make sure to provide all the details, if the answer is not in
    provided context just say, "answer is not available in the context", don't provide the wrong answer.
    You must provide answer in markdown table format.\n\n
    Context:\n {context}?\n
    Question: \n{question}\n

    Answer:
    """

    # Input box for user query
    user_query = st.text_input("Enter your travel query:")

    if not st.button("Submit"):
        return

    # Do not spend a retrieval and model call on a blank query.
    if not user_query.strip():
        st.warning("Please enter a travel query.")
        return

    with st.spinner("Fetching data..."):
        text_response = get_response(chat, prompt, user_query)
        st.markdown(f"**Response:** {text_response}")

        # The written answer is already on screen, so a text-to-speech
        # failure is reported to the user without discarding it.
        try:
            response = requests.post(
                url,
                headers=headers,
                json={"text": text_response},
                timeout=60,  # without a timeout a stalled request hangs the page
            )
        except requests.RequestException as exc:
            st.error("Audio could not be generated for this answer.")
            print(f"Error: {exc}")
            return

        if response.status_code == 200:
            # Save the response content to a file
            with open("your_output_file.mp3", "wb") as f:
                f.write(response.content)
            st.audio(response.content)
            print("File saved successfully.")
        else:
            st.error("Audio could not be generated for this answer.")
            print(f"Error: {response.status_code} - {response.text}")
101 |
+
|
102 |
+
if __name__ == "__main__":
|
103 |
+
main()
|
vectorstore.ipynb
ADDED
@@ -0,0 +1,102 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"cells": [
|
3 |
+
{
|
4 |
+
"cell_type": "markdown",
|
5 |
+
"metadata": {},
|
6 |
+
"source": [
|
7 |
+
"# Create Vector database"
|
8 |
+
]
|
9 |
+
},
|
10 |
+
{
|
11 |
+
"cell_type": "code",
|
12 |
+
"execution_count": 1,
|
13 |
+
"metadata": {},
|
14 |
+
"outputs": [],
|
15 |
+
"source": [
|
16 |
+
"import os\n",
|
17 |
+
"from dotenv import load_dotenv\n",
|
18 |
+
"import google.generativeai as genai\n",
|
19 |
+
"from langchain_anthropic import ChatAnthropic\n",
|
20 |
+
"from langchain_community.vectorstores.faiss import FAISS\n",
|
21 |
+
"from langchain_google_genai import ChatGoogleGenerativeAI\n",
|
22 |
+
"from langchain_google_genai import GoogleGenerativeAIEmbeddings\n",
|
23 |
+
"from langchain.text_splitter import RecursiveCharacterTextSplitter"
|
24 |
+
]
|
25 |
+
},
|
26 |
+
{
|
27 |
+
"cell_type": "code",
|
28 |
+
"execution_count": 2,
|
29 |
+
"metadata": {},
|
30 |
+
"outputs": [],
|
31 |
+
"source": [
|
32 |
+
"load_dotenv()\n",
|
33 |
+
"genai.configure(api_key=os.getenv(\"GOOGLE_API_KEY\"))"
|
34 |
+
]
|
35 |
+
},
|
36 |
+
{
|
37 |
+
"cell_type": "code",
|
38 |
+
"execution_count": 5,
|
39 |
+
"metadata": {},
|
40 |
+
"outputs": [],
|
41 |
+
"source": [
|
42 |
+
"def get_text_chunks(text):\n",
|
43 |
+
" text_splitter = RecursiveCharacterTextSplitter(chunk_size=10000, chunk_overlap=1000)\n",
|
44 |
+
" chunks = text_splitter.split_text(text)\n",
|
45 |
+
" return chunks"
|
46 |
+
]
|
47 |
+
},
|
48 |
+
{
|
49 |
+
"cell_type": "code",
|
50 |
+
"execution_count": 6,
|
51 |
+
"metadata": {},
|
52 |
+
"outputs": [],
|
53 |
+
"source": [
|
54 |
+
"def get_vector_store(text_chunks):\n",
|
55 |
+
" embeddings = GoogleGenerativeAIEmbeddings(model = \"models/embedding-001\")\n",
|
56 |
+
" vector_store = FAISS.from_texts(text_chunks, embedding=embeddings)\n",
|
57 |
+
" vector_store.save_local(\"faiss_index\")"
|
58 |
+
]
|
59 |
+
},
|
60 |
+
{
|
61 |
+
"cell_type": "code",
|
62 |
+
"execution_count": 7,
|
63 |
+
"metadata": {},
|
64 |
+
"outputs": [],
|
65 |
+
"source": [
|
66 |
+
"with open(\"all_hotels.txt\", \"r\", encoding=\"utf8\") as file:\n",
|
67 |
+
" text = file.read()"
|
68 |
+
]
|
69 |
+
},
|
70 |
+
{
|
71 |
+
"cell_type": "code",
|
72 |
+
"execution_count": 9,
|
73 |
+
"metadata": {},
|
74 |
+
"outputs": [],
|
75 |
+
"source": [
|
76 |
+
"chunks = get_text_chunks(text)\n",
|
77 |
+
"get_vector_store(chunks)"
|
78 |
+
]
|
79 |
+
}
|
80 |
+
],
|
81 |
+
"metadata": {
|
82 |
+
"kernelspec": {
|
83 |
+
"display_name": "Python 3",
|
84 |
+
"language": "python",
|
85 |
+
"name": "python3"
|
86 |
+
},
|
87 |
+
"language_info": {
|
88 |
+
"codemirror_mode": {
|
89 |
+
"name": "ipython",
|
90 |
+
"version": 3
|
91 |
+
},
|
92 |
+
"file_extension": ".py",
|
93 |
+
"mimetype": "text/x-python",
|
94 |
+
"name": "python",
|
95 |
+
"nbconvert_exporter": "python",
|
96 |
+
"pygments_lexer": "ipython3",
|
97 |
+
"version": "3.11.3"
|
98 |
+
}
|
99 |
+
},
|
100 |
+
"nbformat": 4,
|
101 |
+
"nbformat_minor": 2
|
102 |
+
}
|