Chintan Donda committed on
Commit
1921a14
1 Parent(s): 8f40cff

Updating widgets, automatically populate dropdowns/textboxes upon event trigger

Browse files
Files changed (6) hide show
  1. app.py +99 -52
  2. requirements.txt +2 -1
  3. src/constants.py +53 -46
  4. src/data_loader.py +5 -4
  5. src/langchain_utils.py +4 -4
  6. src/weather.py +48 -162
app.py CHANGED
@@ -4,6 +4,7 @@ import datetime
4
 
5
  import src.constants as constants_utils
6
  import src.kkms_kssw as kkms_kssw
 
7
 
8
  os.environ["CURL_CA_BUNDLE"] = ""
9
 
@@ -140,6 +141,25 @@ class DomState:
140
  return self.indic_translation
141
 
142
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
143
  def _upload_file(self, files):
144
  file_paths = [file.name for file in files]
145
  return file_paths
@@ -269,7 +289,7 @@ with gr.Blocks(title='KKMS-KSSW Demo') as demo:
269
  with gr.Tab(label='Relevant paragraphs'):
270
  question_category = gr.Dropdown(
271
  constants_utils.INDEX_CATEGORY,
272
- label="Select Query Type")
273
  question = gr.Textbox(label="Enter your question", placeholder='Type the question here')
274
  # Get the Relevant paragraphs for the question asked
275
  relevant_paragraphs = gr.Textbox(label="Relevant paragraphs are:", value=dom.relevant_paragraphs, interactive=False)
@@ -284,34 +304,37 @@ with gr.Blocks(title='KKMS-KSSW Demo') as demo:
284
  with gr.Tab(label='Sources of relevant paragraphs'):
285
  # Get the Sources of relevant paragraphs
286
  sources_relevant_paragraphs = gr.Textbox(label="Sources of relevant paragraphs are:", interactive=False)
287
- b_sources_relevant_paragraphs = gr.Button("Get Sources of relevant paragraphs").style(size='sm')
288
- b_sources_relevant_paragraphs.click(fn=dom.click_handler_for_relevant_paragraphs_source, inputs=relevant_paragraphs, outputs=[sources_relevant_paragraphs])
 
 
 
289
 
290
- # NOTE: Don't show extractive summary unless requested by FTA.
291
- # with gr.Column(scale=1, min_width=600):
292
- # with gr.Tab(label='Extractive Summary'):
293
- # # Get the extractive text summary from the retrieved Relevant paragraphs
294
- # summary = gr.Textbox(label="Extractive Summary is:", value=dom.summary, interactive=False)
295
- # b_summary = gr.Button("Extract Summary").style(size='sm')
296
- # b_summary.click(fn=dom.click_handler_for_summary, inputs=relevant_paragraphs, outputs=[summary])
297
-
298
- # Get the exact answer for the question asked from the retrieved Relevant paragraphs
299
- with gr.Column(scale=1, min_width=600):
300
- with gr.Tab(label='Answer'):
301
- answer = gr.Textbox(label="Answer is:", value=dom.answer, interactive=False)
302
- b_answer = gr.Button("Get Answer").style(size='sm')
303
- b_answer.click(fn=dom.click_handler_for_get_answer, inputs=[relevant_paragraphs, question], outputs=[answer])
304
-
305
- # Covert the answer to Indian language
306
- with gr.Column(scale=1, min_width=600):
307
- with gr.Tab(label='Answer in selected language'):
308
- # Select the language
309
- language = gr.Dropdown(
310
- ['English', 'Hindi', 'Gujarati', 'Marathi', 'Kannada', 'Bengali', 'Panjabi', 'Telugu', 'Tamil', 'Malayalam'],
311
- label="Select language")
312
- indic_lang_answer = gr.Textbox(label="Answer in the selected language is:", value=dom.indic_translation, interactive=False)
313
- b_indic_lang_answer = gr.Button("Get answer in selected language").style(size='sm')
314
- b_indic_lang_answer.click(fn=dom.click_handler_for_get_indic_translation, inputs=[answer, language], outputs=[indic_lang_answer])
315
 
316
 
317
  #############################################################################
@@ -362,52 +385,76 @@ with gr.Blocks(title='KKMS-KSSW Demo') as demo:
362
  label="Select state"
363
  )
364
 
365
- # # Select district
366
- # district = gr.Dropdown(
367
- # weather_utils.STATES.get(state, {}),
368
- # label="Select district"
369
- # )
370
 
371
- district = gr.Textbox(label="Enter district name", placeholder='Type the district name here')
372
- district_weather = gr.Textbox(label=f"Weather forecast is:", value=dom.weather_forecast, interactive=False)
373
- bd_weather = gr.Button("Get weather forecast").style(size='sm')
374
- bd_weather.click(fn=dom.click_handler_for_get_weather_forecast, inputs=[state, district], outputs=[district_weather])
375
-
 
 
 
 
 
 
 
 
 
 
376
  with gr.Column(scale=1, min_width=600):
377
  with gr.Tab(label='Weather Forecast Summary'):
378
  # Get the summary of the weather forecast
379
- weather_forecast_summary = gr.Textbox(label="Weather Forecast Summary is:", value=dom.weather_forecast_summary, interactive=False)
380
- b_weather_forecast_summary = gr.Button("Get Weather Forecast Summary").style(size='sm')
381
- b_weather_forecast_summary.click(fn=dom.click_handler_for_weather_forecast_summary, inputs=district_weather, outputs=[weather_forecast_summary])
 
 
 
382
 
383
  # Covert the weather forcast summary in Indian language
384
  with gr.Column(scale=1, min_width=600):
385
  with gr.Tab(label='Weather Forecast Summary in selected language'):
386
  # Select the language
387
  language = gr.Dropdown(
388
- ['English', 'Hindi', 'Gujarati', 'Marathi', 'Kannada', 'Bengali', 'Panjabi', 'Telugu', 'Tamil', 'Malayalam'],
389
  label="Select language")
390
- indic_weather_forecast_summary = gr.Textbox(label="Weather Forecast Summary in the selected language is:", value=dom.indic_translation, interactive=False)
 
 
 
 
 
 
 
 
 
391
  b_indic_weather_forecast_summary = gr.Button("Get answer in selected language").style(size='sm')
392
  b_indic_weather_forecast_summary.click(fn=dom.click_handler_for_get_indic_translation, inputs=[weather_forecast_summary, language], outputs=[indic_weather_forecast_summary])
393
 
394
  with gr.Column(scale=1, min_width=600):
395
- # with gr.Tab(label='Weather Info'):
396
- city = gr.Textbox(label="Enter city name", placeholder='Type the city name here')
397
- weather = gr.Textbox(label=f"Current weather is:", value=dom.weather_info, interactive=False)
398
- b_weather = gr.Button("Get weather info").style(size='sm')
399
- b_weather.click(fn=dom.click_handler_for_get_weather, inputs=city, outputs=[weather])
 
 
400
 
401
 
402
  #############################################################################
403
  # Widget to load and process from the custom data source
404
  with gr.Row(visible=False) as rowLoadCustomData:
405
  with gr.Column(scale=1, min_width=600):
406
- with gr.Tab(label='Load Custom Data'):
407
  question_category = gr.Dropdown(
408
  constants_utils.INDEX_CATEGORY,
409
  label="Select Query Type")
410
-
411
  doc_type = gr.Radio(
412
  list(constants_utils.DATA_SOURCES.keys()),
413
  label="Select data source (Supports uploading multiple Files/URLs)",
@@ -431,7 +478,7 @@ with gr.Blocks(title='KKMS-KSSW Demo') as demo:
431
 
432
  with gr.Row(visible=False) as rowUploadOnlinePdf:
433
  with gr.Column(scale=1, min_width=600):
434
- urls = gr.Textbox(label="Enter URLs for Online PDF (Supports uploading from multiple URLs. Enter the URLs in comma (,) separated format)", placeholder='Type the URLs here')
435
  b_urls = gr.Button("Load Online PDFs").style(size='sm')
436
  b_urls.click(
437
  fn=dom.click_handler_for_load_files_urls,
@@ -455,7 +502,7 @@ with gr.Blocks(title='KKMS-KSSW Demo') as demo:
455
 
456
  with gr.Row(visible=False) as rowUploadUrls:
457
  with gr.Column(scale=1, min_width=600):
458
- urls = gr.Textbox(label="Enter URLs (Supports uploading from multiple URLs. Enter the URLs in comma (,) separated format)", placeholder='Type the URLs here')
459
  b_urls = gr.Button("Load URLs").style(size='sm')
460
  b_urls.click(
461
  fn=dom.click_handler_for_load_files_urls,
 
4
 
5
  import src.constants as constants_utils
6
  import src.kkms_kssw as kkms_kssw
7
+ import src.weather as weather_utils
8
 
9
  os.environ["CURL_CA_BUNDLE"] = ""
10
 
 
141
  return self.indic_translation
142
 
143
 
144
def click_handler_for_weather_forecast_districts_dropdown_list_update(
    self,
    state,
    district=None
):
    """Refresh the district dropdown choices for the selected state.

    Args:
        state: State name chosen in the "Select state" dropdown.
        district: Unused. It is optional because the ``state.change`` event
            wires only ``state`` as an input, so Gradio calls this handler
            with a single value; a required second parameter would raise
            ``TypeError`` on every state selection.

    Returns:
        A ``gr.update`` payload whose ``choices`` are the district names
        returned by ``weather_utils_obj.get_district_names(state)``.
    """
    return gr.update(
        choices=self.kkms_kssw_obj.weather_utils_obj.get_district_names(state)
    )
152
+
153
+
154
def click_handler_for_weather_forecast_district(
    self,
    state,
    district,
    weather=None
):
    """Fetch the weather forecast for the selected state and district.

    Args:
        state: State name chosen in the "Select state" dropdown.
        district: District name chosen in the "Select District" dropdown.
        weather: Unused. It is optional because the ``district.change``
            event wires only ``[state, district]`` as inputs; a required
            third parameter would raise ``TypeError`` when Gradio calls
            the handler.

    Returns:
        Whatever ``weather_utils_obj.get_weather_forecast(state, district)``
        returns, or an empty string when no district is selected.
    """
    # An empty/cleared dropdown yields no district; the forecast URL is built
    # by string concatenation with the district, so skip the lookup entirely.
    if not district:
        return ''

    return self.kkms_kssw_obj.weather_utils_obj.get_weather_forecast(state, district)
161
+
162
+
163
  def _upload_file(self, files):
164
  file_paths = [file.name for file in files]
165
  return file_paths
 
289
  with gr.Tab(label='Relevant paragraphs'):
290
  question_category = gr.Dropdown(
291
  constants_utils.INDEX_CATEGORY,
292
+ label="Select Question Category")
293
  question = gr.Textbox(label="Enter your question", placeholder='Type the question here')
294
  # Get the Relevant paragraphs for the question asked
295
  relevant_paragraphs = gr.Textbox(label="Relevant paragraphs are:", value=dom.relevant_paragraphs, interactive=False)
 
304
  with gr.Tab(label='Sources of relevant paragraphs'):
305
  # Get the Sources of relevant paragraphs
306
  sources_relevant_paragraphs = gr.Textbox(label="Sources of relevant paragraphs are:", interactive=False)
307
+ relevant_paragraphs.change(
308
+ dom.click_handler_for_relevant_paragraphs_source,
309
+ relevant_paragraphs,
310
+ sources_relevant_paragraphs
311
+ )
312
 
313
+ # Get the exact answer for the question asked from the retrieved Relevant paragraphs
314
+ with gr.Column(scale=1, min_width=600):
315
+ with gr.Tab(label='Answer'):
316
+ answer = gr.Textbox(label="Answer is:", value=dom.answer, interactive=False)
317
+ relevant_paragraphs.change(
318
+ dom.click_handler_for_get_answer,
319
+ [relevant_paragraphs, question],
320
+ answer
321
+ )
322
+
323
+ # Convert the answer to an Indian language
324
+ with gr.Column(scale=1, min_width=600):
325
+ with gr.Tab(label='Answer in selected language'):
326
+ # Select the language
327
+ language = gr.Dropdown(
328
+ list(constants_utils.INDIC_LANGUAGE.keys()),
329
+ label="Select language")
330
+ indic_lang_answer = gr.Textbox(label="Answer in the selected language is:", interactive=False)
331
+ answer.change(
332
+ dom.click_handler_for_get_indic_translation,
333
+ answer,
334
+ indic_lang_answer
335
+ )
336
+ b_indic_lang_answer = gr.Button("Get answer in selected language").style(size='sm')
337
+ b_indic_lang_answer.click(fn=dom.click_handler_for_get_indic_translation, inputs=[answer, language], outputs=[indic_lang_answer])
338
 
339
 
340
  #############################################################################
 
385
  label="Select state"
386
  )
387
 
388
+ # Select District
389
+ district = gr.Dropdown(
390
+ choices=[],
391
+ label="Select District"
392
+ )
393
 
394
+ # Get districts of the selected state
395
+ state.change(
396
+ dom.click_handler_for_weather_forecast_districts_dropdown_list_update,
397
+ state,
398
+ district
399
+ )
400
+
401
+ # Get weather forecast on district selection event
402
+ district_weather = gr.Textbox(label=f"Weather forecast is:", interactive=False)
403
+ district.change(
404
+ dom.click_handler_for_weather_forecast_district,
405
+ [state, district],
406
+ district_weather
407
+ )
408
+
409
  with gr.Column(scale=1, min_width=600):
410
  with gr.Tab(label='Weather Forecast Summary'):
411
  # Get the summary of the weather forecast
412
+ weather_forecast_summary = gr.Textbox(label="Weather Forecast Summary is:", interactive=False)
413
+ district.change(
414
+ dom.click_handler_for_weather_forecast_summary,
415
+ district_weather,
416
+ weather_forecast_summary
417
+ )
418
 
419
  # Convert the weather forecast summary to an Indian language
420
  with gr.Column(scale=1, min_width=600):
421
  with gr.Tab(label='Weather Forecast Summary in selected language'):
422
  # Select the language
423
  language = gr.Dropdown(
424
+ list(constants_utils.INDIC_LANGUAGE.keys()),
425
  label="Select language")
426
+ indic_weather_forecast_summary = gr.Textbox(label="Weather Forecast Summary in the selected language is:", interactive=False)
427
+
428
+ # By default display weather forecast summary in Hindi. User can change it later on.
429
+ weather_forecast_summary.change(
430
+ dom.click_handler_for_get_indic_translation,
431
+ weather_forecast_summary,
432
+ indic_weather_forecast_summary
433
+ )
434
+
435
+ # User can get the weather forecast summary in their preferred language as well
436
  b_indic_weather_forecast_summary = gr.Button("Get answer in selected language").style(size='sm')
437
  b_indic_weather_forecast_summary.click(fn=dom.click_handler_for_get_indic_translation, inputs=[weather_forecast_summary, language], outputs=[indic_weather_forecast_summary])
438
 
439
  with gr.Column(scale=1, min_width=600):
440
+ with gr.Tab(label='Weather Info'):
441
+ weather = gr.Textbox(label=f"Current weather is:", interactive=False)
442
+ district.change(
443
+ dom.click_handler_for_get_weather,
444
+ district,
445
+ weather
446
+ )
447
 
448
 
449
  #############################################################################
450
  # Widget to load and process from the custom data source
451
  with gr.Row(visible=False) as rowLoadCustomData:
452
  with gr.Column(scale=1, min_width=600):
453
+ with gr.Tab(label='Load Custom Data (Do not upload data from the same file/url again. Once it is uploaded, it gets stored forever.)'):
454
  question_category = gr.Dropdown(
455
  constants_utils.INDEX_CATEGORY,
456
  label="Select Query Type")
457
+
458
  doc_type = gr.Radio(
459
  list(constants_utils.DATA_SOURCES.keys()),
460
  label="Select data source (Supports uploading multiple Files/URLs)",
 
478
 
479
  with gr.Row(visible=False) as rowUploadOnlinePdf:
480
  with gr.Column(scale=1, min_width=600):
481
+ urls = gr.Textbox(label="Enter URLs for Online PDF (Supports uploading from multiple URLs. Enter the URLs in comma (,) separated format.)", placeholder='Type the URLs here')
482
  b_urls = gr.Button("Load Online PDFs").style(size='sm')
483
  b_urls.click(
484
  fn=dom.click_handler_for_load_files_urls,
 
502
 
503
  with gr.Row(visible=False) as rowUploadUrls:
504
  with gr.Column(scale=1, min_width=600):
505
+ urls = gr.Textbox(label="Enter URLs (Supports uploading from multiple URLs. Enter the URLs in comma (,) separated format.)", placeholder='Type the URLs here')
506
  b_urls = gr.Button("Load URLs").style(size='sm')
507
  b_urls.click(
508
  fn=dom.click_handler_for_load_files_urls,
requirements.txt CHANGED
@@ -18,4 +18,5 @@ tiktoken
18
  googletrans==3.1.0a0
19
  BeautifulSoup4
20
  pypdf
21
- PyPDF2
 
 
18
  googletrans==3.1.0a0
19
  BeautifulSoup4
20
  pypdf
21
+ PyPDF2
22
+ html2text
src/constants.py CHANGED
@@ -1,5 +1,6 @@
1
  import os
2
  import src.web_crawler as web_crawler_utils
 
3
 
4
  LOAD_FROM_EXISTING_INDEX_STORE = False
5
  INDEX_TYPE = 'FAISS'
@@ -17,10 +18,10 @@ if not os.path.exists(OUTPUT_PATH):
17
  INDEX_CATEGORY = [
18
  'crops',
19
  'fruits',
20
- # 'pest_management',
21
- # 'govt_policy',
22
- # 'insurance',
23
- # 'soil',
24
  'general',
25
  'vegetables'
26
  ]
@@ -28,25 +29,27 @@ INDEX_CATEGORY = [
28
  # Doctype of the master index of each index category. Master index for each index category would be stored under this key.
29
  INDEX_CATEGORY_MASTER_INDEX_DOC_TYPE = 'master'
30
 
31
- # Output index name if creating the index/vector store using GPTSimpleVectorIndex
32
- INDEX_FILENAME = os.path.join(OUTPUT_PATH, 'index.json')
33
-
34
  # List of data sources/types & from where to load the data and create the index/vector store
35
  # 2nd item is the type of source from where the data would be loaded. Currently it could come from either a file or URL.
36
  DATA_SOURCES = {
37
  'PDF': 'pdf',
38
  'Text File': 'textfile',
39
  'Online PDF': 'online_pdf', # web_crawler_utils.get_ipm_packages_pdfs_urls()[:1]
40
- # 'URLs': 'urls',
41
  }
42
 
43
  # LangChain related constants
 
 
 
44
  TEXT_SPLITTER_CHUNK_SIZE = 1000
45
  TEXT_SPLITTER_CHUNK_OVERLAP = 0
46
  TEXT_SPLITTER_SEPARATOR = '\n\n'
47
 
48
 
49
  URLS = [
 
 
50
  'https://agricoop.nic.in/#gsc.tab=0',
51
 
52
  'https://dmi.gov.in/Documents/GrantCAGrapes.pdf',
@@ -120,44 +123,48 @@ MANDI_PRICE_STATES = [
120
  ]
121
 
122
  # State list used in the Weather forecast widget dropdown list
123
- WEATHER_FORECAST_STATES = [
124
- 'Andaman-Nicobar',
125
- 'Andhra-Pradesh',
126
- 'Arunachal-Pradesh',
127
- 'Assam',
128
- 'Bihar',
129
- 'Chandigarh',
130
- 'Chhattisgarh',
131
- 'Dadra-and-Nagar-Haveli',
132
- 'Daman-and-Diu',
133
- 'Delhi',
134
- 'Goa',
135
- 'Gujarat',
136
- 'Haryana',
137
- 'Himachal-Pradesh',
138
- 'Jammu-Kashmir',
139
- 'Jharkhand',
140
- 'Karnataka',
141
- 'Kerala',
142
- 'Lakshadweep',
143
- 'Madhya-Pradesh',
144
- 'Maharashtra',
145
- 'Manipur',
146
- 'Meghalaya',
147
- 'Mizoram',
148
- 'Nagaland',
149
- 'Odisha',
150
- 'Pondicherry',
151
- 'Punjab',
152
- 'Rajasthan',
153
- 'Sikkim',
154
- 'Tamilnadu',
155
- 'Telangana',
156
- 'Tripura',
157
- 'Uttar-Pradesh',
158
- 'Uttarakhand',
159
- 'West-Bengal'
160
- ]
 
 
 
 
161
 
162
  # LIST OF PESTICIDES WHICH ARE BANNED AND RESTRICTED USE (List created from: https://pib.gov.in/PressReleaseIframePage.aspx?PRID=1896140)
163
  BANNED_PESTICIDES_FORMULATIONS = [
 
1
  import os
2
  import src.web_crawler as web_crawler_utils
3
+ import src.weather as weather_utils
4
 
5
  LOAD_FROM_EXISTING_INDEX_STORE = False
6
  INDEX_TYPE = 'FAISS'
 
18
  INDEX_CATEGORY = [
19
  'crops',
20
  'fruits',
21
+ 'pest_management',
22
+ 'govt_policy',
23
+ 'insurance',
24
+ 'soil',
25
  'general',
26
  'vegetables'
27
  ]
 
29
  # Doctype of the master index of each index category. Master index for each index category would be stored under this key.
30
  INDEX_CATEGORY_MASTER_INDEX_DOC_TYPE = 'master'
31
 
 
 
 
32
  # List of data sources/types & from where to load the data and create the index/vector store
33
  # 2nd item is the type of source from where the data would be loaded. Currently it could come from either a file or URL.
34
  DATA_SOURCES = {
35
  'PDF': 'pdf',
36
  'Text File': 'textfile',
37
  'Online PDF': 'online_pdf', # web_crawler_utils.get_ipm_packages_pdfs_urls()[:1]
38
+ 'URLs': 'urls',
39
  }
40
 
41
  # LangChain related constants
42
+ SIMILARITY_TOP_K = 1
43
+ MODE = 'embedding'
44
+ RESPONSE_MODE = 'default'
45
  TEXT_SPLITTER_CHUNK_SIZE = 1000
46
  TEXT_SPLITTER_CHUNK_OVERLAP = 0
47
  TEXT_SPLITTER_SEPARATOR = '\n\n'
48
 
49
 
50
  URLS = [
51
+ # Govt. Schemes
52
+ 'https://agricoop.nic.in/en/Major#gsc.tab=0'
53
  'https://agricoop.nic.in/#gsc.tab=0',
54
 
55
  'https://dmi.gov.in/Documents/GrantCAGrapes.pdf',
 
123
  ]
124
 
125
  # State list used in the Weather forecast widget dropdown list
126
+ weather_utils_obj = weather_utils.WEATHER()
127
+ WEATHER_FORECAST_STATES = weather_utils_obj.get_state_names()
128
+
129
+ WEATHER_FORECAST_STATE_CODES = {
130
+ 'Andaman-Nicobar': '01',
131
+ 'Andhra-Pradesh': '02',
132
+ 'Arunachal-Pradesh': '03',
133
+ 'Assam': '04',
134
+ 'Bihar': '05',
135
+ 'Chandigarh': '06',
136
+ 'Chhattisgarh': '07',
137
+ 'Dadra-and-Nagar-Haveli': '08',
138
+ 'Daman-and-Diu': '09',
139
+ 'Delhi': '10',
140
+ 'Goa': '11',
141
+ 'Gujarat': '12',
142
+ 'Haryana': '13',
143
+ # 14
144
+ 'Himachal-Pradesh': '15',
145
+ 'Jammu-Kashmir': '16',
146
+ 'Jharkhand': '17',
147
+ 'Karnataka': '18',
148
+ 'Kerala': '19',
149
+ 'Lakshadweep': '20',
150
+ 'Madhya-Pradesh': '21',
151
+ 'Maharashtra': '22',
152
+ 'Manipur': '23',
153
+ 'Meghalaya': '24',
154
+ 'Mizoram': '25',
155
+ 'Nagaland': '26',
156
+ 'Odisha': '27',
157
+ 'Pondicherry': '28',
158
+ 'Punjab': '29',
159
+ 'Rajasthan': '30',
160
+ 'Sikkim': '31',
161
+ 'Tamilnadu': '32',
162
+ 'Telangana': '33',
163
+ 'Tripura': '34',
164
+ 'Uttar-Pradesh': '35',
165
+ 'Uttarakhand': '36',
166
+ 'West-Bengal': '37',
167
+ }
168
 
169
  # LIST OF PESTICIDES WHICH ARE BANNED AND RESTRICTED USE (List created from: https://pib.gov.in/PressReleaseIframePage.aspx?PRID=1896140)
170
  BANNED_PESTICIDES_FORMULATIONS = [
src/data_loader.py CHANGED
@@ -4,11 +4,12 @@ import pandas as pd
4
  from pathlib import Path
5
  import glob
6
 
7
- from llama_index import GPTSimpleVectorIndex, download_loader, SimpleDirectoryReader
8
  from langchain.document_loaders import PyPDFLoader, TextLoader
9
  from langchain.agents import initialize_agent, Tool
10
  from langchain.llms import OpenAI
11
  from langchain.chains.conversation.memory import ConversationBufferMemory
 
12
 
13
  import src.utils as utils
14
 
@@ -115,10 +116,10 @@ class DATA_LOADER:
115
  urls=urls,
116
  url_type=doc_type
117
  )
118
- BeautifulSoupWebReader = download_loader("BeautifulSoupWebReader")
119
- loader = BeautifulSoupWebReader()
120
  # Load data from URLs
121
- documents = loader.load_data(urls=valid_urls)
 
 
122
 
123
  # Load data from text file(s)
124
  elif doc_type == 'textfile':
 
4
  from pathlib import Path
5
  import glob
6
 
7
+ from llama_index import GPTSimpleVectorIndex, download_loader, SimpleDirectoryReader, SimpleWebPageReader
8
  from langchain.document_loaders import PyPDFLoader, TextLoader
9
  from langchain.agents import initialize_agent, Tool
10
  from langchain.llms import OpenAI
11
  from langchain.chains.conversation.memory import ConversationBufferMemory
12
+ from langchain.docstore.document import Document
13
 
14
  import src.utils as utils
15
 
 
116
  urls=urls,
117
  url_type=doc_type
118
  )
 
 
119
  # Load data from URLs
120
+ docs = SimpleWebPageReader(html_to_text=True).load_data(valid_urls)
121
+ docs = [Document(page_content=doc.text) for doc in docs]
122
+ documents.extend(docs)
123
 
124
  # Load data from text file(s)
125
  elif doc_type == 'textfile':
src/langchain_utils.py CHANGED
@@ -645,9 +645,9 @@ class LANGCHAIN_UTILS:
645
  def query(self,
646
  question,
647
  question_category,
648
- mode='embedding',
649
- response_mode="default",
650
- similarity_top_k=2,
651
  required_keywords=[],
652
  exclude_keywords=[],
653
  verbose=False
@@ -759,7 +759,7 @@ class LANGCHAIN_UTILS:
759
  files_or_urls,
760
  index_category
761
  ):
762
- logger.info(f'Uploading data for: {index_category}-{doc_type}')
763
 
764
  self.documents = []
765
  self.index = None
 
645
  def query(self,
646
  question,
647
  question_category,
648
+ mode=constants_utils.MODE,
649
+ response_mode=constants_utils.RESPONSE_MODE,
650
+ similarity_top_k=constants_utils.SIMILARITY_TOP_K,
651
  required_keywords=[],
652
  exclude_keywords=[],
653
  verbose=False
 
759
  files_or_urls,
760
  index_category
761
  ):
762
+ logger.info(f'Uploading data for: {index_category}; from: {doc_type}')
763
 
764
  self.documents = []
765
  self.index = None
src/weather.py CHANGED
@@ -1,178 +1,61 @@
1
  import requests
2
  from bs4 import BeautifulSoup as bs
 
3
 
4
 
5
- STATE_CODES = {
6
- 'Andaman-Nicobar': '01',
7
- 'Andhra-Pradesh': '02',
8
- 'Arunachal-Pradesh': '03',
9
- 'Assam': '04',
10
- 'Bihar': '05',
11
- 'Chandigarh': '06',
12
- 'Chhattisgarh': '07',
13
- 'Dadra-and-Nagar-Haveli': '08',
14
- 'Daman-and-Diu': '09',
15
- 'Delhi': '10',
16
- 'Goa': '11',
17
- 'Gujarat': '12',
18
- 'Haryana': '13',
19
- # 14
20
- 'Himachal-Pradesh': '15',
21
- 'Jammu-Kashmir': '16',
22
- 'Jharkhand': '17',
23
- 'Karnataka': '18',
24
- 'Kerala': '19',
25
- 'Lakshadweep': '20',
26
- 'Madhya-Pradesh': '21',
27
- 'Maharashtra': '22',
28
- 'Manipur': '23',
29
- 'Meghalaya': '24',
30
- 'Mizoram': '25',
31
- 'Nagaland': '26',
32
- 'Odisha': '27',
33
- 'Pondicherry': '28',
34
- 'Punjab': '29',
35
- 'Rajasthan': '30',
36
- 'Sikkim': '31',
37
- 'Tamilnadu': '32',
38
- 'Telangana': '33',
39
- 'Tripura': '34',
40
- 'Uttar-Pradesh': '35',
41
- 'Uttarakhand': '36',
42
- 'West-Bengal': '37',
43
- }
44
-
45
- # List of states that are given as the input selection to https://nwp.imd.gov.in/blf/blf_temp/ to get the weather forecast
46
- STATES = {
47
- 'Andaman-Nicobar': {},
48
-
49
- 'Andhra-Pradesh': {},
50
-
51
- 'Arunachal-Pradesh': {},
52
-
53
- 'Assam': {},
54
-
55
- 'Bihar': {},
56
-
57
- 'Chandigarh': {},
58
-
59
- 'Chhattisgarh': {},
60
-
61
- 'Dadra-and-Nagar-Haveli': {},
62
-
63
- 'Daman-and-Diu': {},
64
-
65
- 'Delhi': {
66
- 'CENTRAL-DELHI': ['CENTRAL-DELHI'],
67
- 'EAST-DELHI': ['EAST-DELHI'],
68
- 'NEW-DELHI': ['NEW-DELHI'],
69
- 'NORTH-DELHI': ['NORTH-DELHI'],
70
- 'NORTH-EAST-DELHI': ['NORTH-EAST-DELHI'],
71
- 'NORTH-WEST-DELHI': ['NORTH-WEST-DELHI'],
72
- 'SHAHDARA': ['SHAHDARA'],
73
- 'SOUTH-DELHI': ['SOUTH-DELHI'],
74
- 'SOUTH-EAST-DELHI': ['SOUTH-EAST-DELHI'],
75
- 'SOUTH-WEST-DELHI': ['SOUTH-WEST-DELHI'],
76
- 'WEST-DELHI': ['WEST-DELHI'],
77
- },
78
-
79
- 'Goa': {},
80
-
81
- 'Gujarat': {
82
- 'AHMADABAD': ['AHMEDABAD-CITY', 'BAVLA', 'DASKROI', 'DETROJ-RAMPURA', 'DHANDHUKA', 'DHOLERA', 'DHOLKA', 'MANDAL', 'SANAND', 'VIRAMGAM'],
83
- 'AMRELI': ['AMRELI', 'BABRA', 'BAGASARA', 'DHARI', 'JAFRABAD', 'KHAMBHA', 'KUNKAVAV-VADIA', 'LATHI', 'LILIA', 'RAJULA', 'SAVERKUNDLA'],
84
- 'ANAND': [],
85
- 'ARVALLI': [],
86
- 'BANASKANTHA': [],
87
- 'BHARUCH': [],
88
- 'BHAVNAGAR': [],
89
- 'BOTAD': [],
90
- 'CHHOTAUDEPUR': [],
91
- 'DANG': [],
92
- 'DEVBHUMI-DWARKA': [],
93
- 'DOHAD': [],
94
- 'GANDHINAGAR': [],
95
- 'GIR-SOMNATH': [],
96
- 'JAMNAGAR': [],
97
- 'JUNAGADH': [],
98
- 'KACHCHH': [],
99
- 'KHEDA': [],
100
- 'MAHESANA': [],
101
- 'MAHISAGAR': [],
102
- 'MORBI': [],
103
- 'NARMADA': [],
104
- 'NAVSARI': [],
105
- 'PANCH-MAHALS': [],
106
- 'PATAN': [],
107
- 'PORBANDAR': [],
108
- 'RAJKOT': [],
109
- 'SABAR-KANTHA': [],
110
- 'SURAT': ['BARDOLI', 'CHORASI', 'KAMREJ', 'MAHUVA', 'MANDVI', 'MANGROL', 'OLPAD', 'PALSANA', 'SURAT-CITY', 'UMARPADA'],
111
- 'SURENDRANAGAR': [],
112
- 'TAPI': [],
113
- 'VADODARA': [],
114
- 'VALSAD': [],
115
- },
116
-
117
- 'Haryana': {},
118
-
119
- 'Himachal-Pradesh': {},
120
-
121
- 'Jammu-Kashmir': {},
122
-
123
- 'Jharkhand': {},
124
-
125
- 'Karnataka': {},
126
-
127
- 'Kerala': {},
128
-
129
- 'Lakshadweep': {},
130
-
131
- 'Madhya-Pradesh': {},
132
-
133
- 'Maharashtra': {},
134
-
135
- 'Manipur': {},
136
-
137
- 'Meghalaya': {},
138
-
139
- 'Mizoram': {},
140
-
141
- 'Nagaland': {},
142
-
143
- 'Odisha': {},
144
-
145
- 'Pondicherry': {},
146
-
147
- 'Punjab': {},
148
-
149
- 'Rajasthan': {},
150
-
151
- 'Sikkim': {},
152
-
153
- 'Tamilnadu': {},
154
-
155
- 'Telangana': {},
156
 
157
- 'Tripura': {},
 
 
 
 
 
 
 
158
 
159
- 'Uttar-Pradesh': {},
160
 
161
- 'Uttarakhand': {},
 
 
 
 
 
 
162
 
163
- 'West-Bengal': {},
164
- }
 
 
165
 
166
 
 
 
 
 
 
 
 
 
 
167
 
168
- class WEATHER:
169
- def __init__(self):
170
- self.base_url = 'https://nwp.imd.gov.in/blf/blf_temp'
 
 
171
 
172
 
173
  # Weather forecast from Govt. website
174
- def get_weather_forecast(self, state, district, is_block_level=False):
175
- self.district_url = f"{self.base_url}/block.php?dis={STATE_CODES.get(state, '') + district}"
 
 
 
 
 
176
  self.block_url = f'{self.base_url}/table2.php'
177
 
178
  response = requests.get(self.district_url if not is_block_level else self.block_url)
@@ -182,7 +65,10 @@ class WEATHER:
182
 
183
 
184
  # Weather using Google weather API
185
- def get_weather(self, city):
 
 
 
186
  city = city + " weather"
187
  city = city.replace(" ", "+")
188
 
 
1
  import requests
2
  from bs4 import BeautifulSoup as bs
3
+ import src.constants as constants_utils
4
 
5
 
6
+ class WEATHER:
7
def __init__(self):
    """Initialise the forecast base URL, result holders and request headers."""
    # Root of the IMD block-level forecast pages; the other methods append
    # page names and query strings to it.
    self.base_url = 'https://nwp.imd.gov.in/blf/blf_temp'

    # Most recently scraped state / district names (filled by the getters).
    self.states = []
    self.districts = []

    # NOTE(review): the keys here are the DATA_SOURCES values (document
    # source types), each mapped to None, not state names. Confirm this is
    # intended for a states -> districts mapping.
    self.states_districts = dict.fromkeys(constants_utils.DATA_SOURCES.values())

    # Accept header sent with the state and district page requests.
    self.headers = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
    }
18
 
 
19
 
20
def get_state_names(
    self,
    timeout=30
):
    """Scrape the list of state names from the forecast landing page.

    Args:
        timeout: Seconds to wait for the HTTP response. Without a timeout
            ``requests.get`` can block indefinitely, and this method is
            invoked while ``src.constants`` is being imported.

    Returns:
        List of state names taken from the state ``<select>`` element, with
        blank entries and the "Select" placeholder removed. Empty when the
        page does not contain that element. The list is also stored on
        ``self.states``.
    """
    response = requests.get(
        self.base_url,
        headers=self.headers,
        timeout=timeout,
    )

    soup = bs(response.text, 'html.parser')
    dropdown = soup.find('select', {'onchange': 'window.location.href=this.value'})
    if dropdown is None:
        # Page layout changed or an error page was served: report no states
        # instead of failing with IndexError.
        self.states = []
        return self.states

    # Strip before filtering so whitespace-only lines and a padded "Select"
    # placeholder are dropped as well.
    names = (line.strip() for line in dropdown.text.split('\n'))
    self.states = [name for name in names if name and name != 'Select']
    return self.states
32
 
33
 
34
def get_district_names(
    self,
    state_name,
    timeout=30
):
    """Scrape the district names of a state from its forecast page.

    Args:
        state_name: State name as shown in the state dropdown. It is looked
            up in ``WEATHER_FORECAST_STATE_CODES`` and appended to the code
            to form the ``dis.php`` query value.
        timeout: Seconds to wait for the HTTP response, so a stalled server
            cannot hang the UI callback indefinitely.

    Returns:
        List of district names taken from the ``<select name="dis">``
        element, with blank entries and the "Select" placeholder removed.
        Empty when no state is given or the element is missing. The list is
        also stored on ``self.districts``.
    """
    if not state_name:
        # Nothing selected: the URL is built by string concatenation, which
        # would raise TypeError for None.
        self.districts = []
        return self.districts

    state_code = constants_utils.WEATHER_FORECAST_STATE_CODES.get(state_name, '')
    url = f"{self.base_url}/dis.php?value={state_code + state_name}"
    response = requests.get(
        url,
        headers=self.headers,
        timeout=timeout,
    )

    soup = bs(response.text, 'html.parser')
    dropdown = soup.find('select', {'name': 'dis'})
    if dropdown is None:
        # Unknown state or changed page layout: report no districts instead
        # of failing with IndexError.
        self.districts = []
        return self.districts

    # Strip before filtering so whitespace-only lines and a padded "Select"
    # placeholder are dropped as well.
    names = (line.strip() for line in dropdown.text.split('\n'))
    self.districts = [name for name in names if name and name != 'Select']
    return self.districts
49
 
50
 
51
  # Weather forecast from Govt. website
52
+ def get_weather_forecast(
53
+ self,
54
+ state,
55
+ district,
56
+ is_block_level=False
57
+ ):
58
+ self.district_url = f"{self.base_url}/block.php?dis={constants_utils.WEATHER_FORECAST_STATE_CODES.get(state, '') + district}"
59
  self.block_url = f'{self.base_url}/table2.php'
60
 
61
  response = requests.get(self.district_url if not is_block_level else self.block_url)
 
65
 
66
 
67
  # Weather using Google weather API
68
+ def get_weather(
69
+ self,
70
+ city
71
+ ):
72
  city = city + " weather"
73
  city = city.replace(" ", "+")
74