Spaces:
Sleeping
Sleeping
sampathlonka
committed on
Commit
•
377ed3a
1
Parent(s):
a6b7040
version_2
Browse files- Tools.py +248 -0
- database.py +43 -0
- requirements.txt +3 -1
- utils.py +120 -0
Tools.py
ADDED
@@ -0,0 +1,248 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import json
|
2 |
+
import chardet
|
3 |
+
import pandas as pd
|
4 |
+
import streamlit as st
|
5 |
+
import pymysql
|
6 |
+
import ast
|
7 |
+
import re
|
8 |
+
from utils import word_sentence_similarity, extract_meaning_by_language, get_list_meaning_word, get_details_mantra_json
|
9 |
+
from llama_index.core.tools.tool_spec.base import BaseToolSpec
|
10 |
+
from database import execute_query
|
11 |
+
import pandas as pd
|
12 |
+
import json
|
13 |
+
import ast
|
14 |
+
import logging
|
15 |
+
|
16 |
+
|
17 |
+
# Data files read by the tool specs below. The paths are relative, so they
# resolve against the process's working directory.
SCRIPTURE_DESCRIPTIONS_CSV_PATH = "Data/scripture_descriptions.csv"  # loaded by ScriptureDescriptionToolSpec
VEDAMANTRA_CSV_PATH = "Data/veda_content_modified_v3.csv"  # loaded by MantraToolSpec and PadaToolSpec
PADA_CSV_PATH = "Data/term_data_processed_v2.csv"  # loaded by PadaToolSpec
|
21 |
+
|
22 |
+
class ScriptureDescriptionToolSpec(BaseToolSpec):
    '''
    Look up scripture descriptions in the CSV table at
    SCRIPTURE_DESCRIPTIONS_CSV_PATH. The table is loaded once, at construction
    time, and queried in memory through `get_description`.
    '''
    spec_functions = ["get_description"]

    # Hierarchy columns of the table, ordered from shallowest to deepest.
    _LEVEL_COLUMNS = ("level_1", "level_2", "level_3")

    def __init__(self):
        """Load the description table, letting chardet guess its encoding."""
        import io  # local import: only needed while loading the table

        super().__init__()
        # Read the file once and reuse the bytes for both encoding detection
        # and parsing, instead of opening it a second time.
        with open(SCRIPTURE_DESCRIPTIONS_CSV_PATH, 'rb') as f:
            raw = f.read()
        encoding = chardet.detect(raw)['encoding']
        self.df = pd.read_csv(io.BytesIO(raw), encoding=encoding)

    def _query_description(self, conditions):
        """Return the first row selected by ``conditions`` as a dict.

        Args:
            conditions: Boolean mask aligned with ``self.df``.

        Raises:
            ValueError: If the mask selects no row.
        """
        matches = self.df[conditions]
        if matches.empty:
            raise ValueError("Failed to get scripture description: Scripture description not found.")
        return matches.iloc[0].to_dict()

    def get_description(self, level_0, level_1=None, level_2=None, level_3=None):
        """Return the description row for a scripture or one of its sub-levels.

        Args:
            level_0: Scripture name; compared case-insensitively with the
                ``scripture_name`` column.
            level_1: Optional first-level identifier.
            level_2: Optional second-level identifier (requires ``level_1``).
            level_3: Optional third-level identifier (requires ``level_1`` and
                ``level_2``).

        Returns:
            dict: The first matching row, or ``{"error": message}`` when no
            row matches or the level arguments are inconsistent.
        """
        levels = (level_1, level_2, level_3)
        # The deepest level supplied decides how many hierarchy columns are filtered.
        depth = max((i for i, value in enumerate(levels, start=1) if value is not None), default=0)

        # A deeper level without its parents can never match a row; report that
        # explicitly instead of silently comparing the column to the text "None".
        missing = [column for column, value in zip(self._LEVEL_COLUMNS[:depth], levels) if value is None]
        if missing:
            return {"error": f"Failed to get scripture description: {', '.join(missing)} must be provided with deeper levels."}

        try:
            # str() keeps a non-string level_0 from raising AttributeError past
            # the ValueError handler below.
            conditions = (self.df['scripture_name'].str.lower() == str(level_0).lower())
            for column, value in zip(self._LEVEL_COLUMNS[:depth], levels):
                # Compare as text on both sides so a column that pandas parsed
                # as integers still matches.
                conditions &= (self.df[column].astype(str) == str(value))
            return self._query_description(conditions)
        except ValueError as e:
            return {"error": str(e)}
|
54 |
+
|
55 |
+
class MantraToolSpec(BaseToolSpec):
    '''
    To obtain the vedamantra details such as vedamantra, padapata, devata, chandah, rishi etc of vedamantras (or mantras or hymns) from all vedas (RigVeda, AtharvaVeda, SamaVeda, KrishnaYajurVeda, and ShuklaYajurVeda) use the function
    `get_vedamantra_details`. The mantra summary like anvaya, mantraVishaya, bhavartha/meaning (adibhautic, adhyatmic, adidaivic), purpose, usage, tippani of vedamantra is accessible using the function `get_vedamantra_summary`.
    Sample Query:
    1. What is the vedamantra of the mantra from Rigveda, first mandala, first shukta, and first mantra?
    2. What is the devata of the vedamantra from Rigveda, first mandala, first shukta, and first mantra?
    3. What is the meaning of the vedamantra from Rigveda, first mandala, first shukta, and first mantra written by Tulsi Ram?
    4. What is the (adhibautic) meaning of the vedamantra from RigVeda, first mandala, first shukta, and first mantra?
    5. What is the mantraVishaya of the vedamantra from RigVeda, first mandala, first shukta, and first mantra?
    '''
    spec_functions = ["get_vedamantra_details", "get_vedamantra_summary"]

    def __init__(self):
        """Load the vedamantra index CSV used to resolve a location to a mantra number."""
        super().__init__()
        self.df_vedamantra = pd.read_csv(VEDAMANTRA_CSV_PATH, encoding='utf-8')

    @staticmethod
    def _build_mantra_query(mantra_number):
        """Return the SELECT statement that fetches ``mantra_json`` for one mantra.

        ``mantra_number`` may come straight from a tool call, so it is escaped
        before being placed inside the quoted SQL literal.
        NOTE(review): bound parameters would be stronger than escaping, but the
        query helper used here only accepts a finished SQL string.
        """
        from pymysql.converters import escape_string

        safe_number = escape_string(str(mantra_number))
        return f"SELECT mantra_json FROM veda_content WHERE mantra_number = '{safe_number}'"

    def _get_mantra_details(self, query):
        """Run ``query`` and return entry 1 of ``mantraHeader.language``.

        Raises:
            ValueError: If the query fails or the JSON lacks the expected keys.
        """
        try:
            details = get_details_mantra_json(query)
            return details['mantraHeader']['language'][1]
        except Exception as e:
            raise ValueError(f"Failed to get mantra details: {e}") from e

    def _query_db(self, conditions):
        """Return the ``mantra_number`` of the first row selected by ``conditions``.

        Raises:
            ValueError: "Mantra not found." when nothing matches, or
                "Failed to query database." when the lookup itself fails.
        """
        try:
            result = self.df_vedamantra[conditions]['mantra_number'].values
        except Exception as e:
            raise ValueError("Failed to query database.") from e
        # Raised outside the try block so the specific message is not replaced
        # by the generic one.
        if len(result) == 0:
            raise ValueError("Mantra not found.")
        return result[0]

    def _get_query_conditions(self, scripture_name, **kwargs):
        """Build a boolean mask over the index for a scripture plus column filters.

        Args:
            scripture_name: Scripture to match, case-insensitively.
            **kwargs: ``column=value`` equality filters; ``None`` values are
                ignored, as in PadaToolSpec.

        Raises:
            ValueError: If ``scripture_name`` is None.
        """
        if scripture_name is None:
            raise ValueError("Either mantraid or scripture_name must be provided.")
        conditions = (self.df_vedamantra['scripture_name'].str.lower() == scripture_name.lower())
        for key, value in kwargs.items():
            if value is not None:
                conditions &= (self.df_vedamantra[key] == value)
        return conditions

    def _get_mantra_id(self, scripture_name, **kwargs):
        """Resolve a scripture location to its mantra number."""
        conditions = self._get_query_conditions(scripture_name, **kwargs)
        return self._query_db(conditions)

    def _resolve_query(self, mantraid, scripture_name, **kwargs):
        """Return the SQL for an explicit ``mantraid`` or, failing that, a location."""
        if mantraid:
            return self._build_mantra_query(mantraid)
        return self._build_mantra_query(self._get_mantra_id(scripture_name, **kwargs))

    def get_vedamantra_details(self, mantraid=None, scripture_name=None, **kwargs):
        """Return the header details of one mantra.

        Args:
            mantraid: Mantra number; when truthy it takes precedence.
            scripture_name: Scripture name, used when ``mantraid`` is not given.
            **kwargs: Column filters on the vedamantra index.

        Returns:
            Entry 1 of the mantra's ``mantraHeader.language`` list, or
            ``{"error": message}`` on any failure.
        """
        try:
            query = self._resolve_query(mantraid, scripture_name, **kwargs)
            return self._get_mantra_details(query)
        except Exception as e:
            return {"error": str(e)}

    def get_vedamantra_summary(self, mantraid=None, scripture_name=None, **kwargs):
        """Return the summaries of one mantra.

        Args:
            mantraid: Mantra number; when truthy it takes precedence.
            scripture_name: Scripture name, used when ``mantraid`` is not given.
            **kwargs: Column filters on the vedamantra index.

        Returns:
            dict: Entry 1 of ``mantraSummary.language`` under the Roman-IAST
            key, plus one entry per English summary keyed by its mahatma name;
            or ``{"error": message}`` on any failure.
        """
        try:
            query = self._resolve_query(mantraid, scripture_name, **kwargs)
            json_dict = get_details_mantra_json(query)
            mantra_summary = json_dict['mantraSummary']['language']
            summary_dict = {"Roman-IAST summary of vedamantra": mantra_summary[1]}
            for item in mantra_summary:
                if item['languageName'] == 'English':
                    mahatma = item['mahatma']['mahatmaName']
                    summary_dict[f"English summary of vedamantra by {mahatma}"] = item
            return summary_dict
        except Exception as e:
            return {"error": str(e)}
|
126 |
+
|
127 |
+
class PadaToolSpec(BaseToolSpec):
    '''
    Purpose: To obtain a complete or meaningful meaning of a word or pada based on context information.
    1. The function 'get_pada_meaning' is used to get all the possible meanings of the pada based on the given information.
    2. The function 'get_adibauatic_adidaivic_adhyatmic_meaning_of_pada' is used to get the adibhautic, adidaivic and adhyatmic meaning of a word based on context information.
       Use the context to generate a meaningful meaning of the pada in the vedamantra.
    Sample query:
    1. What is the meaning of the word apratidhṛṣṭa-śavasam?
    2. What is the adibauatic meaning of the word apratidhṛṣṭa-śavasam?
    3. Whats the adidaivic meaning of the word apratidhṛṣṭa-śavasam?
    4. What is the adyatmic meaning of the word apratidhṛṣṭa-śavasam?
    '''
    spec_functions = ["get_pada_meaning","get_adibauatic_adidaivic_adhyatmic_meaning_of_pada"]

    # Bhavartha categories read from each mantra summary.
    _MEANING_CATEGORIES = ('adibhautic', 'adidaivic', 'adhyatmic')

    def __init__(self):
        """Load the pada (term) table and the vedamantra index from CSV."""
        super().__init__()
        # Nullable integer dtype lets these columns hold missing values
        # without being turned into floats.
        nullable_int_columns = ('AnuvakNumber', 'PrapatakNumber', 'KandahNumber', 'ShuktaNumber',
                                'ArchikahNumber', 'AdhyayaNumber', 'MandalaNumber', 'ParyayaNumber')
        self.df_terms = pd.read_csv(PADA_CSV_PATH, dtype={column: 'Int64' for column in nullable_int_columns}, encoding='utf-8')
        self.df_vedic_content = pd.read_csv(VEDAMANTRA_CSV_PATH, encoding='utf-8')

    def _get_pada_details_by_scripture(self, pada, scripture_name=None, **kwargs):
        """Return the term rows for ``pada``, optionally narrowed by scripture and columns.

        Args:
            pada: Exact value of the ``Pada`` column.
            scripture_name: Optional scripture, matched case-insensitively.
            **kwargs: ``column=value`` equality filters; ``None`` values are ignored.

        Returns:
            The filtered DataFrame, or ``None`` when nothing matches or the
            lookup fails (the failure is logged).
        """
        try:
            condition = (self.df_terms['Pada'] == pada)
            if scripture_name:
                condition &= (self.df_terms['scripture_name'].str.lower() == scripture_name.lower())
            for key, value in kwargs.items():
                if value is not None:
                    condition &= (self.df_terms[key] == value)
            filtered_df = self.df_terms[condition]
            return filtered_df if not filtered_df.empty else None
        except Exception as e:
            logging.error(f"Error in _get_pada_details_by_scripture: {e}")
            return None

    def _get_vedamantra_meaning(self, mantraID, MahatmaName=None):
        """Return the English bhavartha of a mantra, split by category.

        Among the English summaries (restricted to ``MahatmaName`` when that
        name matches any entry) the one with the most non-empty categories is
        chosen; on a tie the later entry wins.

        Returns:
            dict: The three category values (each possibly ``None``), or
            ``{"error": message}`` when no English summary exists or anything fails.
        """
        try:
            from pymysql.converters import escape_string

            # Escape the value before placing it inside the quoted SQL literal.
            query = f"SELECT mantra_json FROM veda_content WHERE mantra_number = '{escape_string(str(mantraID))}'"
            jsonDict = get_details_mantra_json(query)
            mantraSummary = jsonDict['mantraSummary']['language']
            if MahatmaName is not None:
                filtered_summary = [data_dict for data_dict in mantraSummary if data_dict.get('mahatma', {}).get('mahatmaName') == MahatmaName]
                # Fall back to every summary when the requested mahatma has none.
                if filtered_summary:
                    mantraSummary = filtered_summary
            best_meaning = None
            best_count = 0
            for data_dict in mantraSummary:
                if data_dict.get('languageName') != "English":
                    continue
                meanings = data_dict['mahatma']['bhavartha']
                count = sum(bool(meanings.get(cat, None)) for cat in self._MEANING_CATEGORIES)
                if count >= best_count:
                    best_meaning = {cat: meanings.get(cat, None) for cat in self._MEANING_CATEGORIES}
                    best_count = count
            return best_meaning if best_meaning else {"error": "Required meaning associated with vedamantra is not available."}
        except Exception as e:
            logging.error(f"Error in _get_vedamantra_meaning: {e}")
            return {"error": f"An error occurred: {e}"}

    def _get_pada_morphology(self, term_details, meanings):
        """Rank dictionary senses of each stem/root of a term against ``meanings``.

        Args:
            term_details: Term row whose ``Morphology`` field holds a Python
                literal list of dicts.
            meanings: Mantra-level meaning text the senses are scored against.

        Returns:
            list[dict]: One dict per morphology entry with the word, its
            best-scoring sense and score for ``stem`` and ``root`` (when
            present) plus ``grammar``; ``[]`` on any failure (logged).
        """
        try:
            morphology_list = ast.literal_eval(term_details['Morphology'])
            term_morph_list = []
            for morphs in morphology_list:
                term_info = {}
                for field in ['stem', 'root']:
                    morph_word = morphs.get(field)
                    if morph_word:
                        meaning = word_sentence_similarity(meanings, morph_word)
                        term_info[f'{field}_word'] = morph_word
                        term_info[f'{field}_meaning'] = meaning[0][0] if meaning else None
                        term_info[f'{field}_score'] = meaning[0][1] if meaning else None
                term_info['grammar'] = morphs['grammar']
                term_morph_list.append(term_info)
            return term_morph_list
        except Exception as e:
            logging.error(f"Error in _get_pada_morphology: {e}")
            return []

    def get_pada_meaning(self, pada):
        """Return dictionary meanings for every stem and root of ``pada``.

        Only the first term row matching ``pada`` is used.

        Returns:
            list: One ``get_list_meaning_word`` result per stem/root, or
            ``{"error": message}`` when the pada is unknown or anything fails.
        """
        try:
            pada_details = self.df_terms[self.df_terms['Pada'] == pada]
            meanings_list = []
            for morphs in ast.literal_eval(pada_details['Morphology'].values[0]):
                for field in ['stem', 'root']:
                    word = morphs.get(field)
                    if word:
                        meanings_list.append(get_list_meaning_word(word))
            return meanings_list
        except Exception as e:
            logging.error(f"Error in get_pada_meaning: {e}")
            return {"error": f"Required meaning associated with pada is not available. {e}"}

    def get_adibauatic_adidaivic_adhyatmic_meaning_of_pada(self, pada, Pada_position=None, mantraid=None, scripture_name=None, **kwargs):
        """Return the adibhautic, adidaivic and adhyatmic reading of ``pada`` in context.

        Args:
            pada: The word to look up (exact match on the ``Pada`` column).
            Pada_position: Optional position used to narrow the matching rows.
            mantraid: Optional mantra id; when truthy, only terms of that mantra
                are considered and the location filters are ignored.
            scripture_name: Optional scripture name used when ``mantraid`` is absent.
            **kwargs: Column filters on the term table. ``MahatmaName`` is not a
                column filter; it selects whose summary supplies the meanings.

        Returns:
            dict: Per-category morphology info and mantra meaning, or
            ``{"error": message}`` on any failure.
        """
        try:
            # MahatmaName chooses the commentator; it must not be applied to
            # the term table as if it were a column.
            mahatma_name = kwargs.pop('MahatmaName', None)
            if mantraid:
                details = self.df_terms[(self.df_terms['mantra_id'] == mantraid) & (self.df_terms['Pada'] == pada)]
            else:
                # Also covers the "word only" query (no scripture given), which
                # previously left `details` unassigned.
                details = self._get_pada_details_by_scripture(pada, scripture_name, **kwargs)
            if details is not None and Pada_position:
                details = details[details['Pada_position'] == Pada_position]
            # The helper returns None rather than an empty frame when nothing matches.
            if details is None or details.empty:
                return {"error": f"No details found for pada '{pada}'"}
            pada_details = details.iloc[0]
            mantraID = pada_details['mantra_id']
            meanings = self._get_vedamantra_meaning(mantraID, MahatmaName=mahatma_name)
            if 'error' in meanings:
                return meanings
            ab_term_morph_list = self._get_pada_morphology(pada_details, meanings['adibhautic'])
            ad_term_morph_list = self._get_pada_morphology(pada_details, meanings['adidaivic'])
            at_term_morph_list = self._get_pada_morphology(pada_details, meanings['adhyatmic'])
            return {
                f'adibhautic_info_{pada}': ab_term_morph_list,
                'vedamantra_adibhautic_meaning': meanings['adibhautic'],
                f'adidavic_info_{pada}': ad_term_morph_list,
                'vedamantra_adidavic_meaning': meanings['adidaivic'],
                f'adhyatmic_info_{pada}': at_term_morph_list,
                'vedamantra_adhyatmic_meaning': meanings['adhyatmic']
            }
        except Exception as e:
            logging.error(f"Error in get_adibauatic_adidaivic_adhyatmic_meaning_of_pada: {e}")
            return {"error": f"Failed to get meaning of the word {pada}. {e}"}
|
database.py
ADDED
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pymysql
|
2 |
+
import json
|
3 |
+
import pandas as pd
|
4 |
+
import re
|
5 |
+
|
6 |
+
|
7 |
+
def initialize_database():
    """Open and return a new PyMySQL connection to the veda database.

    Each setting can be overridden through an environment variable
    (``VEDA_DB_HOST``, ``VEDA_DB_USER``, ``VEDA_DB_PASSWORD``, ``VEDA_DB_PORT``,
    ``VEDA_DB_NAME``); the previous hard-coded values remain the defaults.

    Returns:
        The connection object; the caller is responsible for closing it.

    Raises:
        ValueError: If ``VEDA_DB_PORT`` is set to a non-integer value.
    """
    import os  # local import: only needed to read the overrides

    # NOTE(review): the default password is still kept in source for backward
    # compatibility; prefer supplying VEDA_DB_PASSWORD from the environment.
    db_params = {
        "host": os.environ.get("VEDA_DB_HOST", "localhost"),
        "user": os.environ.get("VEDA_DB_USER", "cms-readonly-user"),
        "password": os.environ.get("VEDA_DB_PASSWORD", "%Reed!!"),
        "port": int(os.environ.get("VEDA_DB_PORT", 3307)),
        "database": os.environ.get("VEDA_DB_NAME", "veda_prod_v1"),
    }
    return pymysql.connect(**db_params)
|
17 |
+
|
18 |
+
def execute_query(query, params=None):
    """Run one SQL statement on a fresh connection and return its result.

    Args:
        query: SQL text, optionally containing ``%s`` placeholders.
        params: Optional values bound to the placeholders by the driver.
            Prefer this over formatting values into ``query``.

    Returns:
        tuple | None: ``(cursor.description, rows)`` on success; ``None`` when
        execution fails (the error is printed and the transaction rolled back).
        Callers must check for ``None`` before unpacking.
    """
    db = initialize_database()
    try:
        # The context manager closes the cursor even if execute() raises.
        with db.cursor() as cursor:
            cursor.execute(query, params)
            description = cursor.description
            result = cursor.fetchall()  # Fetch all rows from the result set
        db.commit()
        return description, result
    except Exception as e:
        print("Error executing query:", e)
        db.rollback()
        return None  # Return None if an error occurs
    finally:
        db.close()
|
33 |
+
|
34 |
+
|
35 |
+
def _get_details_mantra_json(self, query):
    """Run ``query`` and return the first row's ``mantra_json`` as parsed JSON.

    Args:
        self: Unused; kept because the signature already includes it.
        query: SQL statement whose result has a ``mantra_json`` column.

    Returns:
        The decoded JSON document, with markup tags stripped beforehand.

    Raises:
        ValueError: If the query failed or returned no rows.
    """
    outcome = execute_query(query)
    # execute_query returns None on failure; unpacking that would raise an
    # unhelpful TypeError.
    if outcome is None:
        raise ValueError("Query failed; mantra_json could not be retrieved.")
    description, data = outcome
    if not data:
        raise ValueError("Query returned no rows; mantra_json not found.")
    df = pd.DataFrame(data)
    df.columns = [x[0] for x in description]
    mantra_json = df['mantra_json'].values[0]
    # Drop anything that looks like an HTML/XML tag before decoding.
    cleaned_data = re.sub('<[^<]+?>', '', mantra_json)
    return json.loads(cleaned_data)
|
42 |
+
|
43 |
+
|
requirements.txt
CHANGED
@@ -3,7 +3,9 @@ sentence_transformers==2.4.0
|
|
3 |
llama_index==0.10.4
|
4 |
llama-index-vector-stores-pinecone
|
5 |
llama-index-embeddings-huggingface
|
|
|
6 |
pinecone-client==3.1.0
|
7 |
cohere==4.50
|
8 |
chardet==5.2.0
|
9 |
-
streamlit==1.31.1
|
|
|
|
3 |
llama_index==0.10.4
|
4 |
llama-index-vector-stores-pinecone
|
5 |
llama-index-embeddings-huggingface
|
6 |
+
llama_index-embeddings-nomic
|
7 |
pinecone-client==3.1.0
|
8 |
cohere==4.50
|
9 |
chardet==5.2.0
|
10 |
+
streamlit==1.31.1
|
11 |
+
aksharamukha==2.1.2
|
utils.py
ADDED
@@ -0,0 +1,120 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import re
|
3 |
+
import requests
|
4 |
+
import json
|
5 |
+
import numpy as np
|
6 |
+
import pandas as pd
|
7 |
+
from bs4 import BeautifulSoup
|
8 |
+
from database import execute_query
|
9 |
+
from aksharamukha import transliterate
|
10 |
+
from sentence_transformers import util
|
11 |
+
from llama_index.embeddings.nomic import NomicEmbedding
|
12 |
+
|
13 |
+
# Embedding client shared by the similarity helper in this module.
# The key is read from the environment; it is None when NOMIC_API_KEY is unset.
nomic_api_key = os.environ.get('NOMIC_API_KEY')
nomic_embed_model = NomicEmbedding(
    model_name="nomic-embed-text-v1.5",
    dimensionality=128,
    api_key=nomic_api_key,
)
|
20 |
+
|
21 |
+
|
22 |
+
|
23 |
+
def get_list_meaning_word(word):
    """Fetch dictionary entries for ``word`` from ambuda.org.

    The Monier-Williams, Shabda-Sagara and Apte sections are scraped from one
    page. Each section is parsed independently: when one cannot be parsed a
    message is printed and its list stays empty.

    Args:
        word: The word to look up; it is URL-quoted before being requested.

    Returns:
        dict: ``'pada'`` (the input word) plus one list of entry strings per
        dictionary. All lists are empty when the page cannot be fetched.
    """
    from urllib.parse import quote  # local import: only needed to build the URL

    mw_key = 'Monier-Williams Sanskrit-English Dictionary (1899)'
    shabda_key = 'Shabda-Sagara (1900)'
    apte_key = 'Apte-Practical Sanskrit-English Dictionary (1890)'
    pada_meanings = {'pada': word, mw_key: [], shabda_key: [], apte_key: []}
    # Quote the word so characters such as '/', '?' or '#' cannot alter the path.
    url = f"https://ambuda.org/tools/dictionaries/mw,shabdasagara,apte/{quote(str(word), safe='')}"

    def to_iast(text):
        # Transliterate Devanagari text to IAST.
        return transliterate.process(src='Devanagari', tgt='IAST', txt=text)

    try:
        # A timeout keeps a stalled server from blocking the caller forever;
        # Timeout is a RequestException, so it is reported below.
        response = requests.get(url, timeout=10)
        response.raise_for_status()

        soup = BeautifulSoup(response.text, 'html.parser')
        # One matching <div> per dictionary, in the order given in the URL.
        divs = soup.find_all('div', class_='my-4', attrs={'x-show': 'show'})

        try:
            mw_items = divs[0].find('ul').find_all('li', class_='dict-entry mw-entry')
            pada_meanings[mw_key] = [to_iast(li_tag.get_text(strip=True)) for li_tag in mw_items]
        except Exception:
            print("Error: Unable to find Monier-Williams Sanskrit-English Dictionary (1899) data.")

        try:
            shabda_items = divs[1].find_all('div')
            pada_meanings[shabda_key] = [to_iast(item.get_text(strip=True)) for item in shabda_items]
        except Exception:
            print("Error: Unable to find Shabda-Sagara (1900) data.")

        try:
            apte_meanings = []
            for tag in divs[2].find_all('b'):
                if tag.text.strip() != '—':
                    sibling = tag.find_next_sibling()  # Next tag after the <b>
                    text2 = tag.next_sibling.strip() + ' '  # Text node right after the <b>
                    # Collect everything up to the next <div>. Running out of
                    # siblings raises and is handled as "section not found".
                    while sibling.name != 'div':
                        if sibling.name is None:  # Handling non-tag text
                            text2 += " "
                        elif sibling.name == 'span':  # <span> holds Devanagari text
                            IAST_text = to_iast(sibling.text.strip())
                            text2 += IAST_text + ' ' + sibling.next_sibling.strip()
                        else:
                            text2 += sibling.text.strip() + ' ' + sibling.next_sibling.strip()
                        sibling = sibling.find_next_sibling()
                    apte_meanings.append(text2)
            # The final collected entry is discarded, as before.
            pada_meanings[apte_key] = apte_meanings[:-1]
        except Exception:
            print("Error: Unable to find Apte-Practical Sanskrit-English Dictionary (1890) data.")

    except requests.exceptions.RequestException as e:
        print(f"Error: Failed to fetch data from {url}. {e}")

    return pada_meanings
|
85 |
+
|
86 |
+
#get similarity scores
|
87 |
+
def word_sentence_similarity(meanings, root_stem_word):
    """Rank dictionary senses of a root/stem by similarity to a meaning text.

    Args:
        meanings: Text the senses are compared against.
        root_stem_word: Word whose Monier-Williams and Shabda-Sagara entries
            are used as candidate senses.

    Returns:
        list[tuple[str, float]] | None: ``(sense, cosine_similarity)`` pairs in
        descending score order; ``[]`` when no sense is found; ``None`` when
        either argument is empty.
    """
    if not meanings or not root_stem_word:
        return None

    # Fetch the dictionary page once; each call is an HTTP request.
    dictionary_entries = get_list_meaning_word(root_stem_word)
    all_meanings = [
        *dictionary_entries['Monier-Williams Sanskrit-English Dictionary (1899)'],
        *dictionary_entries['Shabda-Sagara (1900)'],
    ]
    if not all_meanings:
        # Nothing to score, so skip the embedding call for `meanings` as well.
        return []

    meaning_embedding = np.array(nomic_embed_model.get_text_embedding(meanings))
    word_score_pair = []
    for word_meaning in all_meanings:
        sense_embedding = np.array(nomic_embed_model.get_text_embedding(word_meaning))
        similarity_score = util.pytorch_cos_sim(meaning_embedding, sense_embedding).item()
        word_score_pair.append((word_meaning, similarity_score))
    # Highest-scoring sense first.
    return sorted(word_score_pair, key=lambda pair: pair[1], reverse=True)
|
105 |
+
|
106 |
+
# Return the 'mahatma' entry of the first summary written in the target language.
def extract_meaning_by_language(data_list, target_language='English'):
    """Return the ``mahatma`` value of the first entry in ``target_language``.

    Args:
        data_list: Iterable of dicts carrying ``languageName`` and, usually,
            ``mahatma``. ``None`` is treated as empty.
        target_language: Language name to match exactly.

    Returns:
        The matching entry's ``mahatma`` value (``{}`` when the key is absent),
        or ``None`` when no entry is in the requested language.
    """
    for data_dict in data_list or ():
        if data_dict.get('languageName') == target_language:
            return data_dict.get('mahatma', {})
    return None
|
112 |
+
|
113 |
+
#mantra_json_details
|
114 |
+
def get_details_mantra_json(query):
    """Run ``query`` and return the first row's ``mantra_json`` as parsed JSON.

    Args:
        query: SQL statement whose result has a ``mantra_json`` column.

    Returns:
        The decoded JSON document, with markup tags stripped beforehand.

    Raises:
        ValueError: If the query failed or returned no rows.
    """
    outcome = execute_query(query)
    # execute_query returns None on failure; unpacking that would raise an
    # unhelpful TypeError.
    if outcome is None:
        raise ValueError("Query failed; mantra_json could not be retrieved.")
    description, data = outcome
    if not data:
        raise ValueError("Query returned no rows; mantra_json not found.")
    df = pd.DataFrame(data)
    df.columns = [x[0] for x in description]
    mantra_json = df['mantra_json'].values[0]
    # Drop anything that looks like an HTML/XML tag before decoding.
    cleaned_data = re.sub('<[^<]+?>', '', mantra_json)
    return json.loads(cleaned_data)
|