File size: 2,296 Bytes
c6cc78a 83c8bb0 c6cc78a 9376927 969b5ea c6cc78a 969b5ea c6cc78a 9376927 c6cc78a 9376927 c6cc78a 83c8bb0 9376927 83c8bb0 9376927 843f624 83c8bb0 9376927 83c8bb0 9376927 92f59fe 843f624 92f59fe 83c8bb0 9376927 16eb088 969b5ea 9376927 843f624 c6cc78a 9376927 c6725ff |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 |
import json
import numpy
import os
import re
# Load the kural dataset. The encoding is given explicitly because the file
# holds Tamil text and the platform default encoding is not always UTF-8; the
# `with` block closes the file even if parsing fails.
with open('thirukural_git.json', encoding='utf-8') as f:
    # json.load returns the JSON object as a dictionary.
    data = json.load(f)

# Parallel lists: position i in each list describes the same kural.
en_translations = []
kurals = []
ta_translations = []
for kural in data['kurals']:
    # Meanings are lower-cased once here; queries are lower-cased the same
    # way before being embedded.
    en_translations.append(kural['meaning']['en'].lower())
    ta_translations.append(kural['meaning']['ta_salamon'].lower())
    kurals.append(kural['kural'])

from sentence_transformers import SentenceTransformer

# Embed every English meaning once at import time; queries are compared
# against these vectors.
model = SentenceTransformer('all-MiniLM-L6-v2')
sen_embeddings = model.encode(en_translations)
# Disabled alternative: read/write the embeddings through a memmap file
# instead of re-encoding on every start.
# sen_embeddings= numpy.memmap('trainedmodel',mode="r",dtype=numpy.float32,shape=(1330,768))
# sen_embeddings.tofile('trainedmodel')
def preprocess(input: str):
    """Answer queries that name a kural by number, bypassing semantic search.

    Args:
        input: Raw query text.

    Returns:
        The formatted text from ``kural_definition`` when the first integer
        found in ``input`` is a valid 1-based kural number. ``False`` in every
        other case: a query starting with ``/``, no number present, a
        non-positive number, or a number past the end of the collection.
    """
    if input.startswith('/'):
        # TODO: slash commands are not handled yet.
        return False

    # Match only the integer part. The earlier pattern also captured a
    # trailing "." or decimals ("kural 3.", "3.5"), which int() rejects
    # with ValueError.
    match = re.search(r'-?\d+', input)
    if match is None:
        return False

    number = int(match.group())
    if number <= 0:
        return False

    try:
        # Kural numbers are 1-based; kural_definition takes a 0-based index.
        return kural_definition(number - 1)
    except IndexError:
        # The number is larger than the collection; report "no direct hit"
        # so the caller can fall back to its own handling.
        return False
def find_similarities(input: str):
    """Return the kurals that best match a free-text query.

    A query that ``preprocess`` can answer directly (it names a kural number)
    is returned as-is. Otherwise the lower-cased query is embedded with the
    module-level ``model`` and compared by cosine similarity against
    ``sen_embeddings``; the up-to-three best matches are formatted with
    ``kural_definition`` and concatenated, best match first.

    Args:
        input: Raw query text.

    Returns:
        The formatted kural text, or ``"Try again with different query"`` if
        anything fails along the way.
    """
    try:
        response = preprocess(input)
        if response:
            return response

        # Imported lazily, so scikit-learn is only needed for semantic search.
        from sklearn.metrics.pairwise import cosine_similarity

        input_embeddings = model.encode([input.lower()])
        # Score the query against every kural. The earlier code compared
        # against sen_embeddings[1:], which silently made the first kural
        # unsearchable.
        scores = cosine_similarity([input_embeddings[0]], sen_embeddings)[0]
        # Highest score first; slicing also copes with fewer than 3 kurals.
        best = numpy.argsort(-scores, kind="stable")[:3]

        parts = []
        for index in best:
            print(scores[index])
            parts.append(kural_definition(index))
        return ''.join(parts)
    except Exception as error:
        # Best-effort boundary: keep the user-facing fallback, but do not
        # swallow KeyboardInterrupt/SystemExit and do not hide the cause.
        print(f"find_similarities failed: {error!r}")
        return "Try again with different query"
def kural_definition(index: int):
    """Format one kural together with its Tamil and English meanings.

    Args:
        index: Zero-based position into the module-level ``kurals``,
            ``ta_translations`` and ``en_translations`` lists. The number
            shown in the output is ``index + 1``.

    Returns:
        A string of the form ``"<n>. <verse>\\n<tamil>\\n<english>\\n\\n"``,
        where the verse is the entries of ``kurals[index]`` joined by
        newlines.

    The English meaning and the verse are also printed to stdout.
    """
    english = en_translations[index]
    print(english)

    verse = "\n".join(kurals[index])
    tamil = ta_translations[index]
    print(verse)

    return f"{index + 1}. {verse}\n{tamil}\n{english}\n\n"
# while True:
# text = input('Ask valluvar: ')
# if (text == 'exit'):
# break
# find_similarities(text)
|