File size: 2,008 Bytes
c6cc78a 83c8bb0 c6cc78a 9376927 c6cc78a 9376927 c6cc78a 9376927 c6cc78a 83c8bb0 9376927 83c8bb0 9376927 843f624 83c8bb0 9376927 83c8bb0 9376927 843f624 9376927 843f624 9376927 843f624 83c8bb0 9376927 843f624 c6cc78a 9376927 c6725ff |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 |
import json
import numpy
import os
import re
# Load the Thirukkural corpus from disk.
#
# JSON text is UTF-8 by specification, so the encoding is given explicitly
# instead of relying on the platform default, and the `with` block guarantees
# the handle is closed even if parsing fails.
with open('thirukural_git.json', encoding='utf-8') as f:
    data = json.load(f)

# Parallel lists: position i in both lists refers to the same kural.
#   en_translations[i] -- lower-cased English meaning, used for embedding/search
#   kurals[i]          -- the kural text as stored in the JSON (joined with
#                         newlines when displayed; presumably a list of lines)
en_translations = []
kurals = []
for kural in data['kurals']:
    en_translations.append(kural['meaning']['en'].lower())
    kurals.append(kural['kural'])
# Load the sentence-embedding model once at import time and pre-compute one
# embedding per English translation, so that row i of `sen_embeddings`
# corresponds to en_translations[i]. Queries are encoded with the same model.
from sentence_transformers import SentenceTransformer
model = SentenceTransformer('all-MiniLM-L6-v2')
sen_embeddings = model.encode(en_translations)
# Disabled experiment: persist / reload the embeddings through a memory-mapped
# file instead of re-encoding on every start.
# NOTE(review): the hard-coded shape below is not checked against the output of
# the model loaded above -- confirm it before re-enabling these lines.
# sen_embeddings= numpy.memmap('trainedmodel',mode="r",dtype=numpy.float32,shape=(1330,768))
# sen_embeddings.tofile('trainedmodel')
def preprocess(input: str):
    """Handle commands and explicit kural numbers before semantic search.

    Args:
        input: Raw query text from the user.

    Returns:
        The formatted kural text produced by ``kural_definition`` when the
        first number found in ``input`` is a valid 1-based kural number.
        ``False`` in every other case (slash command, no number, or a number
        outside the corpus), which tells the caller to fall back to
        similarity search.
    """
    if input.startswith('/'):
        # TODO: slash commands are not implemented yet.
        return False

    # Only the first number in the text is used.
    match = re.search(r'-?\d+\.?\d*', input)
    if match is None:
        return False

    # The pattern also matches a trailing "." or a fractional part
    # ("5.", "3.7"), which int() cannot parse directly; keep the integer
    # part only so "kural 5." resolves to kural 5 instead of raising.
    index = int(match.group().partition('.')[0])

    # Reject zero, negatives and numbers past the end of the corpus rather
    # than letting the lookup raise IndexError.
    if not 0 < index <= len(kurals):
        return False

    # User-facing numbers are 1-based; the lists are 0-based.
    return kural_definition(index - 1)
def find_similarities(input: str):
    """Return the kurals whose English meaning best matches ``input``.

    The query is first offered to ``preprocess``; if that yields a result
    (for example an explicit kural number) it is returned unchanged.
    Otherwise the query is embedded with the module-level ``model`` and
    ranked by cosine similarity against ``sen_embeddings``.

    Args:
        input: Raw query text from the user.

    Returns:
        A string with up to three formatted kurals (see
        ``kural_definition``), ordered from most to least similar.
    """
    response = preprocess(input)
    if response:
        return response

    from sklearn.metrics.pairwise import cosine_similarity

    input_embeddings = model.encode([input.lower()])

    # Score the query against the full embedding matrix so that row i maps
    # directly to kural i and the first kural is searchable like any other.
    scores = cosine_similarity(
        [input_embeddings[0]],
        sen_embeddings
    )[0]

    # argpartition needs kth to be within bounds, so cap the number of
    # results at the corpus size.
    top_k = min(3, len(scores))
    best = numpy.argpartition(scores, -top_k)[-top_k:]
    # argpartition does not order the selected items; sort them best-first.
    best = sorted(best, key=lambda i: -scores[i])

    parts = []
    for index in best:
        print(scores[index])
        parts.append(kural_definition(index))
    return ''.join(parts)
def kural_definition(index: int):
    """Format one kural for display and echo it to stdout.

    Args:
        index: 0-based position in the module-level ``kurals`` and
            ``en_translations`` lists.

    Returns:
        The kural's lines joined by newlines, followed by its English
        meaning on the next line and a blank line.
    """
    meaning = en_translations[index]
    print(meaning)

    verse = "\n".join(kurals[index])
    print(verse)

    return f"{verse}\n{meaning}\n\n"
# Manual console loop for local testing (disabled):
# while True:
#     text = input('Ask valluvar: ')
#     if (text == 'exit'):
#         break
#     find_similarities(text)
|