File size: 2,008 Bytes
c6cc78a
 
 
83c8bb0
c6cc78a
 
 
 
 
 
 
 
9376927
 
c6cc78a
 
 
 
 
 
 
 
 
9376927
c6cc78a
 
 
9376927
c6cc78a
 
 
83c8bb0
9376927
83c8bb0
9376927
843f624
83c8bb0
 
9376927
 
 
83c8bb0
9376927
 
 
 
843f624
 
 
 
 
9376927
 
843f624
 
 
9376927
 
 
 
 
 
843f624
83c8bb0
 
9376927
 
 
 
 
 
843f624
c6cc78a
9376927
c6725ff
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
import json
import numpy
import os
import re

from sentence_transformers import SentenceTransformer

# Load the kural dataset. JSON is UTF-8 by specification, so the encoding is
# passed explicitly instead of relying on the platform default, and the
# context manager closes the file even if parsing fails.
with open('thirukural_git.json', encoding='utf-8') as f:
    data = json.load(f)

# Parallel lists: position i in both lists refers to the same entry of
# data['kurals']. English meanings are lower-cased before being embedded.
en_translations = [kural['meaning']['en'].lower() for kural in data['kurals']]
kurals = [kural['kural'] for kural in data['kurals']]

# Embed every English meaning once at import time; queries are compared
# against these vectors later.
model = SentenceTransformer('all-MiniLM-L6-v2')
sen_embeddings = model.encode(en_translations)


# sen_embeddings= numpy.memmap('trainedmodel',mode="r",dtype=numpy.float32,shape=(1330,768))
# sen_embeddings.tofile('trainedmodel')


def preprocess(input: str):
    """Resolve inputs that refer to a kural by number, before any semantic search.

    Args:
        input: Raw user text.

    Returns:
        The text produced by ``kural_definition`` when the first integer found
        in ``input`` is a valid 1-based kural number. ``False`` in every other
        case: the text starts with ``/``, contains no integer, the integer is
        zero or negative, or it lies beyond the loaded data.
    """
    if input.startswith('/'):
        # TODO: slash commands are not handled yet.
        return False
    # Match whole integers only. A pattern that also swallows a trailing "."
    # or decimals yields tokens such as "5." that int() cannot parse.
    match = re.search(r'-?\d+', input)
    if match is None:
        return False
    number = int(match.group())
    if number <= 0:
        return False
    try:
        # Users count from 1; the underlying lists are 0-based.
        return kural_definition(number - 1)
    except IndexError:
        # Number is larger than the dataset: report "not handled" so the
        # caller can fall back to its own processing instead of crashing.
        return False


def find_similarities(input: str, top_k: int = 3):
    """Return the kurals whose English meaning is closest to ``input``.

    If ``preprocess`` produces a non-empty response for the input, that
    response is returned unchanged. Otherwise the lower-cased input is
    embedded with ``model`` and compared by cosine similarity against every
    row of ``sen_embeddings``.

    Args:
        input: Raw user text.
        top_k: Maximum number of kurals to return (default 3). Values below 1
            produce an empty string.

    Returns:
        The concatenated ``kural_definition`` text of the best matches,
        ordered from most to least similar.
    """
    response = preprocess(input)
    if response:
        return response

    from sklearn.metrics.pairwise import cosine_similarity

    input_embeddings = model.encode([input.lower()])
    # Compare against ALL stored embeddings. Slicing off row 0 would make the
    # first kural unreachable and force an index shift on every result.
    scores = cosine_similarity([input_embeddings[0]], sen_embeddings)[0]
    # Full descending sort: also works when fewer than top_k rows exist.
    ranked = numpy.argsort(scores)[::-1][:max(top_k, 0)]

    parts = []
    for index in ranked:
        print(scores[index])
        # Row i of sen_embeddings was built from en_translations[i], so the
        # score index is used directly as the kural index.
        parts.append(kural_definition(int(index)))
    return ''.join(parts)


def kural_definition(index: int):
    """Format one kural and its English meaning as display text.

    Looks up position ``index`` in the module-level ``en_translations`` and
    ``kurals`` lists, echoes the meaning and then the verse to stdout, and
    returns the verse (its items joined by newlines) followed by the meaning
    and a blank line.
    """
    meaning = en_translations[index]
    print(meaning)
    verse = "\n".join(kurals[index])
    formatted = verse + "\n" + meaning + "\n\n"
    print(verse)
    return formatted


# while True:
#     text = input('Ask valluvar: ')
#     if (text == 'exit'):
#         break
#     find_similarities(text)