# Thirukkural lookup and semantic search.
#
# NOTE: the lines that originally stood here were residue from a web file
# viewer (a file-size banner, commit hashes and a line-number gutter). They
# were not part of the program and prevented the module from parsing.
import json
import numpy
import os
import re

# Load the kural data set.
# The encoding is pinned to UTF-8 (the JSON interchange encoding) instead of
# relying on the platform default; the 'ta_salamon' meanings are presumably
# Tamil text, which a non-UTF-8 default codec would fail to decode.
# The context manager closes the file even if parsing raises.
with open('thirukural_git.json', encoding='utf-8') as f:
    # json.load returns the document as a dictionary.
    data = json.load(f)

# Parallel lists: position i in each list describes the same kural.
en_translations = []
kurals = []
ta_translations = []
for kural in data['kurals']:
    # Meanings are lower-cased once here; the couplet itself is stored as-is.
    en_translations.append(kural['meaning']['en'].lower())
    ta_translations.append(kural['meaning']['ta_salamon'].lower())
    kurals.append(kural['kural'])
from sentence_transformers import SentenceTransformer

# Load the sentence-embedding model once at import time; find_similarities()
# reuses it to embed each incoming query.
model = SentenceTransformer('all-MiniLM-L6-v2')
# Embed every English meaning up front so a query only needs one encode call.
# Row i of sen_embeddings is the embedding of en_translations[i].
sen_embeddings = model.encode(en_translations)


# Disabled experiment: persist / reload the embeddings through a numpy memmap
# file instead of re-encoding on every start.
# NOTE(review): the hard-coded shape=(1330,768) is unverified -- confirm it
# against sen_embeddings.shape for the model above before re-enabling.
# sen_embeddings= numpy.memmap('trainedmodel',mode="r",dtype=numpy.float32,shape=(1330,768))
# sen_embeddings.tofile('trainedmodel')


def preprocess(input: str):
    """Answer queries that refer to a kural by its number.

    Args:
        input: Raw user query.

    Returns:
        The text produced by ``kural_definition`` when the first integer in
        the query is positive; ``False`` when the query is a slash-command,
        contains no integer, or the integer is zero or negative.

    Raises:
        IndexError: Propagated from ``kural_definition`` when the number is
            larger than the number of loaded kurals.
    """
    if input.startswith('/'):
        # TODO: slash-commands are not implemented yet.
        return False

    # Match only the integer part. Capturing an optional '.' and decimals
    # (as in "kural 5." or "3.5") hands int() a string it cannot parse, which
    # made a number at the end of a sentence fail.
    match = re.search(r'-?\d+', input)
    if match is None:
        return False

    number = int(match.group())
    if number <= 0:
        # Kural numbers are 1-based; always signal "not handled" with False
        # rather than falling off the end and returning None.
        return False
    return kural_definition(number - 1)


def find_similarities(input: str):
    """Return the kurals that best match *input* as one formatted string.

    A query handled by ``preprocess`` (one that names a kural number) is
    answered directly. Otherwise the query is embedded with ``model`` and
    compared by cosine similarity against ``sen_embeddings``; the (up to)
    three best matches are formatted with ``kural_definition`` and
    concatenated, best match first.

    Args:
        input: Raw user query.

    Returns:
        The formatted kural text, or the fixed message
        ``"Try again with different query"`` if anything goes wrong.
    """
    try:
        response = preprocess(input)
        if response:
            return response

        # Kept as a function-level import, as the module does not import
        # scikit-learn at the top.
        from sklearn.metrics.pairwise import cosine_similarity

        query_embedding = model.encode([input.lower()])[0]
        # Compare against every embedding. Slicing off the first row would
        # make the first kural unreachable and shift all indices by one.
        scores = cosine_similarity([query_embedding], sen_embeddings)[0]

        # Guard against data sets with fewer than three entries.
        top_k = min(3, len(scores))
        best = numpy.argsort(scores)[::-1][:top_k]

        parts = []
        for index in best:
            print(scores[index])
            parts.append(kural_definition(int(index)))
        return ''.join(parts)
    except Exception as error:
        # Best-effort boundary: report the failure instead of hiding it, but
        # let KeyboardInterrupt / SystemExit propagate.
        print("find_similarities failed: %r" % (error,))
        return "Try again with different query"



def kural_definition(index: int):
    """Format one kural for display and echo it to stdout.

    Args:
        index: Zero-based position in ``kurals`` / ``en_translations`` /
            ``ta_translations``.

    Returns:
        A string made of the one-based kural number followed by the kural's
        elements joined with newlines, then the ``ta_translations`` entry,
        then the ``en_translations`` entry, terminated by a blank line.
    """
    meaning_en = en_translations[index]
    print(meaning_en)

    couplet = "\n".join(kurals[index])
    formatted = "{}. {}\n{}\n{}\n\n".format(
        index + 1, couplet, ta_translations[index], meaning_en
    )

    print(couplet)
    return formatted


# while True:
#     text = input('Ask valluvar: ')
#     if (text == 'exit'):
#         break
#     find_similarities(text)