Spaces:

thiyagab
/

Thamizh

Build error

File size: 2,008 Bytes

import json
import numpy
import os
import re

# Load the corpus. JSON is UTF-8 by specification, so the encoding is pinned
# rather than left to the platform default, and the `with` block closes the
# handle even if parsing fails.
with open('thirukural_git.json', encoding='utf-8') as f:
    data = json.load(f)

# Parallel lists indexed by position in data['kurals']:
#   en_translations[i] - lower-cased English meaning of entry i
#   kurals[i]          - the entry's 'kural' value, stored unchanged
en_translations = [kural['meaning']['en'].lower() for kural in data['kurals']]
kurals = [kural['kural'] for kural in data['kurals']]

from sentence_transformers import SentenceTransformer

# Embed every English meaning once at import time so that lookups only have
# to encode the incoming query.
model = SentenceTransformer('all-MiniLM-L6-v2')
sen_embeddings = model.encode(en_translations)


# sen_embeddings= numpy.memmap('trainedmodel',mode="r",dtype=numpy.float32,shape=(1330,768))
# sen_embeddings.tofile('trainedmodel')


def preprocess(input: str):
    """Resolve inputs that can be answered without a semantic search.

    Inputs starting with '/' are reserved for commands, which are not
    implemented yet. Otherwise the first integer found in the text is
    treated as a 1-based kural number.

    Args:
        input: Raw user text.

    Returns:
        The string produced by ``kural_definition`` when the text contains a
        kural number between 1 and ``len(kurals)``; ``False`` in every other
        case, so the caller can fall back to similarity search.
    """
    if input.startswith('/'):
        # TODO: command handling
        return False

    # Match whole integers only. Capturing a decimal tail (e.g. "1.5" or "0.")
    # would make int() raise ValueError on otherwise harmless input.
    match = re.search(r'-?\d+', input)
    if match is None:
        return False

    index = int(match.group())
    # Reject zero, negatives and numbers past the end of the corpus instead of
    # letting the list lookup raise IndexError.
    if 0 < index <= len(kurals):
        return kural_definition(index - 1)
    return False


def find_similarities(input: str):
    """Return the kurals whose English meaning best matches ``input``.

    The text is first handed to ``preprocess``; if that yields a response
    (for example, the input named a kural number) it is returned directly.
    Otherwise the lower-cased text is embedded with ``model`` and compared
    against ``sen_embeddings`` by cosine similarity.

    Args:
        input: Raw user text.

    Returns:
        The concatenated ``kural_definition`` output of up to three entries,
        ordered from most to least similar.
    """
    response = preprocess(input)
    if response:
        return response

    from sklearn.metrics.pairwise import cosine_similarity

    input_embeddings = model.encode([input.lower()])
    # Compare against every stored embedding. Slicing off the first row would
    # make the first kural impossible to return.
    scores = cosine_similarity([input_embeddings[0]], sen_embeddings)[0]

    # argsort is ascending: reverse it and keep the best three. Unlike
    # argpartition with a fixed k, this also works with fewer than three rows.
    top_indices = numpy.argsort(scores)[::-1][:3]

    parts = []
    for index in top_indices:
        print(scores[index])
        parts.append(kural_definition(index))
    return ''.join(parts)


def kural_definition(index: int):
    """Format one kural together with its English meaning.

    Both pieces are also echoed to stdout (meaning first, then the kural).

    Args:
        index: Zero-based position in ``kurals`` / ``en_translations``.

    Returns:
        The elements of ``kurals[index]`` joined by newlines, a newline, the
        English meaning, and two trailing newlines.
    """
    meaning = en_translations[index]
    print(meaning)
    # NOTE(review): presumably kurals[index] is a sequence of line strings;
    # confirm against the JSON schema.
    verse = "\n".join(kurals[index])
    print(verse)
    return f"{verse}\n{meaning}\n\n"


# while True:
#     text = input('Ask valluvar: ')
#     if (text == 'exit'):
#         break
#     find_similarities(text)