# Spaces: thiyagab / Thamizh
# Build error
# File size: 2,296 Bytes

import json
import numpy
import os
import re

# Opening JSON file
f = open('thirukural_git.json')

# returns JSON object as
# a dictionary
data = json.load(f)

en_translations = []
kurals = []
ta_translations=[]
# Iterating through the json
# list
for kural in data['kurals']:
    en_translations.append((kural['meaning']['en'].lower()))
    ta_translations.append((kural['meaning']['ta_salamon'].lower()))
    kurals.append(kural['kural'])

# Closing file
f.close()
from sentence_transformers import SentenceTransformer

model = SentenceTransformer('all-MiniLM-L6-v2')
sen_embeddings = model.encode(en_translations)


# sen_embeddings= numpy.memmap('trainedmodel',mode="r",dtype=numpy.float32,shape=(1330,768))
# sen_embeddings.tofile('trainedmodel')


def preprocess(input: str):
    if input.startswith('/'):
        # TODO
        return False
    values = [int(s) for s in re.findall(r'-?\d+\.?\d*', input)]
    if values:
        index = values[0]
        if index > 0:
            return kural_definition(index - 1)
    else:
        return False


def find_similarities(input: str):
    try:
        response = preprocess(input)
        if response:
            return response
        input_embeddings = model.encode([input.lower()])
        from sklearn.metrics.pairwise import cosine_similarity
        # let's calculate cosine similarity for sentence 0:
        similarity_matrix = cosine_similarity(
            [input_embeddings[0]],
            sen_embeddings[1:]
        )
        indices = [numpy.argpartition(similarity_matrix[0], -3)[-3:]]
        indices=sorted(indices[0],key=lambda x:-similarity_matrix[0][x])
        response = ''
        for index in indices:
            print(similarity_matrix[0][index])
            response += kural_definition(index + 1)
        return response
    except:
        return "Try again with different query"



def kural_definition(index: int):
    response = ''
    print(en_translations[index])
    response += str(index+1)+'. '+"\n".join(kurals[index]) + "\n"
    response += ta_translations[index]+"\n"
    response += en_translations[index] + "\n\n"
    print("\n".join(kurals[index]))
    return response


# while True:
#     text = input('Ask valluvar: ')
#     if (text == 'exit'):
#         break
#     find_similarities(text)