Spaces:

flow3rdown
/

word_sim

Sleeping

File size: 897 Bytes

8a8ed77
260dd9d
8a8ed77
e107c7c
260dd9d
32dc508
260dd9d
 
 
8a8ed77
260dd9d
 
 
 
e107c7c
 
 
 
 
260dd9d
 
 
 
e107c7c

import gradio as gr
from gensim.models import KeyedVectors


def isNoneWords(word):
    """Report whether ``word`` is unusable as a query against the model.

    Returns True when ``word`` is None, has zero length, or is not a key of
    the global ``model``'s ``key_to_index`` mapping; returns False otherwise.
    """
    # Missing or empty input is rejected before the vocabulary is consulted.
    if word is None:
        return True
    if len(word) == 0:
        return True
    # Anything left is usable only if the model's vocabulary contains it.
    return word not in model.key_to_index

def top_similarity_route(word):
    """Return the 20 words most similar to ``word`` as display text.

    Args:
        word: Query word. Leading and trailing whitespace is ignored.

    Returns:
        One ``"<word>: <score>"`` line per similar word, with the score
        rounded to 4 decimal places and every line newline-terminated, or a
        fixed error message when ``isNoneWords`` rejects the word.
    """
    # Text typed into a UI often carries stray spaces or a trailing newline,
    # which would otherwise make a valid word look out-of-vocabulary.
    if isinstance(word, str):
        word = word.strip()
    if isNoneWords(word):
        return "word is null or not in model!"

    top_similar_words = model.similar_by_word(word, topn=20, restrict_vocab=None)
    # Assemble the output with a single join rather than repeated `+=`.
    return "".join(
        f'{neighbour}: {round(score, 4)}\n'
        for neighbour, score in top_similar_words
    )


if __name__ == '__main__':
    # Load the word2vec text-format embeddings into the global `model`
    # that isNoneWords and top_similarity_route read.
    model = KeyedVectors.load_word2vec_format('tencent-ailab-embedding-zh-d100-v0.2.0-s.txt', binary=False)

    title = 'Calculate word similarity based on Tencent AI Lab Embedding'
    # A plain string passed as `inputs`/`outputs` is interpreted by Gradio as
    # a component shortcut name (such as "text"), not as a label, so the
    # labels are attached to explicit Textbox components instead.
    # NOTE(review): top-level gr.Textbox requires Gradio 3 or newer.
    iface = gr.Interface(
        fn=top_similarity_route,
        inputs=gr.Textbox(label="Word"),
        outputs=gr.Textbox(label="Similar words"),
        title=title,
    )
    iface.launch(share=True)