"""Gradio demo: word similarity and analogy queries over a word2vec model.

The route functions read the module-level name ``model``, which is only
bound when this file is run as a script (see the ``__main__`` guard below).
"""
import gradio as gr
from gensim.models import KeyedVectors


def isNoneWords(word):
    """Return True if ``word`` is None, empty, or missing from the model."""
    return not word or word not in model.key_to_index


def _format_similar(pairs):
    """Render ``(word, score)`` pairs as ``word: score`` lines.

    Scores are rounded to 4 decimals and every line, including the last,
    ends with a newline.
    """
    return "".join(f'{item[0]}: {round(item[1], 4)}\n' for item in pairs)


def word_analogy(word1, word2, word3):
    """Return words closest to ``vec(word1) - vec(word2) + vec(word3)``.

    Returns the same error message as the other routes when any of the
    inputs is empty or not in the model, instead of raising ``KeyError``.
    The number of results is the library default of ``similar_by_vector``.
    """
    if isNoneWords(word1) or isNoneWords(word2) or isNoneWords(word3):
        return "word is null or not in model!"
    # get_vector replaces the deprecated word_vec accessor.
    target = (
        model.get_vector(word1)
        - model.get_vector(word2)
        + model.get_vector(word3)
    )
    return _format_similar(model.similar_by_vector(target))


def similarity_route(word1, word2):
    """Return the model similarity of two words as a float.

    Returns an error message string when either word is empty or not in
    the model.
    """
    if isNoneWords(word1) or isNoneWords(word2):
        return "word is null or not in model!"
    return float(model.similarity(word1, word2))


def top_similarity_route(word):
    """Return the 20 words most similar to ``word``, one per line.

    Returns an error message string when the word is empty or not in the
    model.
    """
    if isNoneWords(word):
        return "word is null or not in model!"
    top_similar_words = model.similar_by_word(word, topn=20, restrict_vocab=None)
    return _format_similar(top_similar_words)


def top_similar_words_layout():
    """Build the "top similar words" tab. Must run inside a ``gr.Blocks``."""
    with gr.Column():
        with gr.Row():
            with gr.Column():
                word = gr.Textbox(lines=1, label='Input word', placeholder='Input word here')
                with gr.Row():
                    clear = gr.ClearButton()
                    submit = gr.Button("Submit")
            output = gr.Textbox(lines=20, label='Similar words', placeholder='Output here')
        # The clear button is created before the output box, so the
        # components it resets are attached afterwards.
        clear.add([word, output])
        submit.click(fn=top_similarity_route, inputs=[word], outputs=[output])
        # NOTE(review): the row has more values than inputs; the trailing
        # value looks like an expected output kept for reference - confirm.
        examples = [['兔子', '松鼠']]
        gr.Examples(
            examples=examples,
            fn=top_similarity_route,
            inputs=[word],
            outputs=[output],
            cache_examples=False,
            run_on_click=False,
        )


def similarity_layout():
    """Build the "similarity of words" tab. Must run inside a ``gr.Blocks``."""
    with gr.Column():
        with gr.Row():
            with gr.Column():
                with gr.Row():
                    word1 = gr.Textbox(lines=1, label='Input word1', placeholder='Input word1 here')
                    word2 = gr.Textbox(lines=1, label='Input word2', placeholder='Input word2 here')
                with gr.Row():
                    clear = gr.ClearButton()
                    submit = gr.Button("Submit")
            output = gr.Textbox(lines=1, label='Similar words', placeholder='Output here')
        # Attach the components to reset once they all exist.
        clear.add([word1, word2, output])
        submit.click(fn=similarity_route, inputs=[word1, word2], outputs=[output])
        # NOTE(review): the trailing number looks like the expected
        # similarity for the pair, kept for reference - confirm.
        examples = [['淘宝', '京东', 0.7887385]]
        gr.Examples(
            examples=examples,
            fn=similarity_route,
            inputs=[word1, word2],
            outputs=[output],
            cache_examples=False,
            run_on_click=False,
        )


def word_analogy_layout():
    """Build the "word analogy" tab. Must run inside a ``gr.Blocks``."""
    with gr.Column():
        with gr.Row():
            with gr.Column():
                with gr.Row():
                    word1 = gr.Textbox(lines=1, label='Input word1', placeholder='Input word1 here')
                    word2 = gr.Textbox(lines=1, label='Input word2', placeholder='Input word2 here')
                    word3 = gr.Textbox(lines=1, label='Input word3', placeholder='Input word3 here')
                with gr.Row():
                    clear = gr.ClearButton()
                    submit = gr.Button("Submit")
            output = gr.Textbox(lines=1, label='Analogy words', placeholder='Output here')
        # Attach the components to reset once they all exist.
        clear.add([word1, word2, word3, output])
        submit.click(fn=word_analogy, inputs=[word1, word2, word3], outputs=[output])
        # NOTE(review): the trailing word looks like the expected analogy
        # result, kept for reference - confirm.
        examples = [['国王', '男人', '女人', '王后']]
        gr.Examples(
            examples=examples,
            fn=word_analogy,
            inputs=[word1, word2, word3],
            outputs=[output],
            cache_examples=False,
            run_on_click=False,
        )


if __name__ == '__main__':
    # Binds the module-level ``model`` that every route function reads.
    model = KeyedVectors.load_word2vec_format('tencent-ailab-embedding-zh-d100-v0.2.0-s.txt', binary=False)
    title = 'Calculate word similarity based on Tencent AI Lab Embedding'

    with gr.Blocks() as demo:
        gr.HTML(title)
        with gr.Column(elem_id="col-container"):
            with gr.Tab("Top similar words"):
                top_similar_words_layout()
            with gr.Tab("Similarity of words"):
                similarity_layout()
            with gr.Tab("Word analogy"):
                word_analogy_layout()

    demo.queue(max_size=64).launch()