Spaces:
Running
Running
File size: 4,270 Bytes
8a8ed77 260dd9d 8a8ed77 e107c7c 260dd9d 32dc508 260dd9d de79df8 8a8ed77 260dd9d e107c7c 260dd9d de79df8 260dd9d de79df8 e107c7c de79df8 643ca97 e107c7c de79df8 a4b4327 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 |
import gradio as gr
from gensim.models import KeyedVectors
def isNoneWords(word):
    """Tell whether ``word`` cannot be looked up in the loaded model.

    Args:
        word: The candidate word (may be ``None``).

    Returns:
        ``True`` if ``word`` is ``None``, has length zero, or is not a key of
        the module-level ``model.key_to_index``; ``False`` otherwise.
    """
    # Checks run left to right, so `model` is only consulted for a
    # non-None, non-empty word.
    return (
        word is None
        or len(word) == 0
        or word not in model.key_to_index
    )
def word_analogy(word1, word2, word3):
    """Answer the analogy ``word1 - word2 + word3`` with the nearest words.

    Args:
        word1: Word whose vector is the starting point.
        word2: Word whose vector is subtracted.
        word3: Word whose vector is added.

    Returns:
        A string with one ``"<word>: <similarity>"`` line per neighbour
        returned by ``model.similar_by_vector`` (similarity rounded to 4
        decimals), or the message ``"word is null or not in model!"`` when
        any input is ``None``, empty, or missing from the model.
    """
    # Validate like the other routes do; without this an empty or unknown
    # word raises from the vector lookup instead of returning a message.
    if isNoneWords(word1) or isNoneWords(word2) or isNoneWords(word3):
        return "word is null or not in model!"

    # get_vector replaces the deprecated word_vec alias.
    target = (
        model.get_vector(word1)
        - model.get_vector(word2)
        + model.get_vector(word3)
    )
    neighbours = model.similar_by_vector(target)
    return "".join(
        f'{neighbour}: {round(score, 4)}\n' for neighbour, score in neighbours
    )
def similarity_route(word1, word2):
    """Compute the similarity between two words using the loaded model.

    Args:
        word1: First word.
        word2: Second word.

    Returns:
        ``model.similarity(word1, word2)`` converted to a Python ``float``,
        or the string ``"word is null or not in model!"`` when either word
        fails the ``isNoneWords`` check.
    """
    both_usable = not (isNoneWords(word1) or isNoneWords(word2))
    if both_usable:
        return float(model.similarity(word1, word2))
    return "word is null or not in model!"
def top_similarity_route(word):
    """List the 20 words most similar to ``word``.

    Args:
        word: The query word.

    Returns:
        A string with one ``"<word>: <similarity>"`` line per neighbour
        (similarity rounded to 4 decimals), or the string
        ``"word is null or not in model!"`` when ``word`` fails the
        ``isNoneWords`` check.
    """
    if isNoneWords(word):
        return "word is null or not in model!"

    neighbours = model.similar_by_word(word, topn=20, restrict_vocab=None)
    formatted = [f'{entry[0]}: {round(entry[1], 4)}\n' for entry in neighbours]
    return "".join(formatted)
def top_similar_words_layout():
    """Build the "top similar words" panel.

    Creates one input textbox with Clear/Submit buttons, an output textbox
    beside it, and a set of clickable examples. Submit runs
    ``top_similarity_route``; Clear empties the input and output boxes.
    Intended to be called while a ``gr.Blocks`` context is open.
    """
    with gr.Column():
        with gr.Row():
            with gr.Column():
                word = gr.Textbox(lines=1, label='Input word', placeholder='Input word here')
                with gr.Row():
                    clear = gr.ClearButton()
                    submit = gr.Button("Submit")
            output = gr.Textbox(lines=20, label='Similar words', placeholder='Output here')

        # A ClearButton created without components has nothing to clear.
        # Attach the boxes here because `output` does not exist yet at the
        # point where the button is placed in the layout.
        clear.add([word, output])
        submit.click(fn=top_similarity_route, inputs=[word], outputs=[output])

        # One row per example, one value per input component. With a single
        # input, a two-value row would leave the second word unused.
        gr.Examples(
            examples=[['兔子'], ['松鼠']],
            fn=top_similarity_route,
            inputs=[word],
            outputs=[output],
            cache_examples=False,
            run_on_click=False,
        )
def similarity_layout():
    """Build the "similarity of two words" panel.

    Creates two input textboxes with Clear/Submit buttons, an output textbox
    beside them, and a clickable example. Submit runs ``similarity_route``;
    Clear empties both inputs and the output. Intended to be called while a
    ``gr.Blocks`` context is open.
    """
    with gr.Column():
        with gr.Row():
            with gr.Column():
                with gr.Row():
                    word1 = gr.Textbox(lines=1, label='Input word1', placeholder='Input word1 here')
                    word2 = gr.Textbox(lines=1, label='Input word2', placeholder='Input word2 here')
                with gr.Row():
                    clear = gr.ClearButton()
                    submit = gr.Button("Submit")
            # The output is a similarity score, not a word list, so it is
            # labelled accordingly.
            output = gr.Textbox(lines=1, label='Similarity', placeholder='Output here')

        # A ClearButton created without components has nothing to clear.
        # Attach the boxes here because `output` does not exist yet at the
        # point where the button is placed in the layout.
        clear.add([word1, word2, output])
        submit.click(fn=similarity_route, inputs=[word1, word2], outputs=[output])

        # Only the first two values map to the two inputs; the trailing
        # number is presumably the expected similarity, kept for reference.
        gr.Examples(
            examples=[['淘宝', '京东', 0.7887385]],
            fn=similarity_route,
            inputs=[word1, word2],
            outputs=[output],
            cache_examples=False,
            run_on_click=False,
        )
def word_analogy_layout():
    """Build the "word analogy" panel.

    Creates three input textboxes with Clear/Submit buttons, an output
    textbox beside them, and a clickable example. Submit runs
    ``word_analogy``; Clear empties all three inputs and the output.
    Intended to be called while a ``gr.Blocks`` context is open.
    """
    with gr.Column():
        with gr.Row():
            with gr.Column():
                with gr.Row():
                    word1 = gr.Textbox(lines=1, label='Input word1', placeholder='Input word1 here')
                    word2 = gr.Textbox(lines=1, label='Input word2', placeholder='Input word2 here')
                    word3 = gr.Textbox(lines=1, label='Input word3', placeholder='Input word3 here')
                with gr.Row():
                    clear = gr.ClearButton()
                    submit = gr.Button("Submit")
            output = gr.Textbox(lines=1, label='Analogy words', placeholder='Output here')

        # A ClearButton created without components has nothing to clear.
        # Attach the boxes here because `output` does not exist yet at the
        # point where the button is placed in the layout.
        clear.add([word1, word2, word3, output])
        submit.click(fn=word_analogy, inputs=[word1, word2, word3], outputs=[output])

        # Only the first three values map to the three inputs; the fourth
        # word is presumably the expected answer, kept for reference.
        gr.Examples(
            examples=[['国王', '男人', '女人', '王后']],
            fn=word_analogy,
            inputs=[word1, word2, word3],
            outputs=[output],
            cache_examples=False,
            run_on_click=False,
        )
if __name__ == '__main__':
    # Script entry point: load the embeddings, assemble the tabbed UI, serve it.

    # `model` is the module-level global the route functions read.
    # binary=False: the vectors file is in word2vec text format.
    model = KeyedVectors.load_word2vec_format(
        'tencent-ailab-embedding-zh-d100-v0.2.0-s.txt',
        binary=False,
    )
    title = 'Calculate word similarity based on Tencent AI Lab Embedding'

    # (tab label, function that builds the tab's contents), in display order.
    tab_builders = (
        ("Top similar words", top_similar_words_layout),
        ("Similarity of words", similarity_layout),
        ("Word analogy", word_analogy_layout),
    )

    with gr.Blocks() as demo:
        gr.HTML(title)
        with gr.Column(elem_id="col-container"):
            for tab_label, build_tab in tab_builders:
                with gr.Tab(tab_label):
                    build_tab()

    # Enable the request queue (max_size=64) and start the server.
    demo.queue(max_size=64).launch()