File size: 4,270 Bytes
8a8ed77
260dd9d
8a8ed77
e107c7c
260dd9d
32dc508
260dd9d
 
 
de79df8
 
 
 
 
 
 
 
 
 
 
 
 
 
8a8ed77
260dd9d
 
 
 
e107c7c
 
 
 
 
260dd9d
de79df8
 
 
 
 
 
 
 
 
260dd9d
de79df8
 
 
 
 
 
 
 
 
 
 
 
e107c7c
de79df8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
643ca97
e107c7c
de79df8
 
 
 
 
 
 
 
 
 
 
a4b4327
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
import gradio as gr
from gensim.models import KeyedVectors


def isNoneWords(word):
    """Report whether ``word`` cannot be looked up in the embedding model.

    A word is rejected when it is ``None``, has length zero, or is not a key
    of the module-level ``model``'s ``key_to_index`` mapping.

    Returns:
        ``True`` when the word is unusable, ``False`` otherwise.
    """
    # The cheap checks run first, so the vocabulary is only consulted for
    # non-empty input.
    if word is None:
        return True
    if len(word) == 0:
        return True
    return word not in model.key_to_index
    
def word_analogy(word1, word2, word3):
    """Find the words closest to ``vec(word1) - vec(word2) + vec(word3)``.

    Args:
        word1: Word whose vector is the starting point.
        word2: Word whose vector is subtracted.
        word3: Word whose vector is added.

    Returns:
        One ``"<word>: <similarity>"`` line per neighbour returned by the
        model (similarity rounded to four decimals), or the shared error
        message when any input is empty or missing from the vocabulary.
    """
    # Validate up front, like the other routes: an empty or unknown word
    # would otherwise raise inside the vector lookup instead of producing a
    # readable message in the UI.
    if any(isNoneWords(word) for word in (word1, word2, word3)):
        return "word is null or not in model!"

    # get_vector is the supported accessor; word_vec is a deprecated alias.
    target = (
        model.get_vector(word1)
        - model.get_vector(word2)
        + model.get_vector(word3)
    )
    neighbours = model.similar_by_vector(target)
    return "".join(f'{word}: {round(score, 4)}\n' for word, score in neighbours)
    
def similarity_route(word1, word2):
    """Return the model's similarity score for a pair of words.

    Returns:
        The value of ``model.similarity(word1, word2)`` converted to a
        plain ``float``, or an error message when either word is rejected
        by ``isNoneWords``.
    """
    # Check the words in order and bail out on the first unusable one.
    for candidate in (word1, word2):
        if isNoneWords(candidate):
            return "word is null or not in model!"

    score = model.similarity(word1, word2)
    return float(score)


def top_similarity_route(word):
    """List the 20 entries the model ranks as most similar to ``word``.

    Returns:
        One ``"<word>: <similarity>"`` line per neighbour, with the
        similarity rounded to four decimals, or an error message when the
        word is rejected by ``isNoneWords``.
    """
    if isNoneWords(word):
        return "word is null or not in model!"

    neighbours = model.similar_by_word(word, topn=20, restrict_vocab=None)
    # Each entry is indexed as (word, score); format every entry as a line.
    lines = [f'{entry[0]}: {round(entry[1], 4)}\n' for entry in neighbours]
    return "".join(lines)

def top_similar_words_layout():
    """Build the "top similar words" panel.

    Lays out one input textbox with Clear/Submit buttons next to a
    multi-line output textbox, wires Submit to ``top_similarity_route`` and
    adds a clickable example. Creates Gradio components, so it is meant to
    be called inside an active ``gr.Blocks`` context.
    """
    with gr.Column():
        with gr.Row():
            with gr.Column():
                word = gr.Textbox(lines=1, label='Input word', placeholder='Input word here')
                with gr.Row():
                    clear = gr.ClearButton()
                    submit = gr.Button("Submit")
            output = gr.Textbox(lines=20, label='Similar words', placeholder='Output here')

    # A ClearButton only resets the components registered with it; without
    # this call the button is rendered but clicking it does nothing.
    clear.add([word, output])
    submit.click(fn=top_similarity_route, inputs=[word], outputs=[output])

    # NOTE(review): the example row has one more value than there are
    # inputs; presumably the trailing entry is a sample result, as in the
    # other tabs - confirm.
    examples = [['兔子', '松鼠']]
    gr.Examples(
        examples=examples,
        fn=top_similarity_route,
        inputs=[word],
        outputs=[output],
        cache_examples=False,
        run_on_click=False,
    )
    
    
def similarity_layout():
    """Build the "similarity of words" panel.

    Lays out two input textboxes with Clear/Submit buttons next to a
    single-line output textbox, wires Submit to ``similarity_route`` and
    adds a clickable example. Creates Gradio components, so it is meant to
    be called inside an active ``gr.Blocks`` context.
    """
    with gr.Column():
        with gr.Row():
            with gr.Column():
                with gr.Row():
                    word1 = gr.Textbox(lines=1, label='Input word1', placeholder='Input word1 here')
                    word2 = gr.Textbox(lines=1, label='Input word2', placeholder='Input word2 here')
                with gr.Row():
                    clear = gr.ClearButton()
                    submit = gr.Button("Submit")
            output = gr.Textbox(lines=1, label='Similar words', placeholder='Output here')

    # A ClearButton only resets the components registered with it; without
    # this call the button is rendered but clicking it does nothing.
    clear.add([word1, word2, output])
    submit.click(fn=similarity_route, inputs=[word1, word2], outputs=[output])

    # NOTE(review): the example row has one more value than there are
    # inputs; presumably the trailing number is the expected score - confirm.
    examples = [['淘宝', '京东', 0.7887385]]
    gr.Examples(
        examples=examples,
        fn=similarity_route,
        inputs=[word1, word2],
        outputs=[output],
        cache_examples=False,
        run_on_click=False,
    )
    
def word_analogy_layout():
    """Build the "word analogy" panel.

    Lays out three input textboxes with Clear/Submit buttons next to an
    output textbox, wires Submit to ``word_analogy`` and adds a clickable
    example. Creates Gradio components, so it is meant to be called inside
    an active ``gr.Blocks`` context.
    """
    with gr.Column():
        with gr.Row():
            with gr.Column():
                with gr.Row():
                    word1 = gr.Textbox(lines=1, label='Input word1', placeholder='Input word1 here')
                    word2 = gr.Textbox(lines=1, label='Input word2', placeholder='Input word2 here')
                    word3 = gr.Textbox(lines=1, label='Input word3', placeholder='Input word3 here')
                with gr.Row():
                    clear = gr.ClearButton()
                    submit = gr.Button("Submit")
            output = gr.Textbox(lines=1, label='Analogy words', placeholder='Output here')

    # A ClearButton only resets the components registered with it; without
    # this call the button is rendered but clicking it does nothing.
    clear.add([word1, word2, word3, output])
    submit.click(fn=word_analogy, inputs=[word1, word2, word3], outputs=[output])

    # NOTE(review): the example row has one more value than there are
    # inputs; presumably the trailing word is the expected answer - confirm.
    examples = [['国王', '男人', '女人', '王后']]
    gr.Examples(
        examples=examples,
        fn=word_analogy,
        inputs=[word1, word2, word3],
        outputs=[output],
        cache_examples=False,
        run_on_click=False,
    )

if __name__ == '__main__':
    # The route functions read this module-level ``model``, so it has to be
    # loaded before the UI starts serving requests.
    model = KeyedVectors.load_word2vec_format(
        'tencent-ailab-embedding-zh-d100-v0.2.0-s.txt',
        binary=False,
    )
    title = 'Calculate word similarity based on Tencent AI Lab Embedding'

    # Tab label -> function that builds the tab's contents, in display order.
    tab_builders = (
        ("Top similar words", top_similar_words_layout),
        ("Similarity of words", similarity_layout),
        ("Word analogy", word_analogy_layout),
    )

    with gr.Blocks() as demo:
        gr.HTML(title)
        with gr.Column(elem_id="col-container"):
            for tab_label, build_tab in tab_builders:
                with gr.Tab(tab_label):
                    build_tab()

    # Enable the request queue (at most 64 waiting), then start the server.
    demo.queue(max_size=64)
    demo.launch()