File size: 1,290 Bytes
7d81e6b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
import gradio as gr
from transformers import AutoModel, AutoTokenizer
from sklearn.neighbors import NearestNeighbors



# Checkpoint whose word-embedding table serves as the vector space to search.
MODEL = "cardiffnlp/twitter-roberta-base-jun2022"
tokenizer = AutoTokenizer.from_pretrained(MODEL)
model = AutoModel.from_pretrained(MODEL)

# Pull the word-embedding weights out of the model as a plain NumPy array so
# scikit-learn can consume them.
embedding_matrix = model.embeddings.word_embeddings.weight.detach().numpy()

# Cosine-distance nearest-neighbour index built over every embedding row.
knn_model = NearestNeighbors(
    n_neighbors=500,
    metric="cosine",
    algorithm="auto",
    n_jobs=3,
)
nbrs = knn_model.fit(embedding_matrix)

# Precompute, for each embedding row, its 500 nearest rows; ``indices`` is the
# lookup table read at query time.
distances, indices = nbrs.kneighbors(embedding_matrix)


# Heading shown at the top of the demo page.
title = "How does a word's meaning change with time?"


def topk(word: str) -> str:
    """Return the vocabulary tokens nearest to ``word`` in embedding space.

    The word is tokenized with the module-level ``tokenizer`` and only its
    first sub-token is looked up, so a word that splits into several tokens
    is represented by its first piece. Neighbours are read from the
    precomputed module-level ``indices`` table.

    Args:
        word: Text entered by the user.

    Returns:
        The decoded neighbouring tokens, one per line, in the order stored in
        ``indices``; an empty string when ``word`` yields no tokens.
    """
    # Skip special tokens so position 0 is the word's first sub-token,
    # independent of whichever markers the tokenizer would otherwise add.
    token_ids = tokenizer.encode(str(word), add_special_tokens=False)
    if not token_ids:
        # Empty input: nothing to look up (avoids querying a special token).
        return ""
    # Return the text instead of only printing it, so the UI output component
    # actually receives a value to display.
    return "\n".join(
        tokenizer.decode(int(token_id)) for token_id in indices[token_ids[0]]
    )

# Single-page UI: a word input, a trigger button and a text area for results.
with gr.Blocks() as demo:
    gr.Markdown(f" # {title}")
    # TODO: add description sections once the description texts are defined.
    with gr.Row():
        word = gr.Textbox(label="Word")
    with gr.Row():
        greet_btn = gr.Button("Compute")
    with gr.Row():
        # Use the same top-level gr.Textbox component as the input (the
        # legacy gr.outputs namespace is deprecated), and create it inside
        # the layout so it is rendered in this row.
        neighbours = gr.Textbox(label="Nearest tokens")
    # Clicking the button runs topk on the entered word and shows its result.
    greet_btn.click(fn=topk, inputs=[word], outputs=[neighbours])

demo.launch()