File size: 2,644 Bytes
7d81e6b
 
 
 
6bab54c
 
6b9b713
6bab54c
 
 
 
 
 
 
 
 
 
6b9b713
7d81e6b
6bab54c
 
 
 
 
 
 
 
 
 
7d81e6b
 
76e5451
7d81e6b
76e5451
7d81e6b
 
6bab54c
 
 
 
 
 
 
 
 
 
 
 
 
e7d3e05
052fd21
 
 
 
 
 
 
 
 
 
 
73ab6a3
052fd21
 
6b9b713
052fd21
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
import gradio as gr
from transformers import AutoModel, AutoTokenizer
from sklearn.neighbors import NearestNeighbors

available_models = ['2019',
                    '2020']


def _build_nearest_neighbors(checkpoint):
    """Load *checkpoint* and precompute each token's 500 nearest neighbours.

    checkpoint: a Hugging Face model id (e.g. 'cardiffnlp/twitter-roberta-base-2019-90m').

    Returns a 7-tuple
    (model, tokenizer, embedding_matrix, knn_model, nbrs, distances, indices)
    where indices[i] contains the vocabulary ids of the 500 tokens whose
    input embeddings are closest (cosine distance) to token i.
    """
    model = AutoModel.from_pretrained(checkpoint)
    tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    # Input (word-piece) embedding table as a plain numpy array.
    embedding_matrix = model.embeddings.word_embeddings.weight.detach().numpy()
    knn_model = NearestNeighbors(n_neighbors=500,
                                 metric='cosine',
                                 algorithm='auto',
                                 n_jobs=3)
    nbrs = knn_model.fit(embedding_matrix)
    # Query the index with the whole vocabulary at once so lookups in the
    # UI are a simple O(1) row access into `indices`.
    distances, indices = nbrs.kneighbors(embedding_matrix)
    return model, tokenizer, embedding_matrix, knn_model, nbrs, distances, indices


# Keep the original per-year global names so downstream code is unaffected.
(model_2019, tokenizers_2019, embedding_matrix_2019, knn_model_2019,
 nbrs_2019, distances_2019, indices_2019) = _build_nearest_neighbors(
    'cardiffnlp/twitter-roberta-base-2019-90m')

(model_2020, tokenizers_2020, embedding_matrix_2020, knn_model_2020,
 nbrs_2020, distances_2020, indices_2020) = _build_nearest_neighbors(
    'cardiffnlp/twitter-roberta-base-jun2020')


title = "How does a word's meaning change with time?"

def topk(word, model):
    """Return the 500 nearest-neighbour tokens of *word* for a given model year.

    word: the query word typed by the user.
    model: one of available_models ('2019' or '2020').

    Only the first subword of *word* is used: index 1 of the encoding,
    i.e. the token immediately after the <s> BOS marker — multi-subword
    words are truncated to their first piece.

    Returns a list of decoded neighbour tokens.
    Raises ValueError for an unknown model choice (the original silently
    returned None, which Gradio would render as the literal text "None").
    """
    lookup = {
        '2019': (tokenizers_2019, indices_2019),
        '2020': (tokenizers_2020, indices_2020),
    }
    if model not in lookup:
        raise ValueError(f"Unknown model {model!r}; expected one of {available_models}")
    tokenizer, indices = lookup[model]

    token_ids = tokenizer.encode(word)
    # token_ids[0] is <s>; token_ids[1] is the first subword of the query.
    outs = [tokenizer.decode(i) for i in indices[token_ids[1]]]
    for out in outs:
        print(out)  # preserved debug trace from the original implementation
    return outs

# with gr.Blocks() as demo:
#     gr.Markdown(f" # {title}")
#     # gr.Markdown(f" ## {description1}")
#     # gr.Markdown(f"{description2}")
#     # gr.Markdown(f"{description3}")
#     with gr.Row():
#         word = gr.Textbox(label="Word")
#     with gr.Row():
#         greet_btn = gr.Button("Compute")
#     with gr.Row():
#         greet_btn.click(fn=topk, inputs=[word,gr.Dropdown(models)], outputs=gr.outputs.Textbox())
# demo.launch()

# Simple Gradio UI: a word textbox plus a model-year dropdown; the
# neighbour list returned by topk() is rendered in a text box.
# NOTE: gr.outputs.Textbox() is the deprecated pre-3.x API and was removed
# in Gradio 3/4 — use the component class directly, as the inputs already do.
interface = gr.Interface(fn=topk,
                         inputs=[gr.Textbox(label="Word"),
                                 gr.Dropdown(available_models, label="Model")],
                         outputs=gr.Textbox(label="Nearest neighbours"))
interface.launch()