File size: 3,999 Bytes
7d81e6b
 
 
 
70293a6
 
 
 
6bab54c
2a873fa
 
 
6b9b713
6bab54c
 
 
 
 
 
 
 
 
 
6b9b713
7d81e6b
6bab54c
 
 
 
 
 
 
 
 
 
7d81e6b
2a873fa
563f648
2a873fa
 
 
 
 
 
 
 
 
7d81e6b
76e5451
7d81e6b
76e5451
7d81e6b
 
6bab54c
 
2a873fa
6bab54c
 
2a873fa
6bab54c
 
 
 
2a873fa
6bab54c
 
2a873fa
 
 
 
 
 
 
 
 
6bab54c
e7d3e05
052fd21
 
 
 
 
 
 
 
 
 
 
73ab6a3
052fd21
 
6b9b713
bdda483
70293a6
 
 
e685211
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
import gradio as gr
from transformers import AutoModel, AutoTokenizer
from sklearn.neighbors import NearestNeighbors

# UI copy for the Gradio interface (note: `title` is overridden later in the file).
title = "Temporal evolution of word association (Overselling :P)"
description = "Based on TimeLMs which is a RoBERTa model finetuned on tweets at periodic interval"
article = "This outputs the top 500 similar tokens to the input word, as a list. Stay tuned for more info"

# Model snapshots the user can pick from in the dropdown.
available_models = ['2019', '2020', '2022']

# --- 2019 snapshot -----------------------------------------------------------
# Load the model and tokenizer, pull out the static input-embedding table,
# then precompute the 500 cosine-nearest neighbours of every vocabulary row.
model_2019 = AutoModel.from_pretrained('cardiffnlp/twitter-roberta-base-2019-90m')
tokenizers_2019 = AutoTokenizer.from_pretrained('cardiffnlp/twitter-roberta-base-2019-90m')
embedding_matrix_2019 = model_2019.embeddings.word_embeddings.weight.detach().numpy()
knn_model_2019 = NearestNeighbors(
    n_neighbors=500, metric='cosine', algorithm='auto', n_jobs=3)
nbrs_2019 = knn_model_2019.fit(embedding_matrix_2019)
# indices_2019[row] lists the 500 nearest vocab ids to token `row`.
distances_2019, indices_2019 = nbrs_2019.kneighbors(embedding_matrix_2019)


# --- 2020 snapshot -----------------------------------------------------------
# Same pipeline as the 2019 block: model + tokenizer, embedding table,
# then precomputed 500-NN (cosine) for the whole vocabulary.
model_2020 = AutoModel.from_pretrained('cardiffnlp/twitter-roberta-base-jun2020')
tokenizers_2020 = AutoTokenizer.from_pretrained('cardiffnlp/twitter-roberta-base-jun2020')
embedding_matrix_2020 = model_2020.embeddings.word_embeddings.weight.detach().numpy()
knn_model_2020 = NearestNeighbors(
    n_neighbors=500, metric='cosine', algorithm='auto', n_jobs=3)
nbrs_2020 = knn_model_2020.fit(embedding_matrix_2020)
# indices_2020[row] lists the 500 nearest vocab ids to token `row`.
distances_2020, indices_2020 = nbrs_2020.kneighbors(embedding_matrix_2020)

# --- 2022 snapshot -----------------------------------------------------------
# Same pipeline as the 2019/2020 blocks: model + tokenizer, embedding table,
# then precomputed 500-NN (cosine) for the whole vocabulary.
model_2022 = AutoModel.from_pretrained('cardiffnlp/twitter-roberta-base-2022-154m')
tokenizers_2022 = AutoTokenizer.from_pretrained('cardiffnlp/twitter-roberta-base-2022-154m')
embedding_matrix_2022 = model_2022.embeddings.word_embeddings.weight.detach().numpy()
knn_model_2022 = NearestNeighbors(
    n_neighbors=500, metric='cosine', algorithm='auto', n_jobs=3)
nbrs_2022 = knn_model_2022.fit(embedding_matrix_2022)
# BUG FIX: the original queried `nbrs_2020` here, so the 2022 "neighbours"
# were looked up in the 2020 model's embedding space (wrong vocabulary, and
# a crash if the embedding dimensions ever differ). Query the 2022 index.
distances_2022, indices_2022 = nbrs_2022.kneighbors(embedding_matrix_2022)


# NOTE(review): this deliberately overrides the `title` assigned near the top
# of the file; the gr.Interface below is built after this line and therefore
# displays this value.
title = "How does a word's meaning change with time?"

def topk(word, model):
    """Return the 500 nearest-neighbour tokens of *word* for one model year.

    Parameters
    ----------
    word : str
        Query word entered by the user.
    model : str
        Snapshot selector; one of ``available_models`` ('2019', '2020', '2022').

    Returns
    -------
    list[str] | None
        Decoded neighbour tokens, nearest first, or ``None`` for an
        unrecognised *model* (preserves the original fall-through behaviour).
    """
    # Dispatch table replaces three copy-pasted if-blocks that differed only
    # in which year's tokenizer / neighbour table they touched.
    resources = {
        '2019': (tokenizers_2019, indices_2019),
        '2020': (tokenizers_2020, indices_2020),
        '2022': (tokenizers_2022, indices_2022),
    }
    if model not in resources:
        return None
    tokenizer, indices = resources[model]

    token_ids = tokenizer.encode(word)  # (the original's f'{word}' was redundant)
    print(token_ids)  # debug output, kept from the original
    # token_ids[1] is the first token after the BOS marker at position 0.
    # NOTE(review): a word that splits into several subwords only uses its
    # first piece here — confirm that is the intended behaviour.
    return [tokenizer.decode(i) for i in indices[token_ids[1]]]

# Build and launch the demo UI. (A commented-out gr.Blocks prototype that
# referenced an undefined `models` variable was removed here.)
interface = gr.Interface(
    fn=topk,
    inputs=[gr.Textbox(label="Word"), gr.Dropdown(available_models)],
    # `gr.outputs.*` was removed in Gradio 4; a plain component works on
    # both the 3.x and 4.x APIs (the file already uses gr.Textbox above).
    outputs=gr.Textbox(),
    title=title,
    description=description,
    article=article,
)
interface.launch()