import gradio as gr
from transformers import AutoModel, AutoTokenizer
from sklearn.neighbors import NearestNeighbors
title = "Temporal evolution of word association (Overselling :P)"
description = "Based on TimeLMs which is a RoBERTa model finetuned on tweets at periodic interval"
article = "This outputs the top 500 similar tokens to the input word, as a list. Stay tuned for more info"
available_models = ['2019', '2020', '2022']
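# For each TimeLM checkpoint below: load the model and tokenizer, pull out the input
# word-embedding matrix, and fit a cosine-distance k-NN index over the full vocabulary
# so the 500 nearest tokens to any token id can be looked up later.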
model_2019 = AutoModel.from_pretrained('cardiffnlp/twitter-roberta-base-2019-90m')
tokenizers_2019 = AutoTokenizer.from_pretrained('cardiffnlp/twitter-roberta-base-2019-90m')
embedding_matrix_2019 = model_2019.embeddings.word_embeddings.weight
embedding_matrix_2019 = embedding_matrix_2019.detach().numpy()
knn_model_2019 = NearestNeighbors(n_neighbors=500, metric='cosine', algorithm='auto', n_jobs=3)
nbrs_2019 = knn_model_2019.fit(embedding_matrix_2019)
distances_2019, indices_2019 = nbrs_2019.kneighbors(embedding_matrix_2019)
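# Same pipeline for the June 2020 checkpoint.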
model_2020 = AutoModel.from_pretrained('cardiffnlp/twitter-roberta-base-jun2020')
tokenizers_2020 = AutoTokenizer.from_pretrained('cardiffnlp/twitter-roberta-base-jun2020')
embedding_matrix_2020 = model_2020.embeddings.word_embeddings.weight
embedding_matrix_2020 = embedding_matrix_2020.detach().numpy()
knn_model_2020 = NearestNeighbors(n_neighbors=500, metric='cosine', algorithm='auto', n_jobs=3)
nbrs_2020 = knn_model_2020.fit(embedding_matrix_2020)
distances_2020, indices_2020 = nbrs_2020.kneighbors(embedding_matrix_2020)
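# Same pipeline for the 2022 checkpoint.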
model_2022 = AutoModel.from_pretrained('cardiffnlp/twitter-roberta-base-2022-154m')
tokenizers_2022 = AutoTokenizer.from_pretrained('cardiffnlp/twitter-roberta-base-2022-154m')
embedding_matrix_2022 = model_2022.embeddings.word_embeddings.weight
embedding_matrix_2022 = embedding_matrix_2022.detach().numpy()
knn_model_2022 = NearestNeighbors(n_neighbors=500, metric='cosine', algorithm='auto', n_jobs=3)
nbrs_2022 = knn_model_2022.fit(embedding_matrix_2022)
distances_2022, indices_2022 = nbrs_2022.kneighbors(embedding_matrix_2022)
title = "How does a word's meaning change with time?"
def topk(word, model):
    outs = []
    if model == '2019':
        index = tokenizers_2019.encode(word)
        print(index)
        for i in indices_2019[index[1]]:
            outs.append(tokenizers_2019.decode(i))
        return outs
    if model == '2020':
        index = tokenizers_2020.encode(word)
        print(index)
        for i in indices_2020[index[1]]:
            outs.append(tokenizers_2020.decode(i))
        return outs
    if model == '2022':
        index = tokenizers_2022.encode(word)
        print(index)
        for i in indices_2022[index[1]]:
            outs.append(tokenizers_2022.decode(i))
        return outs
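# Quick sanity check outside the UI (hypothetical example word):
#   print(topk("climate", "2019")[:10])
#   print(topk("climate", "2022")[:10])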
# with gr.Blocks() as demo:
# gr.Markdown(f" # {title}")
# # gr.Markdown(f" ## {description1}")
# # gr.Markdown(f"{description2}")
# # gr.Markdown(f"{description3}")
# with gr.Row():
# word = gr.Textbox(label="Word")
# with gr.Row():
# greet_btn = gr.Button("Compute")
# with gr.Row():
# greet_btn.click(fn=topk, inputs=[word,gr.Dropdown(models)], outputs=gr.outputs.Textbox())
# demo.launch()
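# Gradio Interface: a word textbox plus a year dropdown as inputs, the neighbour list as output.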
interface = gr.Interface(fn=topk,
                         inputs=[gr.Textbox(label="Word"), gr.Dropdown(available_models, label="Year")],
                         outputs=gr.Textbox(),
                         title=title,
                         description=description,
                         article=article)
interface.launch()