# rachith's picture
# bug fix
# bdda483
# raw
# history blame
# 3.03 kB
import gradio as gr
from transformers import AutoModel, AutoTokenizer
from sklearn.neighbors import NearestNeighbors
# Text shown by the Gradio interface.
title = "How does a word's meaning change with time?"
description = "Based on TimeLMs which is a RoBERTa model finetuned on tweets at periodic interval"
article = "This outputs the top 500 similar tokens to the input word, as a list. Stay tuned for more info"

# Model years offered in the dropdown; `topk` dispatches on these strings.
available_models = ['2019', '2020']


def _build_neighbour_index(checkpoint, n_neighbors=500):
    """Load one checkpoint and precompute nearest neighbours of its embeddings.

    Args:
        checkpoint: Model identifier passed to ``from_pretrained`` for both
            the model and its tokenizer.
        n_neighbors: Number of neighbours stored for every embedding row.

    Returns:
        A tuple ``(model, tokenizer, embedding_matrix, knn_model, nbrs,
        distances, indices)``.  ``distances`` and ``indices`` are the result
        of querying the fitted index with the embedding matrix itself, so
        row ``i`` describes the neighbours of embedding row ``i``.
    """
    model = AutoModel.from_pretrained(checkpoint)
    tokenizer = AutoTokenizer.from_pretrained(checkpoint)

    # Detach from autograd so the weights can be handed over as a NumPy array.
    embedding_matrix = model.embeddings.word_embeddings.weight.detach().numpy()

    knn_model = NearestNeighbors(n_neighbors=n_neighbors,
                                 metric='cosine',
                                 algorithm='auto',
                                 n_jobs=3)
    nbrs = knn_model.fit(embedding_matrix)

    # Done once at start-up so that a request only needs a table lookup.
    distances, indices = nbrs.kneighbors(embedding_matrix)
    return model, tokenizer, embedding_matrix, knn_model, nbrs, distances, indices


# Every module-level name of the original script is kept, one set per year.
(model_2019, tokenizers_2019, embedding_matrix_2019, knn_model_2019,
 nbrs_2019, distances_2019, indices_2019) = _build_neighbour_index(
    'cardiffnlp/twitter-roberta-base-2019-90m')

(model_2020, tokenizers_2020, embedding_matrix_2020, knn_model_2020,
 nbrs_2020, distances_2020, indices_2020) = _build_neighbour_index(
    'cardiffnlp/twitter-roberta-base-jun2020')
def topk(word, model):
    """Return the tokens whose embeddings are closest to ``word``'s.

    The word is encoded with the tokenizer of the selected model year, and
    the precomputed neighbour table of that year is read at the row of the
    token id found at position 1 of the encoding.

    Args:
        word: Text typed by the user.
        model: Year of the model to query, ``'2019'`` or ``'2020'``.

    Returns:
        A list with the decoded neighbour tokens, in the order stored in the
        neighbour table.  The list is empty when ``word`` is blank or when
        ``model`` is not one of the known years.
    """
    # A blank word has no token of its own; without this guard position 1 of
    # the encoding would presumably be a marker token, not user text.
    if word is None or not str(word).strip():
        return []

    if model == '2019':
        tokenizer, neighbour_table = tokenizers_2019, indices_2019
    elif model == '2020':
        tokenizer, neighbour_table = tokenizers_2020, indices_2020
    else:
        # Nothing (or an unknown year) selected: keep the return type a list.
        return []

    token_ids = tokenizer.encode(f'{word}')
    # Position 0 is presumably the start-of-sequence marker added by the
    # tokenizer, so position 1 is the first piece of the word.  A word that
    # is split into several pieces is looked up by that first piece only.
    # NOTE(review): confirm against the tokenizer's special-token layout.
    first_piece = token_ids[1]

    return [tokenizer.decode(i) for i in neighbour_table[first_piece]]
# Disabled alternative layout built with gr.Blocks, kept for reference:
# with gr.Blocks() as demo:
#     gr.Markdown(f" # {title}")
#     # gr.Markdown(f" ## {description1}")
#     # gr.Markdown(f"{description2}")
#     # gr.Markdown(f"{description3}")
#     with gr.Row():
#         word = gr.Textbox(label="Word")
#     with gr.Row():
#         greet_btn = gr.Button("Compute")
#     with gr.Row():
#         greet_btn.click(fn=topk, inputs=[word,gr.Dropdown(models)], outputs=gr.outputs.Textbox())
# demo.launch()
# Single-function UI: a word textbox and a model-year dropdown feed `topk`,
# and the returned list is rendered in a text box.
interface = gr.Interface(
    fn=topk,
    inputs=[gr.Textbox(label="Word"), gr.Dropdown(available_models)],
    # Use the same top-level component family as the inputs; the legacy
    # `gr.outputs` namespace is deprecated.
    outputs=gr.Textbox(),
    title=title,
    description=description,
    article=article,
)
interface.launch()