File size: 2,644 Bytes
7d81e6b
 
 
 
6bab54c
 
6b9b713
6bab54c
 
 
 
 
 
 
 
 
 
6b9b713
7d81e6b
6bab54c
 
 
 
 
 
 
 
 
 
7d81e6b
 
76e5451
7d81e6b
76e5451
7d81e6b
 
6bab54c
 
 
 
 
 
 
 
 
 
 
 
 
e7d3e05
052fd21
 
 
 
 
 
 
 
 
 
 
73ab6a3
052fd21
 
6b9b713
052fd21
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
import gradio as gr
from transformers import AutoModel, AutoTokenizer
from sklearn.neighbors import NearestNeighbors

available_models = ['2019',
                    '2020']


def _build_nearest_neighbors(checkpoint):
    """Load *checkpoint* and precompute each token's 500 nearest neighbours.

    checkpoint: a Hugging Face model id (e.g. 'cardiffnlp/twitter-roberta-base-2019-90m').

    Returns a 7-tuple
    (model, tokenizer, embedding_matrix, knn_model, nbrs, distances, indices)
    where indices[i] contains the vocabulary ids of the 500 tokens whose
    input embeddings are closest (cosine distance) to token i.
    """
    model = AutoModel.from_pretrained(checkpoint)
    tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    # Input (word-piece) embedding table as a plain numpy array.
    embedding_matrix = model.embeddings.word_embeddings.weight.detach().numpy()
    knn_model = NearestNeighbors(n_neighbors=500,
                                 metric='cosine',
                                 algorithm='auto',
                                 n_jobs=3)
    nbrs = knn_model.fit(embedding_matrix)
    # Query the index with the whole vocabulary at once so lookups in the
    # UI are a simple O(1) row access into `indices`.
    distances, indices = nbrs.kneighbors(embedding_matrix)
    return model, tokenizer, embedding_matrix, knn_model, nbrs, distances, indices


# Keep the original per-year global names so downstream code is unaffected.
(model_2019, tokenizers_2019, embedding_matrix_2019, knn_model_2019,
 nbrs_2019, distances_2019, indices_2019) = _build_nearest_neighbors(
    'cardiffnlp/twitter-roberta-base-2019-90m')

(model_2020, tokenizers_2020, embedding_matrix_2020, knn_model_2020,
 nbrs_2020, distances_2020, indices_2020) = _build_nearest_neighbors(
    'cardiffnlp/twitter-roberta-base-jun2020')


title = "How does a word's meaning change with time?"

def topk(word, model):
    """Return the 500 nearest-neighbour tokens of *word* for a given model year.

    word: the query word typed by the user.
    model: one of available_models ('2019' or '2020').

    Only the first subword of *word* is used: index 1 of the encoding,
    i.e. the token immediately after the <s> BOS marker — multi-subword
    words are truncated to their first piece.

    Returns a list of decoded neighbour tokens.
    Raises ValueError for an unknown model choice (the original silently
    returned None, which Gradio would render as the literal text "None").
    """
    lookup = {
        '2019': (tokenizers_2019, indices_2019),
        '2020': (tokenizers_2020, indices_2020),
    }
    if model not in lookup:
        raise ValueError(f"Unknown model {model!r}; expected one of {available_models}")
    tokenizer, indices = lookup[model]

    token_ids = tokenizer.encode(word)
    # token_ids[0] is <s>; token_ids[1] is the first subword of the query.
    outs = [tokenizer.decode(i) for i in indices[token_ids[1]]]
    for out in outs:
        print(out)  # preserved debug trace from the original implementation
    return outs

# with gr.Blocks() as demo:
#     gr.Markdown(f" # {title}")
#     # gr.Markdown(f" ## {description1}")
#     # gr.Markdown(f"{description2}")
#     # gr.Markdown(f"{description3}")
#     with gr.Row():
#         word = gr.Textbox(label="Word")
#     with gr.Row():
#         greet_btn = gr.Button("Compute")
#     with gr.Row():
#         greet_btn.click(fn=topk, inputs=[word,gr.Dropdown(models)], outputs=gr.outputs.Textbox())
# demo.launch()

# Simple Gradio UI: a word textbox plus a model-year dropdown; the
# neighbour list returned by topk() is rendered in a text box.
# NOTE: gr.outputs.Textbox() is the deprecated pre-3.x API and was removed
# in Gradio 3/4 — use the component class directly, as the inputs already do.
interface = gr.Interface(fn=topk,
                         inputs=[gr.Textbox(label="Word"),
                                 gr.Dropdown(available_models, label="Model")],
                         outputs=gr.Textbox(label="Nearest neighbours"))
interface.launch()