"""Gradio demo listing the tokens nearest to a word in each model's embedding space."""
import functools

import gradio as gr
from sklearn.neighbors import NearestNeighbors
from transformers import AutoModel, AutoTokenizer

available_models = [
    'cardiffnlp/twitter-roberta-base-2019-90m',
    'cardiffnlp/twitter-roberta-base-jun2020',
]

# Number of neighbours retrieved for every query token.
N_NEIGHBORS = 500

# Load every checkpoint and its tokenizer once, up front, so request handlers
# only perform dictionary lookups.
models = {}
tokenizers = {}
for model_name in available_models:
    models[model_name] = AutoModel.from_pretrained(model_name)
    tokenizers[model_name] = AutoTokenizer.from_pretrained(model_name)


@functools.lru_cache(maxsize=None)
def _fitted_knn(MODEL):
    """Return ``(knn, embedding_matrix)`` for *MODEL*, fitting the index only once.

    The result is memoised per model name, so the input-embedding matrix is
    extracted and the cosine nearest-neighbour index is fitted a single time
    instead of on every request.

    Raises:
        KeyError: if *MODEL* is not a key of ``models``.
    """
    embedding_matrix = models[MODEL].embeddings.word_embeddings.weight
    embedding_matrix = embedding_matrix.detach().numpy()
    knn = NearestNeighbors(
        n_neighbors=N_NEIGHBORS,
        metric='cosine',
        algorithm='auto',
        n_jobs=3,
    )
    knn.fit(embedding_matrix)
    return knn, embedding_matrix


@functools.lru_cache(maxsize=None)
def topk_model(MODEL):
    """Return the neighbour table of every row of *MODEL*'s embedding matrix.

    Args:
        MODEL: one of the names in ``available_models``.

    Returns:
        A ``(distances, indices, tokenizer)`` tuple: the two arrays produced by
        ``NearestNeighbors.kneighbors`` for the whole embedding matrix, and the
        tokenizer that belongs to the model.

    Raises:
        KeyError: if *MODEL* is not a key of ``models``.

    The full table is expensive to build, so it is memoised per model name;
    callers receive the same array objects on every call and should treat them
    as read-only.
    """
    knn, embedding_matrix = _fitted_knn(MODEL)
    distances, indices = knn.kneighbors(embedding_matrix)
    return distances, indices, tokenizers[MODEL]


title = "How does a word's meaning change with time?"


def topk(word, model):
    """Return the decoded tokens closest to *word* in *model*'s embedding space.

    Args:
        word: text typed by the user. It is encoded unchanged, so surrounding
            whitespace reaches the tokenizer as typed.
        model: one of the names in ``available_models``.

    Returns:
        A list of ``N_NEIGHBORS`` decoded token strings, in the order returned
        by ``NearestNeighbors.kneighbors``; an empty list when *word* is
        missing or blank.

    Raises:
        KeyError: if *model* is not a key of ``models``.

    Only the token at position 1 of the encoded sequence is looked up
    (position 0 is presumably the tokenizer's start-of-sequence token), so a
    word that is split into several tokens is represented by its first piece.
    """
    # A blank query would otherwise index whatever special token follows the
    # start token and return its neighbours, which is meaningless to the user.
    if word is None or not word.strip():
        return []

    knn, embedding_matrix = _fitted_knn(model)
    tokenizer = tokenizers[model]
    token_id = tokenizer.encode(word)[1]

    # Query just the requested row rather than the whole vocabulary.
    _, indices = knn.kneighbors(embedding_matrix[token_id:token_id + 1])
    return [tokenizer.decode(int(i)) for i in indices[0]]


interface = gr.Interface(
    fn=topk,
    inputs=[
        gr.Textbox(label="Word"),
        # Preselect a model so a submission never arrives without one.
        gr.Dropdown(available_models, value=available_models[0], label="Model"),
    ],
    # Top-level component class, matching the input above; the legacy
    # ``gr.outputs`` namespace is deprecated.
    outputs=gr.Textbox(),
    title=title,
)
interface.launch()