Spaces:

rachith
/

TemporalSemantics

Runtime error

App Files Files Community

TemporalSemantics / app.py

rachith

removing share = true

e685211 almost 2 years ago

raw

history blame contribute delete

4 kB

	import gradio as gr
	from transformers import AutoModel, AutoTokenizer
	from sklearn.neighbors import NearestNeighbors

	# Static text for the Gradio page.
	# NOTE: `title` is assigned a second time further down, before the
	# interface is built, so this first value is never the one displayed.
	title = "Temporal evolution of word association (Overselling :P)"
	description = "Based on TimeLMs which is a RoBERTa model finetuned on tweets at periodic interval"
	article = "This outputs the top 500 similar tokens to the input word, as a list. Stay tuned for more info"

	# Model years offered in the dropdown; `topk` compares its `model`
	# argument against these same strings.
	available_models = ['2019', '2020', '2022']

	# --- 2019 checkpoint ---
	# Load the model and its tokenizer, export the input word-embedding table to
	# NumPy, and precompute for every embedding row its 500 nearest rows under
	# cosine distance. `topk` later indexes `indices_2019` by token id.
	model_2019 = AutoModel.from_pretrained("cardiffnlp/twitter-roberta-base-2019-90m")
	tokenizers_2019 = AutoTokenizer.from_pretrained("cardiffnlp/twitter-roberta-base-2019-90m")
	# detach() is required before numpy() on a tensor that tracks gradients.
	embedding_matrix_2019 = model_2019.embeddings.word_embeddings.weight.detach().numpy()
	knn_model_2019 = NearestNeighbors(n_neighbors=500, metric="cosine", algorithm="auto", n_jobs=3)
	nbrs_2019 = knn_model_2019.fit(embedding_matrix_2019)
	# Query the fitted index with the same matrix: one result row per embedding row.
	distances_2019, indices_2019 = nbrs_2019.kneighbors(embedding_matrix_2019)


	# --- 2020 checkpoint ---
	# Load the model and its tokenizer, export the input word-embedding table to
	# NumPy, and precompute for every embedding row its 500 nearest rows under
	# cosine distance. `topk` later indexes `indices_2020` by token id.
	model_2020 = AutoModel.from_pretrained("cardiffnlp/twitter-roberta-base-jun2020")
	tokenizers_2020 = AutoTokenizer.from_pretrained("cardiffnlp/twitter-roberta-base-jun2020")
	# detach() is required before numpy() on a tensor that tracks gradients.
	embedding_matrix_2020 = model_2020.embeddings.word_embeddings.weight.detach().numpy()
	knn_model_2020 = NearestNeighbors(n_neighbors=500, metric="cosine", algorithm="auto", n_jobs=3)
	nbrs_2020 = knn_model_2020.fit(embedding_matrix_2020)
	# Query the fitted index with the same matrix: one result row per embedding row.
	distances_2020, indices_2020 = nbrs_2020.kneighbors(embedding_matrix_2020)

	# --- 2022 checkpoint ---
	# Load the model and its tokenizer, export the input word-embedding table to
	# NumPy, and precompute for every embedding row its 500 nearest rows under
	# cosine distance. `topk` later indexes `indices_2022` by token id.
	model_2022 = AutoModel.from_pretrained('cardiffnlp/twitter-roberta-base-2022-154m')
	tokenizers_2022 = AutoTokenizer.from_pretrained('cardiffnlp/twitter-roberta-base-2022-154m')
	# detach() is required before numpy() on a tensor that tracks gradients.
	embedding_matrix_2022 = model_2022.embeddings.word_embeddings.weight.detach().numpy()
	knn_model_2022 = NearestNeighbors(
	    n_neighbors=500,
	    metric='cosine',
	    algorithm='auto',
	    n_jobs=3,
	)
	nbrs_2022 = knn_model_2022.fit(embedding_matrix_2022)
	# The query must go to the index fitted on the 2022 embeddings; querying an
	# index fitted on another year's embeddings would return that year's
	# neighbours instead.
	distances_2022, indices_2022 = nbrs_2022.kneighbors(embedding_matrix_2022)


	# Final page title: this reassignment replaces the `title` set near the top
	# of the file, and it is the value later passed to gr.Interface.
	title = "How does a word's meaning change with time?"

	def _lookup_tables(model):
	    """Return the ``(tokenizer, neighbour_indices)`` pair for *model*.

	    *model* is one of the year strings '2019', '2020' or '2022'; any other
	    value (including ``None``) yields ``None``.
	    """
	    if model == '2019':
	        return tokenizers_2019, indices_2019
	    if model == '2020':
	        return tokenizers_2020, indices_2020
	    if model == '2022':
	        return tokenizers_2022, indices_2022
	    return None


	def topk(word, model):
	    """Return the decoded nearest-neighbour tokens of *word* for a model year.

	    Only the token id at position 1 of the encoded input is looked up
	    (position 0 is presumably the start-of-sequence marker added by
	    ``encode``), so a word that splits into several sub-tokens is
	    represented by its first sub-token only.

	    Args:
	        word: Text entered by the user. It is encoded as given, without
	            trimming.
	        model: Year string selecting the checkpoint ('2019', '2020', '2022').

	    Returns:
	        A list of decoded token strings, one per entry in the precomputed
	        neighbour row of the looked-up token. An empty list is returned when
	        *word* is empty or whitespace-only, or when *model* is not one of
	        the known years.
	    """
	    # Without any content, position 1 of the encoding is not a token of the
	    # word, so there is nothing meaningful to look up.
	    if not word or not word.strip():
	        return []

	    tables = _lookup_tables(model)
	    if tables is None:
	        # Unknown or unselected model: keep the return type a list.
	        return []

	    tokenizer, neighbour_indices = tables
	    token_ids = tokenizer.encode(f'{word}')
	    print(token_ids)  # debug aid: shows how the input was tokenized
	    return [tokenizer.decode(i) for i in neighbour_indices[token_ids[1]]]

	# with gr.Blocks() as demo:
	# gr.Markdown(f" # {title}")
	# # gr.Markdown(f" ## {description1}")
	# # gr.Markdown(f"{description2}")
	# # gr.Markdown(f"{description3}")
	# with gr.Row():
	# word = gr.Textbox(label="Word")
	# with gr.Row():
	# greet_btn = gr.Button("Compute")
	# with gr.Row():
	# greet_btn.click(fn=topk, inputs=[word,gr.Dropdown(models)], outputs=gr.outputs.Textbox())
	# demo.launch()

	# Build the single-function web UI: a free-text word and a model-year
	# dropdown are passed to `topk`, and its result is shown in a text box.
	interface = gr.Interface(
	    fn=topk,
	    inputs=[gr.Textbox(label="Word"), gr.Dropdown(available_models)],
	    # Use the top-level component, consistent with the inputs above; the
	    # legacy `gr.outputs` namespace is deprecated and is not available in
	    # newer Gradio releases.
	    outputs=gr.Textbox(),
	    title=title,
	    description=description,
	    article=article,
	)
	interface.launch()