# Spaces: Running
import gensim.downloader | |
import gradio as gr | |
import pandas as pd | |
import numpy as np | |
import matplotlib.pyplot as plt | |
from sklearn.decomposition import PCA | |
from sklearn.manifold import TSNE | |
# Name of the pre-trained embedding set requested from gensim's downloader.
# NOTE(review): judging by the name these are 50-dimensional GloVe vectors;
# confirm against the gensim-data catalogue.
_EMBEDDING_NAME = "glove-wiki-gigaword-50"

# Shared embedding model used by the inference callback.
model = gensim.downloader.load(_EMBEDDING_NAME)
def reduce_dimensions(data, method='PCA'):
    """Project *data* onto two dimensions.

    Args:
        data: Samples to project, passed unchanged to the reducer's
            ``fit_transform``.
        method: Which reducer to use: ``'PCA'`` (default) or ``'TSNE'``.

    Returns:
        Whatever the chosen reducer's ``fit_transform`` returns for *data*
        (both reducers are configured with ``n_components=2``).

    Raises:
        ValueError: If *method* is neither ``'PCA'`` nor ``'TSNE'``.
    """
    # The local is called `reducer` (not `model`) so it does not shadow the
    # module-level embedding model.
    if method == 'PCA':
        reducer = PCA(n_components=2)
    elif method == 'TSNE':
        reducer = TSNE(
            n_components=2,
            learning_rate='auto',
            init='random',
            perplexity=3,
        )
    else:
        # Previously an unknown method fell through and failed later with an
        # UnboundLocalError; fail early with an actionable message instead.
        raise ValueError(
            f"Unsupported reduction method {method!r}; expected 'PCA' or 'TSNE'."
        )
    return reducer.fit_transform(data)
# Markdown text handed to gr.Interface as the app description.
# The leading and trailing empty entries reproduce the newline that follows
# the opening quotes and the one that precedes the closing quotes.
description = "\n".join(
    (
        "",
        "### Word Embedding Demo App",
        "Universidade Federal de São Paulo - Escola Paulista de Medicina",
        "The output is Word3 + (Word2 - Word1)",
        "Credits:",
        "* Gensim",
        "* Glove",
        "",
    )
)
# One free-text input per word of the analogy; each name gets its own
# Textbox instance, created in order.
Word1, Word2, Word3 = (gr.Textbox() for _ in range(3))

# NOTE(review): this Label component is not passed to gr.Interface anywhere
# in this file; confirm whether it is still needed.
label = gr.Label(label="Word4", show_label=True)

# Scatter plot fed by the DataFrame that `inference` returns; x / y / color
# name the DataFrame columns to plot.
# NOTE(review): `label="label"` sets the component caption rather than
# selecting a column - confirm that the literal caption "label" is intended.
sp = gr.ScatterPlot(
    x="x",
    y="y",
    color="color",
    label="label",
)
def _resolve_word(word):
    """Return the vocabulary token matching *word*, or raise ``gr.Error``."""
    text = "" if word is None else str(word)
    # Try the text exactly as typed first so inputs that already worked keep
    # resolving to the same token; only then fall back to a trimmed,
    # lower-cased variant (helps with stray spaces or capital letters).
    for candidate in (text, text.strip().lower()):
        if candidate in model:
            return candidate
    raise gr.Error(f"'{text}' is not in the embedding vocabulary.")


def inference(word1, word2, word3):
    """Solve the analogy ``word3 + (word2 - word1)`` and lay the words out in 2-D.

    The three query words and up to four nearest neighbours of the analogy
    vector are projected to two dimensions with PCA.

    Args:
        word1: Word whose vector is subtracted.
        word2: Word whose vector is added.
        word3: Word the offset ``word2 - word1`` is applied to.

    Returns:
        A pandas DataFrame with one row per plotted word and the columns
        ``x`` and ``y`` (PCA coordinates) and ``color`` (the word itself).

    Raises:
        gr.Error: If any input word is missing from the embedding vocabulary
            (previously this surfaced as a raw ``KeyError``).
    """
    queries = [_resolve_word(word) for word in (word1, word2, word3)]
    first, second, third = queries
    target = model[third] + model[second] - model[first]

    # Skip neighbours that are just the query words again, so the four
    # neighbour slots are spent on genuinely new words.
    neighbours = [
        token
        for token, _score in model.similar_by_vector(target)
        if token not in queries
    ][:4]

    # dict.fromkeys removes repeated words (e.g. the same word typed twice)
    # while keeping first-seen order.
    tokens = list(dict.fromkeys(queries + neighbours))
    vectors = np.stack([model[token] for token in tokens])
    coords = reduce_dimensions(vectors, method='PCA')

    return pd.DataFrame(
        {
            "x": coords[:, 0],
            "y": coords[:, 1],
            "color": tokens,
        }
    )
# Example rows offered in the UI: the same "woman" -> "man" offset applied to
# three different third words.
examples = [
    ["woman", "man", third_word]
    for third_word in ("aunt", "girl", "granddaughter")
]
# Wire the callback to its widgets: three text inputs in, one scatter plot out.
iface = gr.Interface(
    inference,              # fn
    [Word1, Word2, Word3],  # inputs
    sp,                     # outputs
    examples=examples,
    description=description,
)

# Start the app as soon as the module is executed.
iface.launch()