# word_embeddings / app.py
# felipekitamura's picture
# Update app.py
# 34222dc verified
import gensim.downloader
import gradio as gr
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
# Word vectors loaded through gensim's downloader; ``inference`` uses this
# object for vector lookups and nearest-neighbour search.
model = gensim.downloader.load("word2vec-google-news-300") # alternative: glove-wiki-gigaword-50
# Fixed path the plot image is saved to; ``inference`` returns this path.
cache = "/home/user/app/d.png"
# Function to reduce dimensions
def reduce_dimensions(data, method='PCA'):
    """Project ``data`` onto two components using PCA or t-SNE.

    Args:
        data: Array-like handed unchanged to the reducer's ``fit_transform``.
        method: ``'PCA'`` (default) or ``'TSNE'``.

    Returns:
        The result of the selected reducer's ``fit_transform(data)``.

    Raises:
        ValueError: If ``method`` is neither ``'PCA'`` nor ``'TSNE'``.
    """
    # The local is called ``reducer`` so it does not shadow the module-level
    # word-vector ``model``.
    if method == 'PCA':
        reducer = PCA(n_components=2)
    elif method == 'TSNE':
        # NOTE(review): perplexity is fixed at 4; presumably the input needs
        # more samples than that for t-SNE to accept it -- confirm.
        reducer = TSNE(n_components=2, learning_rate='auto', init='random', perplexity=4)
    else:
        # Previously an unknown method fell through and failed later with an
        # UnboundLocalError; fail early with a message naming the bad value.
        raise ValueError(
            f"Unknown reduction method {method!r}; expected 'PCA' or 'TSNE'"
        )
    return reducer.fit_transform(data)
# Plotting function
def plot_reduced_data(reduced_data, labels, title):
    """Draw a labelled 2-D scatter plot with two arrows and save it to ``cache``.

    Args:
        reduced_data: Array indexed as ``[row, column]``; columns 0 and 1 are
            plotted as x and y. Rows 0, 1, 2 and the last row anchor the
            arrows, so at least three rows are required.
        labels: Iterable of strings, one per row, in row order.
        title: Title placed above the plot.

    Returns:
        None. The figure is written to the module-level ``cache`` path.
    """

    def draw_arrow(start_row, end_row):
        # Empty annotation text: only the green arrow itself is drawn.
        plt.annotate(
            '',
            xy=(reduced_data[end_row, 0], reduced_data[end_row, 1]),
            xytext=(reduced_data[start_row, 0], reduced_data[start_row, 1]),
            arrowprops=dict(arrowstyle="->", color='green', lw=3),
        )

    fig = plt.figure(figsize=(10, 8))
    try:
        plt.scatter(reduced_data[:, 0], reduced_data[:, 1], alpha=0.6)
        for i, label in enumerate(labels):
            # Leading space keeps the text from sitting on top of the marker.
            plt.annotate(" " + label, (reduced_data[i, 0], reduced_data[i, 1]), fontsize=18)
        plt.title(title)

        # Arrow 1 starts at row 0 and ends at row 1.
        draw_arrow(0, 1)
        # Arrow 2 starts at row 2 and ends at the last row.
        draw_arrow(2, -1)

        plt.xlabel('Component 1')
        plt.ylabel('Component 2')
        plt.grid(True)
        plt.savefig(cache)  # , dpi=300)
    finally:
        # Each call creates a new figure; close it so figures do not
        # accumulate across repeated calls.
        plt.close(fig)
# Text passed to gr.Interface as its ``description``.
description = """
### Word Embedding Demo App
Universidade Federal de São Paulo - Escola Paulista de Medicina
The output is Word3 + (Word2 - Word1)
Credits:
* Gensim
* Word2Vec
"""
# Text inputs; passed to gr.Interface as ``inputs`` in this order.
Word1 = gr.Textbox()
Word2 = gr.Textbox()
Word3 = gr.Textbox()
# NOTE(review): ``label`` is not referenced by the gr.Interface call in this file.
label = gr.Label(show_label=True, label="Word4")
# Image component; passed to gr.Interface as ``outputs``.
sp = gr.Image()
def inference(word1, word2, word3):
    """Compute ``word3 + (word2 - word1)``, plot it with its neighbours, and
    return the path of the saved image.

    The plotted rows are, in order: ``word1``, ``word2``, ``word3``, up to six
    nearest neighbours of the computed vector (skipping any that repeat an
    input word), and finally the computed vector itself. Keeping the inputs at
    rows 0-2 and the result last is what ``plot_reduced_data`` relies on for
    its arrows.

    Args:
        word1: Word whose vector is subtracted.
        word2: Word whose vector is added.
        word3: Word the difference ``word2 - word1`` is applied to.

    Returns:
        The module-level ``cache`` path the plot was saved to.

    Raises:
        gr.Error: If any input is empty or missing from the model vocabulary.
    """
    # Surrounding whitespace from the text boxes would make a lookup fail.
    word1, word2, word3 = ((w or "").strip() for w in (word1, word2, word3))
    query_words = [word1, word2, word3]

    # Report unknown words by name instead of letting a bare KeyError escape.
    unknown = [w for w in query_words if w not in model]
    if unknown:
        raise gr.Error(
            "Not in the model vocabulary: " + ", ".join(repr(w) for w in unknown)
        )

    transform = model[word3] + model[word2] - model[word1]
    output = model.similar_by_vector(transform)

    # Parallel lists instead of a dict keyed by word: a dict collapses
    # repeated input words, which shifts the row positions the arrows use.
    neighbours = [w for w, _score in output[:6] if w not in query_words]
    plotted_words = query_words + neighbours
    labels = plotted_words + [word3 + " + (" + word2 + " - " + word1 + ")"]
    data = np.stack([model[w] for w in plotted_words] + [transform])

    reduced_data_pca = reduce_dimensions(data, method='PCA')
    plot_reduced_data(reduced_data_pca, labels, 'PCA Results')
    # NOTE(review): every call writes to the same ``cache`` file, so
    # overlapping requests could overwrite each other's image -- confirm
    # whether concurrent use matters here.
    return cache
# Example inputs; each row is ordered [Word1, Word2, Word3] to match ``inputs``.
examples = [
["woman", "man", "girl"],
["woman", "man", "granddaughter"],
["woman", "man", "aunt"],
]
# Wire ``inference`` to the three text inputs and the image output.
iface = gr.Interface(
fn=inference,
inputs=[Word1, Word2, Word3],
outputs=sp,
description=description,
examples=examples
)
# Runs at module level, i.e. whenever this file is executed or imported.
iface.launch()