karmiq's picture
Fix displaying output for first example
bddb0ac
raw
history blame contribute delete
No virus
3.12 kB
import os
from dataclasses import dataclass
from operator import add, sub
import gradio as gr
import numpy as np
from datasets import load_dataset
from sklearn.metrics.pairwise import cosine_similarity
from pyparsing import Word, alphas, Char, ParseException
# Grammar for word arithmetic: a term, followed by zero or more
# "<operator> <term>" pairs, e.g. "king - man + woman".
term = Word(alphas)  # a run of alphabetic characters
# NOTE(review): the character set below also contains a space. pyparsing
# normally skips leading whitespace before matching, so the space presumably
# never matches as an operator -- confirm, and consider Char("+-").
operator = Char("+ -")
expression = term + (operator + term)[...]
# Maps each operator symbol to the binary function applied to the vectors.
operations = {"+": add, "-": sub}
def parse_expression(input):
    """Parse a word-arithmetic expression into a flat sequence of tokens.

    The text is matched against the module-level ``expression`` grammar
    (a term followed by any number of ``operator term`` pairs), so the
    result alternates terms and operators: ``term, op, term, op, term, ...``.

    Args:
        input: The expression text, e.g. ``"king - man + woman"``.

    Returns:
        The pyparsing parse results for the whole input.

    Raises:
        gr.Error: If the input does not match the grammar. The message
            carries the parser's own message and the failing position.
    """
    try:
        # parseAll=True requires the grammar to consume the entire input.
        # Without it, trailing text that does not fit (a dangling operator,
        # digits, a second word without an operator) is silently ignored and
        # the user gets results for a truncated expression.
        return expression.parseString(input, parseAll=True)
    except ParseException as e:
        raise gr.Error(f"Parsing error: {e.msg} at position [{e.loc}].") from e
def evaluate_expression(input):
    """Fold a parsed expression into a single vector, left to right.

    Args:
        input: Token sequence alternating terms and operators, starting
            with a term (``term, op, term, op, term, ...``).

    Returns:
        The vector obtained by starting from the first term's vector and
        applying each operator with the vector of the term that follows it.
    """
    # Even positions hold terms, odd positions hold operators.
    symbols = input[1::2]
    following_terms = input[2::2]

    accumulated = word_to_vectors(input[0])
    for symbol, following_term in zip(symbols, following_terms):
        apply = operations[symbol]
        accumulated = apply(accumulated, word_to_vectors(following_term))
    return accumulated
# Load the "train" split of the karmiq/glove dataset once, at import time,
# and keep it as a DataFrame for per-word lookups.
dataset = load_dataset("karmiq/glove", split="train")
df = dataset.to_pandas()
# Row-aligned arrays built from the same DataFrame: all_words[i] is the word
# whose embedding is all_vectors[i]. Used for the similarity search.
all_words = df["word"].to_numpy()
all_vectors = np.array(df["embeddings"].to_list())
def word_to_vectors(word):
    """Look up the embedding stored for ``word`` in the dataset.

    Args:
        word: The word to look up; compared for exact equality against the
            ``word`` column.

    Returns:
        The ``embeddings`` value of the first row whose word matches.

    Raises:
        gr.Error: If no row matches the word.
    """
    matches = df.loc[df["word"] == word].embeddings.to_numpy()
    if len(matches) >= 1:
        return matches[0]
    raise gr.Error("Word not found in the dictionary.")
def expression_to_vectors(input):
    """Parse the expression text and evaluate it to a single vector.

    Errors raised by parsing or by word lookup propagate unchanged.
    """
    tokens = parse_expression(input)
    return evaluate_expression(tokens)
def get_results(expression):
    """Return the words whose embeddings are closest to the expression result.

    The expression is lower-cased, parsed, and evaluated to a vector; every
    word in the dataset is then ranked by cosine similarity to that vector.

    Args:
        expression: Word arithmetic as text, e.g. ``"king - man + woman"``.

    Returns:
        A dict mapping up to 10 words to their cosine similarity, inserted
        in descending order of similarity. Words that appear as terms in the
        expression itself are left out.

    Raises:
        gr.Error: If the expression is empty or blank, cannot be parsed, or
            contains a word that is not in the dataset.
    """
    # Treat blank input like empty input rather than surfacing a parser error.
    if not expression or not expression.strip():
        raise gr.Error("Please provide an expression.")

    expression = expression.lower()

    tokens = parse_expression(expression)
    # Terms sit at the even positions of the parsed tokens. Taking the words
    # from the parse (instead of splitting the text on whitespace) also
    # excludes them when the input has no spaces, e.g. "king-man+woman".
    # Building the set once avoids re-splitting the text per vocabulary word.
    input_words = set(tokens[::2])

    vectors = evaluate_expression(tokens)

    similarity_scores = cosine_similarity([vectors], all_vectors)[0]
    top_indices = np.argsort(similarity_scores)[::-1]

    # Walk the ranking from the best match and stop as soon as ten results
    # are collected, instead of materializing an entry for every word.
    results = {}
    for i in top_indices:
        word = all_words[i]
        if word in input_words:
            continue
        results[word] = similarity_scores[i]
        if len(results) == 10:
            break
    return results
# Example expressions offered in the UI; the first one is also the initial
# value of the input box.
examples = [
    "king - man + woman",
    "mother - woman + man",
    "berlin - germany + france",
    "saxophone - jazz + classical",
]
# Computed at import time so the output label can show the result of the
# first example before the user runs anything.
initial_output = get_results(examples[0])
# Custom CSS handed to gr.Blocks via its css= argument.
css = """
button.gallery-item { color: var(--body-text-color) !important; }
.output-class { color: var(--color-red-700) !important; }
.confidence-set .label .text { font-weight: var(--weight-medium); }
.confidence-set:hover .label { color: var(--color-red-700) !important; }
"""
# Build the UI: an expression box with a Run button, a description, and
# clickable examples, plus a label showing the closest words.
# NOTE(review): the nesting below was reconstructed from a source whose
# indentation was lost -- confirm the row/column layout against the original.
with gr.Blocks(
    css=css,
    theme=gr.themes.Monochrome(radius_size=gr.themes.sizes.radius_sm),
) as app:
    with gr.Row():
        with gr.Column():
            # NOTE(review): this module-level name shadows the built-in input().
            input = gr.Textbox(value=examples[0], label="Expression")
            with gr.Row():
                btn = gr.Button("Run", variant="primary")
            with gr.Row():
                gr.Markdown(
                    "Demonstration of computing cosine similarity of embeddings "
                    "from the [GloVe](https://nlp.stanford.edu/projects/glove/) dataset."
                )
            with gr.Row():
                gr.Examples(examples, inputs=input)
        with gr.Column():
            # Pre-populated with the result computed for the first example.
            output = gr.Label(label="Closest words", value=initial_output)
    # Clicking Run evaluates the textbox content and updates the label.
    btn.click(fn=get_results, inputs=input, outputs=output)
# Start the app; runs whenever this module is executed or imported.
app.launch()