Spaces:
Build error
Build error
from itertools import product | |
import random | |
from turtle import hideturtle | |
import requests | |
import json | |
import lxml.etree as ET | |
import gensim | |
import pandas as pd | |
import nltk | |
# from nltk.corpus import framenet as fn
# --- circumvent threading issues with FrameNet
# Rather than importing the ready-made ``nltk.corpus.framenet`` object (see the
# commented-out import above), locate the ``corpora/framenet_v17`` resource and
# build a dedicated FramenetCorpusReader that is used as ``fn`` in this module.
fn_root = nltk.data.find("{}/{}".format("corpora", "framenet_v17"))
# the resolved corpus location is echoed at import time
print(fn_root)
# file ids passed to the reader
fn_files = ["frRelation.xml", "frameIndex.xml", "fulltextIndex.xml", "luIndex.xml", "semTypes.xml"]
fn = nltk.corpus.reader.framenet.FramenetCorpusReader(fn_root, fn_files)
# ---
import streamlit as st | |
from sociolome import lome_wrapper | |
def similarity(gensim_m, frame_1, frame_2):
    """Return how similar two frames are according to the embedding model.

    Both frame names are looked up under their ``fn_``-prefixed keys. The
    result is ``1 - gensim_m.distance(key_1, key_2)``; ``None`` is returned
    when either key is absent from ``gensim_m``.
    """
    key_1, key_2 = f"fn_{frame_1}", f"fn_{frame_2}"
    if key_1 in gensim_m and key_2 in gensim_m:
        return 1 - gensim_m.distance(key_1, key_2)
    return None
def rank(gensim_m, frame_1, frame_2):
    """Return the position of ``frame_2`` among the neighbours of ``frame_1``.

    Returns ``0`` when both names are the same frame, the 1-based position of
    ``frame_2`` in the 1200 most similar entries of ``frame_1`` otherwise, and
    ``-1`` when ``frame_2`` does not occur in that list.
    """
    source_key = f"fn_{frame_1}"
    wanted_key = f"fn_{frame_2}"
    if source_key == wanted_key:
        return 0

    neighbours = gensim_m.most_similar(source_key, topn=1200)
    positions = (
        position
        for position, (key, _) in enumerate(neighbours, start=1)
        if key == wanted_key
    )
    return next(positions, -1)
def format_frame_description(frame_def_xml):
    """Flatten a frame-definition XML element into plain text.

    The text of the root and of its children is concatenated up to (and not
    including) the first ``<ex>`` child. The text of ``<fen>`` children is
    upper-cased. Finally, "frames"/"frame" are replaced by "stories"/"story".

    Args:
        frame_def_xml: an element exposing ``.text``, iteration over children,
            and ``.tag``/``.text``/``.tail`` on each child.

    Returns:
        The flattened description as a string.
    """
    parts = []
    if frame_def_xml.text:
        parts.append(frame_def_xml.text)

    for elem in frame_def_xml:
        if elem.tag == "ex":
            # everything from the first example onwards is left out
            break
        # an empty child (e.g. ``<fen/>``) has ``text`` set to None
        text = elem.text or ""
        parts.append(text.upper() if elem.tag == "fen" else text)
        if elem.tail:
            parts.append(elem.tail)

    return "".join(parts).replace("frames", "stories").replace("frame", "story")
def get_frame_definition(frame_info):
    """Return a formatted description for ``frame_info``.

    The first sentence of ``frame_info.definitionMarkup`` (closed with
    ``</def-root>``) is parsed first; if that is not well-formed XML, the
    complete markup is parsed instead. The parsed element is then passed to
    ``format_frame_description``.
    """
    markup = frame_info.definitionMarkup
    try:
        # try extracting just the first sentence
        first_sentence = nltk.sent_tokenize(markup)[0]
        parsed = ET.fromstring(first_sentence + "</def-root>")
    except ET.XMLSyntaxError:
        # otherwise, use the full definition
        parsed = ET.fromstring(frame_info.definitionMarkup)
    return format_frame_description(parsed)
def get_random_example(frame_info):
    """Pick a random exemplar sentence (longer than 30 characters) of a frame.

    Args:
        frame_info: frame record exposing ``FE`` (mapping of frame-element name
            to an object with ``coreType``) and ``lexUnit`` (mapping of lexical
            unit name to an object with ``exemplars``).

    Returns:
        A dict with keys ``text``, ``target_lu``, ``target_idx`` and
        ``core_fes``, or ``None`` when no exemplar qualifies.
    """
    # The core roles do not depend on the exemplar, so compute them once.
    core_roles = {
        fe_name
        for fe_name, fe_info in frame_info.FE.items()
        if fe_info.coreType == "Core"
    }

    candidates = []
    for lu_name, lu_info in frame_info["lexUnit"].items():
        for exemplar in lu_info.exemplars:
            if len(exemplar.text) <= 30:
                continue
            # NOTE(review): each overt FE span is unpacked as (role, start, end).
            # NLTK's FrameNet reader appears to store (start, end, name) --
            # TODO confirm; if so, ``core_fes`` is always empty here.
            candidates.append({
                "text": exemplar.text,
                "target_lu": lu_name,
                "target_idx": list(exemplar["Target"][0]),
                "core_fes": {
                    role: exemplar.text[start_idx:end_idx]
                    for role, start_idx, end_idx in exemplar.FE[0]
                    if role in core_roles
                },
            })

    if candidates:
        return random.choice(candidates)
    return None
def make_hint(gensim_m, target, current_closest):
    """Find a frame one step closer to ``target`` than ``current_closest``.

    The 1200 nearest neighbours of ``target`` are walked from the position of
    ``current_closest`` towards the target. The first frame that has more than
    10 lexical units and a usable example sentence is returned.

    Args:
        gensim_m: embedding model providing ``most_similar``.
        target: name of the secret frame (without the ``fn_`` prefix).
        current_closest: name of the best guess so far (without prefix).

    Returns:
        ``(prefixed_frame_name, exemplar_dict)``, or ``None`` if the guess
        already equals the target or no closer frame qualifies.
    """
    if target == current_closest:
        return None

    most_similar = gensim_m.most_similar(f"fn_{target}", topn=1200)
    current_key = f"fn_{current_closest}"

    # If the current guess is not among the neighbours at all, treat it as lying
    # beyond the end of the list instead of failing with a ValueError.
    current_position = next(
        (pos for pos, (word, _) in enumerate(most_similar) if word == current_key),
        len(most_similar),
    )

    # Walk from the neighbour just before the current guess towards the target.
    for next_closest, _ in reversed(most_similar[:current_position]):
        info = fn.frame(next_closest.replace("fn_", ""))
        if len(info.lexUnit) > 10:
            exemplar = get_random_example(info)
            if exemplar:
                return next_closest, exemplar
    return None
def get_typical_exemplar(frame_info):
    """Return the most "typical" exemplar sentence of a frame.

    Typical means as short as possible with as many core frame elements as
    possible: the exemplar minimising ``len(text) - 25 * len(core_fes)`` wins,
    and the first one wins on ties.

    Args:
        frame_info: frame record exposing ``FE`` (mapping of frame-element name
            to an object with ``coreType``) and ``lexUnit`` (mapping of lexical
            unit name to an object with ``exemplars``).

    Returns:
        A dict with keys ``text``, ``target_lu``, ``target_idx`` and
        ``core_fes``, or ``None`` when the frame has no exemplars.
    """
    # The core roles do not depend on the exemplar, so compute them once.
    core_roles = {
        fe_name
        for fe_name, fe_info in frame_info.FE.items()
        if fe_info.coreType == "Core"
    }

    # NOTE(review): each overt FE span is unpacked as (role, start, end).
    # NLTK's FrameNet reader appears to store (start, end, name) --
    # TODO confirm; if so, ``core_fes`` is always empty here.
    exemplars = [
        {
            "text": exemplar.text,
            "target_lu": lu_name,
            "target_idx": list(exemplar["Target"][0]),
            "core_fes": {
                role: exemplar.text[start_idx:end_idx]
                for role, start_idx, end_idx in exemplar.FE[0]
                if role in core_roles
            },
        }
        for lu_name, lu_info in frame_info["lexUnit"].items()
        for exemplar in lu_info.exemplars
    ]

    # try to find a "typical" exemplar --- typical -> as short as possible, as many FEs as possible
    return min(
        exemplars,
        key=lambda exa: len(exa["text"]) - 25 * len(exa["core_fes"]),
        default=None,
    )
def find_all_inheriting_frames(frame_name):
    """Return the names of all frames that inherit from ``frame_name``.

    Direct and indirect inheritors are collected by following "Inheritance"
    relations in which the visited frame is the super-frame. Every inheritor
    is listed exactly once, and ``frame_name`` itself is not included.

    Args:
        frame_name: name of the frame at the top of the hierarchy.

    Returns:
        A list of frame names, parents before their own inheritors.
    """
    # ``lineage`` doubles as the work queue; index 0 is the root itself.
    lineage = [frame_name]
    seen = {frame_name}
    cursor = 0
    while cursor < len(lineage):
        parent = lineage[cursor]
        cursor += 1
        for rel in fn.frame(parent).frameRelations:
            if rel.type.name != "Inheritance" or rel.superFrame.name != parent:
                continue
            child = rel.subFrame.name
            # A frame reachable along several paths is expanded only once, so
            # the result has no duplicates that would skew a random choice.
            if child not in seen:
                seen.add(child)
                lineage.append(child)
    return lineage[1:]
def has_enough_lus(frame, n=10):
    """Tell whether the frame named ``frame`` has more than ``n`` lexical units."""
    lexical_units = fn.frame(frame).lexUnit
    return n < len(lexical_units)
def choose_secret_frames():
    """Draw a random (event frame, entity frame) pair for a new game.

    Candidates are the frames inheriting from "Event" and from "Entity" that
    pass ``has_enough_lus``; one pair is drawn from all their combinations.
    """
    def eligible(root):
        # inheritors of ``root`` with enough lexical units
        return [name for name in find_all_inheriting_frames(root) if has_enough_lus(name)]

    event_frames = eligible("Event")
    entity_frames = eligible("Entity")
    candidate_pairs = list(product(event_frames, entity_frames))
    return random.choice(candidate_pairs)
def get_frame_info(frames):
    """Look up each frame name and pair it with a typical example sentence.

    Returns a list of ``(frame_name, frame_info, typical_exemplar)`` tuples.
    Names for which the lookup raises ``FileNotFoundError`` are left out.
    """
    collected = []
    for name in frames:
        try:
            info = fn.frame(name)
            exemplar = get_typical_exemplar(info)
        except FileNotFoundError:
            continue
        collected.append((name, info, exemplar))
    return collected
def get_frame_feedback(frames_and_info, gensim_m, secret_event, secret_entity):
    """Score every evoked frame against the two secret frames.

    Args:
        frames_and_info: iterable of ``(frame_name, frame_info,
            typical_exemplar)`` tuples.
        gensim_m: embedding model passed on to ``similarity`` and ``rank``.
        secret_event: name of the secret event frame.
        secret_entity: name of the secret entity frame.

    Returns:
        One dict per frame with the keys ``frame``, ``similarity_1``/``rank_1``
        (versus the event), ``similarity_2``/``rank_2`` (versus the entity),
        ``typical_words`` (up to five lexical-unit names) and
        ``typical_sentence``. Similarities are scaled by 100 and are ``None``
        when ``similarity`` returned ``None``; a rank of ``-1`` is reported as
        ``"far away"``.
    """
    def as_percentage(score):
        # Compare against None explicitly: a similarity of exactly 0.0 is a
        # real score and must not be reported as unknown.
        return score * 100 if score is not None else None

    def as_steps(position):
        return position if position != -1 else "far away"

    frame_feedback = []
    for evoked_frame, frame_info, typical_sentence in frames_and_info:
        lexunits = list(frame_info.lexUnit.keys())[:5]
        similarity_score_1 = similarity(gensim_m, secret_event, evoked_frame)
        similarity_rank_1 = rank(gensim_m, secret_event, evoked_frame)
        similarity_score_2 = similarity(gensim_m, secret_entity, evoked_frame)
        similarity_rank_2 = rank(gensim_m, secret_entity, evoked_frame)

        frame_feedback.append({
            "frame": evoked_frame,
            "similarity_1": as_percentage(similarity_score_1),
            "rank_1": as_steps(similarity_rank_1),
            "similarity_2": as_percentage(similarity_score_2),
            "rank_2": as_steps(similarity_rank_2),
            "typical_words": lexunits,
            "typical_sentence": typical_sentence["text"] if typical_sentence else None,
        })
    return frame_feedback
def run_game_cli(debug=True):
    """Play FillmorLe interactively on the command line.

    Two secret frames are drawn, then the player is repeatedly asked for a
    sentence. Each sentence is sent to the analysis service at
    ``http://127.0.0.1:9090/analyze``; the evoked frames are reported together
    with their similarity to the secret frames. Typing ``HINT`` prints a
    redacted example sentence instead. The loop ends once a single sentence
    evokes both secret frames.

    Args:
        debug: when true, the secret frames are printed before the game starts.
    """
    secret_event, secret_entity = choose_secret_frames()

    if debug:
        print(f"Shhhhhh you're not supposed to know, but the secret frames are {secret_event} and {secret_entity}")
        print("--------\n\n\n\n")

    print("Welcome to FillmorLe!")
    print("Words are not just words: behind every word, a story is hidden that appears in our imagination when we hear the word.")
    print()
    print("In this game, your job is to activate TWO SECRET STORIES by writing sentences.")
    print("There will be new secret stories every day -- the first story is always about an EVENT (something that happens in the world) and the second one about an ENTITY (a thing or concept).")
    print("Every time you write a sentence, I will tell you which stories are hidden below the surface, and how close these stories are to the secret stories.")
    print("Once you write a sentence that has both of the secret stories in it, you win. Good luck and be creative!")

    gensim_m = gensim.models.word2vec.KeyedVectors.load_word2vec_format("data/frame_embeddings.w2v.txt")

    num_guesses = 0
    # frame name -> similarity; a dict keeps one entry per frame, so repeated
    # guesses do not crowd the top-5 and membership tests work on frame names
    guesses_event = {}
    guesses_entity = {}

    def best_guesses(guesses):
        # five most similar guesses, best first
        return sorted(guesses.items(), key=lambda g: g[1], reverse=True)[:5]

    while True:
        closest_to_event = best_guesses(guesses_event)
        closest_to_entity = best_guesses(guesses_entity)
        closest_to_event_txt = ", ".join([f"{frm.upper()} ({sim:.2f})" for frm, sim in closest_to_event])
        closest_to_entity_txt = ", ".join([f"{frm.upper()} ({sim:.2f})" for frm, sim in closest_to_entity])

        print()
        # only analysed sentences count as guesses, hint requests do not
        print(f"==== Guess #{num_guesses + 1} ====")
        if secret_event in guesses_event:
            print("You already guessed SECRET STORY #1: ", secret_event.upper())
        elif closest_to_event:
            print("Best guesses (SECRET STORY #1):", closest_to_event_txt)

        if secret_entity in guesses_entity:
            print("You already guessed SECRET STORY #2: ", secret_entity.upper())
        elif closest_to_entity:
            print("Best guesses (SECRET STORY #2):", closest_to_entity_txt)

        sentence = input("Enter a sentence or type 'HINT' if you're stuck >>>> ").strip()

        if sentence == "HINT":
            hint_choice = None
            while hint_choice not in ("1", "2"):
                hint_choice = input("For which story do you want a hint? Type '1' or '2' >>>> ").strip()
                if hint_choice not in ("1", "2"):
                    print("Please type '1' or '2'.")

            if hint_choice == "1":
                hint_target = secret_event
                hint_current = closest_to_event[0][0] if closest_to_event else "Event"
            else:
                hint_target = secret_entity
                hint_current = closest_to_entity[0][0] if closest_to_entity else "Entity"

            if hint_current == hint_target:
                print("You don't need a hint for this story! Maybe you want a hint for the other one?")
                continue

            hint = make_hint(gensim_m, hint_target, hint_current)
            if hint is None:
                print("Sorry, you're already too close to give you a hint!")
            else:
                _, hint_example = hint
                hint_tgt_idx = hint_example["target_idx"]
                hint_example_redacted = hint_example["text"][:hint_tgt_idx[0]] + "******" + hint_example["text"][hint_tgt_idx[1]:]
                print(f"Your hint sentence is: «{hint_example_redacted}»")
                print(f"PRO TIP 1: the '******' hide a secret word. Guess the word and you will find a story that takes your one step closer to find SECRET STORY #{hint_choice}")
                print("PRO TIP 2: if you don't get the hint, just ask for a new one! You can do this as often as you want.")
                print("\n\n")
            continue

        num_guesses += 1

        r = requests.get("http://127.0.0.1:9090/analyze", params={"text": sentence})
        lome_data = json.loads(r.text)

        # items of the form "T:<frame>@..." carry the name of an evoked frame
        frames = set()
        for token_items in lome_data["analyses"][0]["frame_list"]:
            for item in token_items:
                if item.startswith("T:"):
                    frames.add(item.split("@")[0].replace("T:", ""))

        frames_and_info = get_frame_info(frames)
        frame_feedback = get_frame_feedback(frames_and_info, gensim_m, secret_event, secret_entity)

        for i, feedback in enumerate(frame_feedback):
            print(f"STORY {i}: {feedback['frame'].upper()}")
            if feedback["typical_sentence"]:
                print(f"\ttypical context: «{feedback['typical_sentence']}»")
            print("\ttypical words:", ", ".join(feedback["typical_words"]), "...")

            similarity_1 = feedback["similarity_1"]
            similarity_2 = feedback["similarity_2"]
            # record each score under the frame it belongs to
            if similarity_1 is not None:
                guesses_event[feedback["frame"]] = similarity_1
                print(f"\tsimilarity to SECRET STORY #1: {similarity_1:.2f}")
            if similarity_2 is not None:
                guesses_entity[feedback["frame"]] = similarity_2
                print(f"\tsimilarity to SECRET STORY #2: {similarity_2:.2f}")
            if similarity_1 is None and similarity_2 is None:
                print("similarity: unknown")
            print()

        if not frames_and_info:
            print("I don't know any of the stories in your sentence. Try entering another sentence.")
        elif secret_event in frames and secret_entity in frames:
            print(f"YOU WIN! You made a sentence with both of the SECRET STORIES: {secret_event.upper()} and {secret_entity.upper()}.\nYou won the game in {num_guesses} guesses, great job!")
            break
        elif secret_event in frames:
            print(f"Great, you guessed SECRET STORY #1! It was {secret_event.upper()}!")
            print("To win, make a sentence with this story and SECRET STORY #2 hidden in it.")
        elif secret_entity in frames:
            print(f"Great, you guessed SECRET STORY #2! It was {secret_entity.upper()}!")
            print("To win, make a sentence with this story and SECRET STORY #1 hidden in it.")
# dummy version | |
# def analyze_sentence(sentence): | |
# return sentence.split() | |
def analyze_sentence(sentence):
    """Return the set of frame names evoked in ``sentence``.

    The sentence is analysed with ``lome_wrapper.analyze``. In the first
    analysis, every ``frame_list`` item starting with ``"T:"`` contributes the
    part before the first ``"@"``, with ``"T:"`` removed.
    """
    lome_data = lome_wrapper.analyze(sentence)
    return {
        item.split("@")[0].replace("T:", "")
        for token_items in lome_data["analyses"][0]["frame_list"]
        for item in token_items
        if item.startswith("T:")
    }
def make_frame_feedback_msg(frame_feedback):
    """Render frame feedback entries as a bullet-list message.

    Each entry yields a headline with its index and upper-cased frame name, a
    line with its typical words, optionally a line with its typical sentence,
    and either both similarity scores or a single "unknown" line when
    ``similarity_1`` is falsy. Lines are joined with newlines.
    """
    lines = []
    for index, entry in enumerate(frame_feedback):
        lines.append(f"* STORY {index}: *{entry['frame'].upper()}*")
        words = " ".join(entry["typical_words"])
        lines.append("\t* typical words: *" + words + "* ...")

        if entry["typical_sentence"]:
            lines.append(f"\t* typical context: «{entry['typical_sentence']}»")

        if not entry["similarity_1"]:
            lines.append("\t* similarity: unknown")
            continue
        lines.append(f"\t* similarity to SECRET STORY #1: {entry['similarity_1']:.2f}")
        lines.append(f"\t* similarity to SECRET STORY #2: {entry['similarity_2']:.2f}")
    return "\n".join(lines)
def format_hint_sentence(hint_example):
    """Hide the target word of a hint example behind ``******``.

    The characters between the two offsets in ``hint_example["target_idx"]``
    are replaced in ``hint_example["text"]``; surrounding whitespace is
    stripped from the result.
    """
    span = hint_example["target_idx"]
    text = hint_example["text"]
    before, after = text[:span[0]], text[span[1]:]
    return f"{before}******{after}".strip()
def play_turn():
    """Process the text submitted in the Streamlit input (``on_change`` hook).

    Reads and clears ``st.session_state["cur_sentence"]``, resets the hints,
    and then does one of the following:

    * blank input: nothing further happens;
    * "show me the frames" (case-insensitive): shows the secret frames;
    * "HINT": stores a hint sentence per secret frame in
      ``st.session_state["hints"]`` where one is available;
    * anything else: analyses the sentence, counts a guess, records the
      scored frames in the game state, stores the feedback and flags the game
      as over when both secret frames were evoked.
    """
    # remove text from input
    sentence = st.session_state["cur_sentence"]
    st.session_state["cur_sentence"] = ""
    command = sentence.strip()

    # get previous game state
    game_state = st.session_state["game_state"]
    secret_event, secret_entity = game_state["secret_event"], game_state["secret_entity"]
    guesses_event, guesses_entity = game_state["guesses_event"], game_state["guesses_entity"]

    # reset hints
    st.session_state["hints"] = [None, None]

    # Clearing the text box also fires the callback; an empty sentence is not
    # a guess and is not worth sending to the analyser.
    if not command:
        return

    # reveal correct frames
    if command.lower() == "show me the frames":
        st.warning(f"The correct frames are: {secret_event.upper()} and {secret_entity.upper()}")

    # process hints
    elif command == "HINT":
        ranked_event = sorted(guesses_event, key=lambda t: t[1], reverse=True)
        ranked_entity = sorted(guesses_entity, key=lambda t: t[1], reverse=True)
        best_guess_event = ranked_event[0][0] if ranked_event else "Event"
        best_guess_entity = ranked_entity[0][0] if ranked_entity else "Entity"
        event_hint = make_hint(st.session_state["gensim_model"], secret_event, best_guess_event)
        entity_hint = make_hint(st.session_state["gensim_model"], secret_entity, best_guess_entity)
        if event_hint:
            st.session_state["hints"][0] = format_hint_sentence(event_hint[1])
        if entity_hint:
            st.session_state["hints"][1] = format_hint_sentence(entity_hint[1])

    else:
        frames = analyze_sentence(sentence)
        frames_and_info = get_frame_info(frames)
        frame_feedback = get_frame_feedback(frames_and_info, st.session_state["gensim_model"], secret_event, secret_entity)

        # update game state post analysis
        game_state["num_guesses"] += 1
        for fdb in frame_feedback:
            # Store a guess only when its own score is known: entries with a
            # None similarity cannot be sorted for the status tables.
            if fdb["similarity_1"] is not None:
                guesses_event.add((fdb["frame"], fdb["similarity_1"], fdb["rank_1"]))
            if fdb["similarity_2"] is not None:
                guesses_entity.add((fdb["frame"], fdb["similarity_2"], fdb["rank_2"]))

        st.session_state["frame_feedback"] = frame_feedback
        if secret_event in frames and secret_entity in frames:
            st.session_state["game_over"] = True
            st.session_state["guesses_to_win"] = game_state["num_guesses"]
def display_guess_status():
    """Show the guesses made so far, one table per secret story.

    Guesses are read from the game state and sorted by similarity, highest
    first. Nothing is rendered while both guess collections are empty. Under
    each table an info box appears once the matching secret frame is among the
    guesses.
    """
    game_state = st.session_state["game_state"]

    def by_similarity(guess):
        return guess[1]

    guesses_entity = sorted(game_state["guesses_entity"], key=by_similarity, reverse=True)
    guesses_event = sorted(game_state["guesses_event"], key=by_similarity, reverse=True)

    if not (guesses_event or guesses_entity):
        return

    st.header("Best guesses")
    event_col, entity_col = st.columns(2)

    if guesses_event:
        with event_col:
            st.subheader("Event Mini-Story")
            event_table = pd.DataFrame(guesses_event, columns=["Story", "Similarity", "Steps To Go"])
            st.table(event_table)
            if any(name == game_state["secret_event"] for name, _, _ in guesses_event):
                st.info("Great, you guessed the Event story! In order to win, make a sentence containing both the secret stories.")

    if guesses_entity:
        with entity_col:
            st.subheader("Thing Mini-Story")
            entity_table = pd.DataFrame(guesses_entity, columns=["Story", "Similarity", "Steps To Go"])
            st.table(entity_table)
            if any(name == game_state["secret_entity"] for name, _, _ in guesses_entity):
                st.info("Great, you guessed the Thing story! In order to win, make a sentence containing both the secret stories.")
def format_feedback(frame_feedback):
    """Convert frame feedback entries into rows for the feedback table.

    Args:
        frame_feedback: iterable of dicts with the keys ``frame``,
            ``similarity_1``, ``similarity_2``, ``typical_sentence`` and
            ``typical_words``.

    Returns:
        A list of dicts keyed by column title. Similarities are formatted with
        two decimals, or shown as ``"unknown"`` when the score is ``None``.
    """
    def show(score):
        # A frame without a similarity score has None here, which cannot be
        # formatted with ``:.2f``.
        return "unknown" if score is None else f"{score:.2f}"

    return [
        {
            "Story": fdb["frame"],
            "Similarity (Event)": show(fdb["similarity_1"]),
            "Similarity (Thing)": show(fdb["similarity_2"]),
            "Typical Context": fdb["typical_sentence"],
            "Typical Words": " ".join(fdb["typical_words"]),
        }
        for fdb in frame_feedback
    ]
def display_introduction():
    """Render the "Why this game?" and "How does it work?" explanations."""
    st.subheader("Why this game?")
    st.markdown(
        """
Words are not just words: behind every word, a _mini-story_ (also known as "frame") is hidden
that appears in our imagination when we hear the word. For example, when we hear the word
"talking" we can imagine a mini-story that involves several people who are interacting
with each other. Or, if we hear the word "cookie", we might think of someone eating a cookie.
""".strip())
    st.subheader("How does it work?")
    # Adjacent literals are concatenated as-is, so every fragment that
    # continues a sentence must carry its own separating space.
    st.markdown(
        "* In this game, there are two secret mini-stories, and it's your job to figure out which ones!"
        "\n"
        "* The first mini-story is about an _Event_ (something that happens in the world, like a thunderstorm, "
        "people talking, someone eating pasta), and the other one is a _Thing_ (a concrete thing like a tree "
        "or something abstract like 'love')."
        "\n"
        "* How to guess the stories? Well, just type a sentence, and we'll tell you which mini-stories are "
        "hidden in the sentence. For each of the stories, we'll tell you how close they are to the secret ones."
        "\n"
        "* Once you type a sentence with both of the secret mini-stories, you win!"
    )
def display_hints():
    """Render the hint sentences stored in ``st.session_state["hints"]``.

    Nothing is shown when neither an event hint nor an entity hint is set.
    """
    event_hint, entity_hint = st.session_state["hints"]
    if not (event_hint or entity_hint):
        return

    st.header("Hints")
    st.info("So you need some help? Here you get your hint sentences! Guess the hidden word, use it in a sentence, and we'll help you get one step closer.")
    if event_hint:
        st.markdown(f"**Event Hint**:\n>_{event_hint}_")
    if entity_hint:
        st.markdown(f"**Thing Hint**:\n>_{entity_hint}_")
def display_frame_feedback():
    """Render the feedback table for the most recent sentence, if there is one."""
    frame_feedback = st.session_state["frame_feedback"]
    if not frame_feedback:
        return

    st.header("Feedback")
    st.text("Your sentence contains the following stories: ")
    rows = format_feedback(frame_feedback)
    st.table(pd.DataFrame(rows))
def run_game_st(debug=True):
    """Render the Streamlit page of the game.

    On the first run of a session the secret frames are drawn, the embedding
    model is loaded and all session-state entries are initialised. Every run
    then draws the title, the optional explanation, the sentence input (whose
    ``on_change`` callback is ``play_turn``), the win message, and the hint,
    feedback and guess-status sections.

    Args:
        debug: accepted for parity with ``run_game_cli``; not used here.
    """
    if not st.session_state.get("initialized", False):
        secret_event, secret_entity = choose_secret_frames()
        gensim_m = gensim.models.word2vec.KeyedVectors.load_word2vec_format("data/frame_embeddings.w2v.txt")
        game_state = {
            "secret_event": secret_event,
            "secret_entity": secret_entity,
            "num_guesses": 0,
            "guesses_event": set(),
            "guesses_entity": set(),
        }
        st.session_state["initialized"] = True
        st.session_state["show_introduction"] = False
        st.session_state["game_over"] = False
        st.session_state["guesses_to_win"] = -1
        st.session_state["game_state"] = game_state
        st.session_state["gensim_model"] = gensim_m
        st.session_state["frame_feedback"] = None
        st.session_state["hints"] = [None, None]
    # On later runs everything needed is read from the session state by the
    # display helpers and by ``play_turn``, so there is nothing to restore here.

    header = st.container()
    with header:
        st.title("FillmorLe")
        st.checkbox("Show explanation?", key="show_introduction")
        if st.session_state["show_introduction"]:
            display_introduction()

    st.header(f"Guess #{st.session_state['game_state']['num_guesses'] + 1}")
    st.text_input("Enter a sentence or type 'HINT' if you're stuck", key="cur_sentence", on_change=play_turn)

    if st.session_state["game_over"]:
        st.success(f"You won in {st.session_state['guesses_to_win']}!")

    display_hints()
    display_frame_feedback()
    display_guess_status()
# Start the Streamlit version of the game when this file is run as a script.
if __name__ == "__main__":
    run_game_st()