import sys
import os
import streamlit as st
from PIL import Image
import pandas as pd
from transformers import pipeline
import spacy
import en_core_web_lg

# Make the parent directory importable so the shared helpers module can be found.
current = os.path.dirname(os.path.realpath(__file__))
parent = os.path.dirname(current)
sys.path.append(parent)

from helpers import display_nli_pr_results_as_list, prompt_to_nli, get_who_what_whom_qa
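
# The helpers module (shared with the main PR-ENT app) is expected to provide:
#   - prompt_to_nli: fills the template with candidate tokens (fill-mask) and keeps
#     the ones entailed by the event description (NLI),
#   - get_who_what_whom_qa: builds the actor/target questions from those tokens and
#     queries the extractive QA model,
#   - display_nli_pr_results_as_list: renders the entailed tokens with their scores.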

# Cache heavy resources across Streamlit reruns
# (st.cache_resource assumed, available in recent Streamlit releases).
@st.cache_resource
def load_spacy_pipeline():
    return en_core_web_lg.load()

def choose_text_menu(text):
    # Default event description shown on first load.
    if 'text' not in st.session_state:
        st.session_state.text = 'Several demonstrators were injured.'
    text = st.text_area('Event description', st.session_state.text)
    return text

# Load models in cache so they are created only once per session.
@st.cache_resource
def load_model_prompting():
    # Fill-mask model: proposes candidate tokens for the [Z] slot of the template.
    return pipeline("fill-mask", model="distilbert-base-uncased")

@st.cache_resource
def load_model_nli():
    # "sentiment-analysis" aliases the text-classification pipeline; roberta-large-mnli
    # scores whether the filled template is entailed by the event description.
    return pipeline(task="sentiment-analysis", model="roberta-large-mnli")

@st.cache_resource
def load_model_qa():
    # Extractive question answering model used for actor/target extraction.
    model_name = "deepset/roberta-base-squad2"
    model = pipeline(model=model_name, tokenizer=model_name, task="question-answering")
    return model
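
# For reference, the QA pipeline is queried with a question and the event description as
# context, e.g. (illustrative values):
#   model_qa(question="Who was injured?", context="Several demonstrators were injured.")
# and returns a dict roughly of the form {'answer': ..., 'score': ..., 'start': ..., 'end': ...},
# where 'score' is the confidence reported in the results table below.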

nlp = load_spacy_pipeline()

### App START
st.markdown("### 1. Actor-target coding (experimental):")

def load_qa_image():
    return Image.open('pipeline_qa.png')

st.image(load_qa_image(), caption="""Actor-target Coding Flow. First we get the entailed answer candidates through the PR-ENT pipeline.
Then we construct questions based on these tokens to extract actors and targets, two questions per verb.
Finally, we pass these questions and the event description to a pre-trained extractive question answering model and fill a table of [Actor, Action, Target].""")

st.markdown("""
Here we use an extractive question answering model to find the actor and target of an event.
As this is still in an experimental phase, there are some limitations:
- The only available template is `People were [Z].`; it gives us a verb from which to construct the two questions:
    - `Who was [Z]?` to find the target.
    - `Who [Z] people?` to find the actor.
- `top_k = 10` and `entailment_threshold = 0.5` are fixed.

The QA results are shown along with the model's confidence score in brackets `[xx.x%]`.
""")

## Load Models
model_nli = load_model_nli()
model_prompting = load_model_prompting()
model_qa = load_model_qa()

st.markdown("""
### 2. Write an event description:
The first step is to write an event description that will be fed to the pipeline. This can be any text in English.
""")

text = choose_text_menu('')
st.session_state.text = text
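# The entered text is stored in st.session_state so it persists across Streamlit reruns.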

st.markdown("""
### 3. Run actor-target coding:
""")
if "old_text_qa" not in st.session_state: | |
st.session_state.old_text_qa =st.session_state.text | |
qa_button = st.button("Run actor-target coding") | |
if qa_button: | |
computation_state_qa = st.text("Computation Running.") | |
st.session_state.old_text_qa =st.session_state.text | |
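    # PR-ENT step (described in the intro above): the fill-mask model proposes candidate
    # tokens for [Z] in "People were {}." and only tokens whose completed template is
    # entailed by the event description are kept (top_k=10, entailment_threshold=0.5).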
    prompt = "People were {}."
    results = prompt_to_nli(text, prompt, model_prompting, model_nli, nlp, 10, 0.5, True)
    # Keep the entailed tokens together with their entailment score, formatted as "token xx%".
    list_results = [x[0][0] + ' ' + str(int(x[1][1] * 100)) + '%' for x in results]
    st.session_state.list_results_prompt_qa = list_results
    list_tokens = [x[0][0] for x in results]
    # Build the two questions per entailed verb and query the QA model.
    who_what_whom = get_who_what_whom_qa(text, list_tokens, model_qa)
    st.session_state.who_what_whom = who_what_whom
    computation_state_qa.text("Computation Done.")
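
# Display the cached results: the entailed tokens with their scores, followed by the
# [Actor, Action, Target] table produced by the QA step.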
if 'who_what_whom' in st.session_state:
    st.write('**Event Description**: {}'.format(st.session_state.old_text_qa))
    st.write('**Template**: "{}"; **Top K**: {}; **Entailment Threshold**: {}.'.format("People were [Z]", 10, 0.5))
    display_nli_pr_results_as_list('', st.session_state.list_results_prompt_qa)
    st.write(pd.DataFrame(st.session_state.who_what_whom))