|
import base64
import itertools
import os
import random
import re
from typing import AnyStr

import streamlit as st
import torch.nn.parameter
from bs4 import BeautifulSoup
import numpy as np
import validators
from spacy_streamlit.util import get_svg
from validators import ValidationFailure
from flair.data import Sentence
from flair.models import SequenceTagger
from sentence_transformers import SentenceTransformer
import spacy
from spacy import displacy
from spacy_streamlit import visualize_parser
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from transformers import pipeline
from transformers_interpret import SequenceClassificationExplainer

from custom_renderer import render_sentence_custom
|
|
|
|
|
|
|
# Pick the CUDA device when torch reports it as available, otherwise the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
|
|
|
@st.experimental_singleton
def get_sentence_embedding_model():
    """Build the sentence-embedding model used to compare entity strings.

    Returns:
        A ``SentenceTransformer`` for ``sentence-transformers/all-MiniLM-L6-v2``.
    """
    model_id = 'sentence-transformers/all-MiniLM-L6-v2'
    return SentenceTransformer(model_id)
|
|
|
|
|
@st.experimental_singleton
def get_spacy():
    """Load and return the spaCy ``en_core_web_lg`` pipeline."""
    return spacy.load('en_core_web_lg')
|
|
|
|
|
|
|
|
|
@st.experimental_singleton
def get_flair_tagger():
    """Load and return the Flair ``flair/ner-english-ontonotes-fast`` tagger."""
    tagger_id = "flair/ner-english-ontonotes-fast"
    return SequenceTagger.load(tagger_id)
|
|
|
|
|
|
|
# Page-level Streamlit configuration; all three menu entries are set to None.
st.set_page_config(
    page_title="Post-processing summarization fact checker",
    page_icon="",
    layout="centered",
    initial_sidebar_state="auto",
    menu_items=dict.fromkeys(('Get help', 'Report a bug', 'About')),
)
|
|
|
|
|
def list_all_article_names() -> list:
    """Return the names of all sample articles, sorted alphabetically.

    Every ``*.txt`` file directly inside ``./sample-articles/`` contributes one
    entry: its file name with the trailing ``.txt`` extension removed.

    Returns:
        A list of article names (file names without the ``.txt`` suffix).

    Raises:
        OSError: If ``./sample-articles/`` cannot be listed.
    """
    extension = '.txt'
    # Strip only the trailing extension; ``str.replace`` would also remove any
    # other ``.txt`` occurrence inside the name (e.g. ``x.txt.old.txt``).
    return [
        file[:-len(extension)]
        for file in sorted(os.listdir('./sample-articles/'))
        if file.endswith(extension)
    ]
|
|
|
|
|
def fetch_article_contents(filename: str) -> AnyStr:
    """Read a sample article from disk.

    Args:
        filename: Article name; it is lower-cased and resolved to
            ``./sample-articles/<name>.txt``.

    Returns:
        The complete text of the file.
    """
    article_path = './sample-articles/' + filename.lower() + '.txt'
    with open(article_path, 'r') as handle:
        return handle.read()
|
|
|
|
|
def fetch_summary_contents(filename: str) -> AnyStr:
    """Read the stored summary that belongs to an article.

    Args:
        filename: Article name; it is lower-cased and resolved to
            ``./sample-summaries/<name>.txt``.

    Returns:
        The complete text of the file.
    """
    summary_path = './sample-summaries/' + filename.lower() + '.txt'
    with open(summary_path, 'r') as handle:
        return handle.read()
|
|
|
|
|
def fetch_entity_specific_contents(filename: str) -> AnyStr:
    """Read the entity-matching explanation text for an article.

    Args:
        filename: Article name; it is lower-cased and resolved to
            ``./entity-specific-text/<name>.txt``.

    Returns:
        The complete text of the file.
    """
    explanation_path = './entity-specific-text/' + filename.lower() + '.txt'
    with open(explanation_path, 'r') as handle:
        return handle.read()
|
|
|
|
|
def fetch_dependency_specific_contents(filename: str) -> AnyStr:
    """Read the dependency-comparison explanation text for an article.

    Args:
        filename: Article name; it is lower-cased and resolved to
            ``./dependency-specific-text/<name>.txt``.

    Returns:
        The complete text of the file.
    """
    explanation_path = './dependency-specific-text/' + filename.lower() + '.txt'
    with open(explanation_path, 'r') as handle:
        return handle.read()
|
|
|
|
|
def display_summary(article_name: str):
    """Build the HTML snippet that shows an article's summary.

    The raw summary text is also stored in ``st.session_state.summary_output``.

    Args:
        article_name: Name of the article whose summary is loaded.

    Returns:
        The summary, parsed with BeautifulSoup and wrapped in a bordered
        ``<div>``, as an HTML string.
    """
    raw_summary = fetch_summary_contents(article_name)
    st.session_state.summary_output = raw_summary
    parsed_summary = BeautifulSoup(raw_summary, features="html.parser")
    wrapper = """<div style="overflow-x: auto; border: 1px solid #e6e9ef; border-radius: 0.25rem; padding: 1rem; margin-bottom: 2.5rem">{}</div>"""
    return wrapper.format(parsed_summary)
|
|
|
|
|
def get_all_entities_per_sentence(text):
    """Collect named entities for every sentence of ``text``.

    Sentences come from the module-level spaCy pipeline ``nlp``. For each
    sentence the spaCy entities are listed first, followed by the spans the
    module-level Flair ``tagger`` labels as ``'ner'``; duplicates between the
    two sources are kept.

    Args:
        text: The text to analyse.

    Returns:
        A list with one list of entity strings per sentence, in sentence order.
    """
    per_sentence = []
    for sent in list(nlp(text).sents):
        # spaCy's entities for this sentence.
        found = [str(ent) for ent in sent.ents]

        # Flair's entities for the same sentence text.
        flair_sentence = Sentence(str(sent))
        tagger.predict(flair_sentence)
        found.extend(span.text for span in flair_sentence.get_spans('ner'))

        per_sentence.append(found)
    return per_sentence
|
|
|
|
|
def get_all_entities(text):
    """Return every entity found in ``text`` as one flat list.

    This flattens the per-sentence result of
    ``get_all_entities_per_sentence`` while keeping its order.
    """
    return [
        entity
        for sentence_entities in get_all_entities_per_sentence(text)
        for entity in sentence_entities
    ]
|
|
|
|
|
|
|
def get_and_compare_entities(article_name: str):
    """Split the summary's entities into those backed by the article and the rest.

    A summary entity counts as matched when either

    * its lower-cased text is a substring of some lower-cased article entity, or
    * the inner product of its embedding with some article entity's embedding
      exceeds 0.9 (presumably a cosine similarity - this assumes the embedding
      model returns normalised vectors; TODO confirm).

    Args:
        article_name: Name of the article/summary pair to load.

    Returns:
        A ``(matched_entities, unmatched_entities)`` tuple of lists holding the
        summary entities in their original order (duplicates included).
    """
    entities_article = get_all_entities(fetch_article_contents(article_name))
    entities_summary = get_all_entities(fetch_summary_contents(article_name))

    # Lower-case the article entities once instead of once per comparison.
    article_entities_lower = [art_entity.lower() for art_entity in entities_article]

    # Each distinct string is encoded at most once, and only when the embedding
    # fallback is actually reached. The original re-encoded both strings for
    # every (summary entity, article entity) pair.
    embedding_cache = {}

    def embed(entity_text):
        if entity_text not in embedding_cache:
            embedding_cache[entity_text] = sentence_embedding_model.encode(entity_text)
        return embedding_cache[entity_text]

    matched_entities = []
    unmatched_entities = []
    for entity in entities_summary:
        entity_lower = entity.lower()
        if any(entity_lower in art_lower for art_lower in article_entities_lower):
            matched_entities.append(entity)
            continue

        entity_embedding = embed(entity)
        if any(np.inner(entity_embedding, embed(art_entity)) > 0.9
               for art_entity in entities_article):
            matched_entities.append(entity)
        else:
            unmatched_entities.append(entity)
    return matched_entities, unmatched_entities
|
|
|
|
|
def highlight_entities(article_name: str):
    """Return the summary as HTML with its entities colour-highlighted.

    Entities reported as matched by ``get_and_compare_entities`` are wrapped in
    a green ``<mark>``; unmatched ones in a red ``<mark>``. The result is
    parsed with BeautifulSoup and placed inside a bordered ``<div>``.

    Args:
        article_name: Name of the article/summary pair to load.

    Returns:
        The highlighted summary as an HTML string.
    """
    summary_content = fetch_summary_contents(article_name)

    markdown_start_red = "<mark class=\"entity\" style=\"background: rgb(238, 135, 135);\">"
    markdown_start_green = "<mark class=\"entity\" style=\"background: rgb(121, 236, 121);\">"
    markdown_end = "</mark>"

    matched_entities, unmatched_entities = get_and_compare_entities(article_name)

    # One opening tag per distinct entity. Empty strings are skipped: they
    # would match at every position of the text.
    opening_tag_for = {entity: markdown_start_red for entity in unmatched_entities if entity}
    opening_tag_for.update((entity, markdown_start_green) for entity in matched_entities if entity)

    if opening_tag_for:
        # Substitute in a single pass so that an entity listed twice is wrapped
        # only once and text inside already-inserted markup is never rewritten
        # (both happened with sequential ``str.replace`` calls). Longer
        # entities go first so they win over entities they contain.
        alternatives = sorted(opening_tag_for, key=len, reverse=True)
        entity_pattern = re.compile('|'.join(re.escape(entity) for entity in alternatives))
        summary_content = entity_pattern.sub(
            lambda found: opening_tag_for[found.group(0)] + found.group(0) + markdown_end,
            summary_content,
        )

    soup = BeautifulSoup(summary_content, features="html.parser")

    HTML_WRAPPER = """<div style="overflow-x: auto; border: 1px solid #e6e9ef; border-radius: 0.25rem; padding: 1rem;
    margin-bottom: 2.5rem">{}</div> """

    return HTML_WRAPPER.format(soup)
|
|
|
|
|
def render_dependency_parsing(text: str):
    """Render one dependency drawing as SVG in the Streamlit page.

    ``text`` is handed unchanged to ``render_sentence_custom``; double newlines
    in the markup it returns are collapsed to single ones before the SVG is
    written to the page with raw HTML enabled.
    """
    svg_markup = render_sentence_custom(text).replace("\n\n", "\n")
    st.write(get_svg(svg_markup), unsafe_allow_html=True)
|
|
|
|
|
|
|
def check_dependency(article: bool):
    """Collect the entity-related ``amod``/``pobj`` dependencies of a text.

    The text is taken from ``st.session_state.article_text`` when ``article``
    is true and from ``st.session_state.summary_output`` otherwise. A token is
    reported when

    * its dependency label is ``amod``, or ``pobj`` with a head whose text is
      "in" (case-insensitive), and
    * the token text or its head's text is one of the entities found for the
      sentence the token belongs to.

    Args:
        article: Whether to analyse the article (``True``) or the summary.

    Returns:
        A list of dicts with the keys ``dep``, ``cur_word_index`` and
        ``target_word_index`` (token and head positions relative to the
        sentence start), ``identifier`` (token text + label + head text) and
        ``sentence`` (the sentence text).
    """
    if article:
        text = st.session_state.article_text
    else:
        text = st.session_state.summary_output
    all_entities = get_all_entities_per_sentence(text)

    doc = nlp(text)
    tok_l = doc.to_json()['tokens']

    test_list_dict_output = []
    for i, sentence in enumerate(doc.sents):
        sentence_entities = all_entities[i]
        # ``sentence.end`` is exclusive, so the slice holds exactly this
        # sentence's tokens. The previous ``id > end`` filter also let the
        # first token of the next sentence through. Slicing relies on token
        # ids being list positions, which the head lookup below assumes too.
        for t in tok_l[sentence.start:sentence.end]:
            dep = t['dep']
            if dep not in ('amod', 'pobj'):
                continue

            head = tok_l[t['head']]
            object_here = text[t['start']:t['end']]
            object_target = text[head['start']:head['end']]
            if dep == 'pobj' and object_target.lower() != "in":
                continue

            # Only keep dependencies that touch an entity of this sentence.
            if object_here in sentence_entities or object_target in sentence_entities:
                test_list_dict_output.append({
                    "dep": dep,
                    "cur_word_index": t['id'] - sentence.start,
                    "target_word_index": t['head'] - sentence.start,
                    "identifier": object_here + dep + object_target,
                    "sentence": str(sentence),
                })

    return test_list_dict_output
|
|
|
|
|
|
|
def is_valid_url(url: str) -> bool:
    """Tell whether ``validators.url`` accepts ``url``.

    Returns:
        ``False`` when the validator returns a ``ValidationFailure`` instance,
        ``True`` otherwise.
    """
    outcome = validators.url(url)
    return not isinstance(outcome, ValidationFailure)
|
|
|
|
|
|
|
# Make sure the per-session results list exists before anything reads it.
if 'results' not in st.session_state:
    st.session_state.results = []

# Page header and introduction.
st.title('Summarization fact checker')

st.header("Introduction")
st.markdown("""Recent work using transformers on large text corpora has shown great success when fine-tuned on
several different downstream NLP tasks. One such task is that of text summarization. The goal of text summarization
is to generate concise and accurate summaries from input document(s). There are 2 types of summarization: extractive
and abstractive. **Extractive summarization** merely copies informative fragments from the input,
whereas **abstractive summarization** may generate novel words. A good abstractive summary should cover principal
information in the input and has to be linguistically fluent. This blogpost will focus on this more difficult task of
abstractive summary generation.""")

# The Markdown link must not have a space between "]" and "(", otherwise it is
# rendered as plain text instead of a hyperlink.
st.markdown("""To generate summaries we will use the [PEGASUS](https://huggingface.co/google/pegasus-cnn_dailymail)
model, producing abstractive summaries from large articles. These summaries often contain sentences with different
kinds of errors. Rather than improving the core model, we will look into possible post-processing steps to improve
the generated summaries. By comparing contents of the summary with the source text, we come up with a factualness
metric, indicating the trustworthiness of the generated summary. Throughout this blog, we will also explain the
results for some methods on specific examples. These text blocks will be indicated and they change according to the
currently selected article.""")

# Models used as module-level globals by the helper functions in this file.
nlp = get_spacy()
sentence_embedding_model = get_sentence_embedding_model()
tagger = get_flair_tagger()
|
|
|
|
|
# --- Section: choose an article and show its summary ---
st.header("Generating summaries")
st.markdown("Let’s start by selecting an article text for which we want to generate a summary, or you can provide "
            "text yourself. Note that it’s suggested to provide a sufficiently large text, as otherwise the summary "
            "generated from it might not be optimal, leading to suboptimal performance of the post-processing steps.")

selected_article = st.selectbox('Select an article or provide your own:',
                                list_all_article_names())
st.session_state.article_text = fetch_article_contents(selected_article)
article_text = st.text_area(
    label='Full article text',
    value=st.session_state.article_text,
    height=150
)

st.markdown("Below you can find the generated summary for the article. Based on empirical research, we will discuss "
            "two main methods that detect some common errors. We can then score different summaries, to indicate how "
            "factual a summary is for a given article. The idea is that in production, you could generate a set of "
            "summaries for the same article, with different parameters (or even different models). By using "
            "post-processing error detection, we can then select the best possible summary.")
if st.session_state.article_text:
    with st.spinner('Generating summary...'):
        summary_displayed = display_summary(selected_article)

        st.write("**Generated summary:**", summary_displayed, unsafe_allow_html=True)
else:
    # ``st.error`` accepts no ``help`` keyword; passing one raised a TypeError
    # on exactly the path meant to report the problem. The message now
    # describes the actual situation (an empty article).
    st.error('**Error**: No article text to summarize. Please provide an article.')

# NOTE(review): this only prints to the server's stdout and looks like a
# debugging leftover - confirm whether the URL check is still needed.
if is_valid_url(article_text):
    print("YES")
else:
    print("NO")
|
|
|
|
|
def render_svg(svg_file):
    """Return an ``<img>`` tag that embeds an SVG file as a base64 data URI.

    Args:
        svg_file: Path of the SVG file to read.

    Returns:
        An HTML ``<img>`` element whose ``src`` holds the UTF-8 encoded file
        contents in base64.
    """
    with open(svg_file, "r") as handle:
        svg_source = handle.read()

    encoded = base64.b64encode(svg_source.encode("utf-8")).decode("utf-8")
    return r'<img src="data:image/svg+xml;base64,%s"/>' % encoded
|
|
|
|
|
|
|
# --- Section: entity matching ---
st.header("Entity matching")
st.markdown(
    "The first method we will discuss is called **Named Entity Recognition** (NER). "
    "NER is the task of identifying and categorising key information (entities) in text. "
    "An entity can be a singular word or a series of words that consistently refers to the same thing. "
    "Common entity classes are person names, organisations, locations and so on. "
    "By applying NER to both the article and its summary, we can spot possible **hallucinations**. "
    "Hallucinations are words generated by the model that are not supported by the source input. "
    "In theory all entities in the summary (such as dates, locations and so on), should also be present in the "
    "article. "
    "Thus we can extract all entities from the summary and compare them to the entities of the original article, "
    "spotting potential hallucinations. "
    "The more unmatched entities we find, the lower the factualness score of the summary. "
)
with st.spinner("Calculating and matching entities..."):
    # Summary with matched entities in green and unmatched ones in red.
    entity_match_html = highlight_entities(selected_article)
    st.write(entity_match_html, unsafe_allow_html=True)

    # Coloured legend words that are embedded in the explanation below.
    red_text = """<font color="black"><span style="background-color: rgb(238, 135, 135); opacity:
    1;">red</span></font> """
    green_text = """<font color="black">
    <span style="background-color: rgb(121, 236, 121); opacity: 1;">green</span>
    </font>"""

    markdown_start_red = "<mark class=\"entity\" style=\"background: rgb(238, 135, 135);\">"
    markdown_start_green = "<mark class=\"entity\" style=\"background: rgb(121, 236, 121);\">"
    st.markdown(
        "We call this technique “entity matching” and here you can see what this looks like when we apply "
        f"this method on the summary. Entities in the summary are marked {green_text} when the entity "
        f"also exists in the article, while unmatched entities are marked {red_text}. "
        "Several of the example articles and their summaries indicate different errors we find "
        "by using this technique. Based on which article you choose, we provide a short "
        "explanation of the results below.",
        unsafe_allow_html=True,
    )

    # Article-specific explanation, shown inside a bordered box.
    entity_specific_text = fetch_entity_specific_contents(selected_article)
    soup = BeautifulSoup(entity_specific_text, features="html.parser")
    HTML_WRAPPER = """<div style="overflow-x: auto; border: 1px solid #e6e9ef; border-radius: 0.25rem; padding: 1rem;
    margin-bottom: 2.5rem">{}</div> """
    st.write("💡👇 **Specific example explanation** 👇💡", HTML_WRAPPER.format(soup), unsafe_allow_html=True)
|
|
|
|
|
# --- Section: dependency comparison ---
st.header("Dependency comparison")
st.markdown(
    "The second method we use for post-processing is called **Dependency parsing**: "
    "the process in which the grammatical structure in a sentence is analysed, to find out related words as well "
    "as the type of the relationship between them. "
    "For the sentence “Jan’s wife is called Sarah” you would get the following dependency graph:"
)

# Static example drawing that ships with the app.
st.write(render_svg('ExampleParsing.svg'), unsafe_allow_html=True)
st.markdown(
    "Here, “Jan” is the “poss” (possession modifier) of “wife”. "
    "If suddenly the summary would read “Jan’s husband…”, there would be a dependency in the summary that is "
    "non-existent in the article itself (namely “Jan” is the “poss” of “husband”). "
    "However, often new dependencies are introduced in the summary that are still correct. "
    "“The borders of Ukraine” have a different dependency between “borders” and “Ukraine” than "
    "“Ukraine’s borders”, while both descriptions have the same meaning. "
    "So just matching all dependencies between article and summary (as we did with entity matching) would not "
    "be a robust method."
)
st.markdown(
    "However, by empirical testing, we have found that there are certain dependencies which can be used for such "
    "matching techniques. "
    "When unmatched, these specific dependencies are often an indication of a wrongly constructed sentence. "
    "**Should I explain this more/better or is it enough that I explain by example specific run throughs?**. "
    "We found 2(/3 TODO) common dependencies which, when present in the summary but not in the article, are "
    "highly indicative of factualness errors. "
    "Furthermore, we only check dependencies between an existing **entity** and its direct connections. "
    "Below we highlight all unmatched dependencies that satisfy the discussed constraints. "
    "We also discuss the specific results for the currently selected article."
)
with st.spinner("Doing dependency parsing..."):
    summary_deps = check_dependency(False)
    article_deps = check_dependency(True)

    # A summary dependency is unmatched when its identifier is not contained
    # in the identifier of any article dependency (substring test).
    total_unmatched_deps = [
        summ_dep
        for summ_dep in summary_deps
        if all(summ_dep['identifier'] not in art_dep['identifier'] for art_dep in article_deps)
    ]

    # Draw every unmatched dependency; nothing is drawn for an empty list.
    for current_drawing_list in total_unmatched_deps:
        render_dependency_parsing(current_drawing_list)

    # Article-specific explanation, shown inside a bordered box.
    dep_specific_text = fetch_dependency_specific_contents(selected_article)
    soup = BeautifulSoup(dep_specific_text, features="html.parser")
    HTML_WRAPPER = """<div style="overflow-x: auto; border: 1px solid #e6e9ef; border-radius: 0.25rem; padding: 1rem;
    margin-bottom: 2.5rem">{}</div> """
    st.write("💡👇 **Specific example explanation** 👇💡", HTML_WRAPPER.format(soup), unsafe_allow_html=True)
|
|
|
|
|
# --- Section: closing remarks ---
# NOTE(review): both paragraphs still contain author notes ("something about
# that we tested also RE ...", "TODO: implement this ...") that are shown to
# users as-is.
st.header("Wrapping up")
st.markdown(
    "We have presented 2 methods that try to improve summaries via post-processing steps. "
    "Entity matching can be used to solve hallucinations, while dependency comparison can be used to filter out "
    "some bad sentences (and thus worse summaries). "
    "These methods highlight the possibilities of post-processing AI-made summaries, but are only a basic "
    "introduction. "
    "As the methods were empirically tested they are definitely not sufficiently robust for general use-cases. "
    "(something about that we tested also RE and maybe other things)."
)
st.markdown("####")
st.markdown(
    "Below we generated 5 different kind of summaries from the article in which their ranks are estimated, "
    "and hopefully the best summary "
    "(read: the one that a human would prefer or indicate as the best one) will be at the top. "
    "TODO: implement this (at the end I think) and also put something in the text with the actual parameters "
    "or something? "
)
|
|