MatthiasC committed on
Commit
305fb83
·
1 Parent(s): 357d42c

Improve code and add more example specific text

Browse files
app.py CHANGED
@@ -1,8 +1,10 @@
1
  import random
2
  from typing import AnyStr
 
3
 
4
  import itertools
5
  import streamlit as st
 
6
  import torch.nn.parameter
7
  from bs4 import BeautifulSoup
8
  import numpy as np
@@ -15,6 +17,7 @@ from validators import ValidationFailure
15
  from custom_renderer import render_sentence_custom
16
  from flair.data import Sentence
17
  from flair.models import SequenceTagger
 
18
 
19
  import spacy
20
  from spacy import displacy
@@ -25,30 +28,8 @@ from transformers import pipeline
25
  import os
26
  from transformers_interpret import SequenceClassificationExplainer
27
 
28
- # Map model names to URLs
29
- model_names_to_URLs = {
30
- 'ml6team/distilbert-base-dutch-cased-toxic-comments':
31
- 'https://huggingface.co/ml6team/distilbert-base-dutch-cased-toxic-comments',
32
- 'ml6team/robbert-dutch-base-toxic-comments':
33
- 'https://huggingface.co/ml6team/robbert-dutch-base-toxic-comments',
34
- }
35
-
36
- about_page_markdown = f"""# 🤬 Dutch Toxic Comment Detection Space
37
-
38
- Made by [ML6](https://ml6.eu/).
39
-
40
- Token attribution is performed using [transformers-interpret](https://github.com/cdpierse/transformers-interpret).
41
- """
42
-
43
- regular_emojis = [
44
- '😐', '🙂', '👶', '😇',
45
- ]
46
- undecided_emojis = [
47
- '🤨', '🧐', '🥸', '🥴', '🤷',
48
- ]
49
- potty_mouth_emojis = [
50
- '🤐', '👿', '😡', '🤬', '☠️', '☣️', '☢️',
51
- ]
52
 
53
  # Page setup
54
  st.set_page_config(
@@ -64,58 +45,6 @@ st.set_page_config(
64
  )
65
 
66
 
67
- # Model setup
68
- @st.cache(allow_output_mutation=True,
69
- suppress_st_warning=True,
70
- show_spinner=False)
71
- def load_pipeline(model_name):
72
- with st.spinner('Loading model (this might take a while)...'):
73
- toxicity_pipeline = pipeline(
74
- 'text-classification',
75
- model=model_name,
76
- tokenizer=model_name)
77
- cls_explainer = SequenceClassificationExplainer(
78
- toxicity_pipeline.model,
79
- toxicity_pipeline.tokenizer)
80
- return toxicity_pipeline, cls_explainer
81
-
82
-
83
- # Auxiliary functions
84
- def format_explainer_html(html_string):
85
- """Extract tokens with attribution-based background color."""
86
- inside_token_prefix = '##'
87
- soup = BeautifulSoup(html_string, 'html.parser')
88
- p = soup.new_tag('p',
89
- attrs={'style': 'color: black; background-color: white;'})
90
- # Select token elements and remove model specific tokens
91
- current_word = None
92
- for token in soup.find_all('td')[-1].find_all('mark')[1:-1]:
93
- text = token.font.text.strip()
94
- if text.startswith(inside_token_prefix):
95
- text = text[len(inside_token_prefix):]
96
- else:
97
- # Create a new span for each word (sequence of sub-tokens)
98
- if current_word is not None:
99
- p.append(current_word)
100
- p.append(' ')
101
- current_word = soup.new_tag('span')
102
- token.string = text
103
- token.attrs['style'] = f"{token.attrs['style']}; padding: 0.2em 0em;"
104
- current_word.append(token)
105
-
106
- # Add last word
107
- p.append(current_word)
108
-
109
- # Add left and right-padding to each word
110
- for span in p.find_all('span'):
111
- span.find_all('mark')[0].attrs['style'] = (
112
- f"{span.find_all('mark')[0].attrs['style']}; padding-left: 0.2em;")
113
- span.find_all('mark')[-1].attrs['style'] = (
114
- f"{span.find_all('mark')[-1].attrs['style']}; padding-right: 0.2em;")
115
-
116
- return p
117
-
118
-
119
  def list_all_article_names() -> list:
120
  filenames = []
121
  for file in sorted(os.listdir('./sample-articles/')):
@@ -148,32 +77,6 @@ def fetch_dependency_specific_contents(filename: str) -> AnyStr:
148
  return data
149
 
150
 
151
- def classify_comment(comment, selected_model):
152
- """Classify the given comment and augment with additional information."""
153
- toxicity_pipeline, cls_explainer = load_pipeline(selected_model)
154
- result = toxicity_pipeline(comment)[0]
155
- result['model_name'] = selected_model
156
-
157
- # Add explanation
158
- result['word_attribution'] = cls_explainer(comment, class_name="non-toxic")
159
- result['visualitsation_html'] = cls_explainer.visualize()._repr_html_()
160
- result['tokens_with_background'] = format_explainer_html(
161
- result['visualitsation_html'])
162
-
163
- # Choose emoji reaction
164
- label, score = result['label'], result['score']
165
- if label == 'toxic' and score > 0.1:
166
- emoji = random.choice(potty_mouth_emojis)
167
- elif label in ['non_toxic', 'non-toxic'] and score > 0.1:
168
- emoji = random.choice(regular_emojis)
169
- else:
170
- emoji = random.choice(undecided_emojis)
171
- result.update({'text': comment, 'emoji': emoji})
172
-
173
- # Add result to session
174
- st.session_state.results.append(result)
175
-
176
-
177
  def display_summary(article_name: str):
178
  summary_content = fetch_summary_contents(article_name)
179
  st.session_state.summary_output = summary_content
@@ -244,6 +147,10 @@ def get_and_compare_entities(article_name: str):
244
  # TODO: currently substring matching but probably should do embedding method or idk?
245
  if any(entity.lower() in substring_entity.lower() for substring_entity in entities_article):
246
  matched_entities.append(entity)
 
 
 
 
247
  else:
248
  unmatched_entities.append(entity)
249
  return matched_entities, unmatched_entities
@@ -343,26 +250,27 @@ st.title('Summarization fact checker')
343
 
344
  # INTRODUCTION
345
  st.header("Introduction")
346
- st.markdown("""Recent work using transformers on large text corpora has shown great succes when fine-tuned on several
347
- different downstream NLP tasks. One such task is that of text summarization. The goal of text summarization is to
348
- generate concise and accurate summaries from input document(s). There are 2 types of summarization: extractive and
349
- abstractive. **Exstractive summarization** merely copies informative fragments from the input, whereas **abstractive
350
- summarization** may generate novel words. A good abstractive summary should cover principal information in the input
351
- and has to be linguistically fluent. This blogpost will focus on this more difficult task of abstractive summary
352
- generation.""")
353
 
354
  st.markdown("""To generate summaries we will use the [PEGASUS] (https://huggingface.co/google/pegasus-cnn_dailymail)
355
- model, producing abstractive summaries from large articles. These summaries often still contain sentences with
356
- different kinds of errors. Rather than improving the core model, we will look at possible post-processing steps to
357
- improve the generated summaries by detecting such possible errors. By comparing contents of the summary with the
358
- source text, we can create some sort of factualness metric, indicating the trustworthiness of the generated
359
- summary.""")
 
360
 
361
  # GENERATING SUMMARIES PART
362
  st.header("Generating summaries")
363
  st.markdown("Let’s start by selecting an article text for which we want to generate a summary, or you can provide "
364
  "text yourself. Note that it’s suggested to provide a sufficiently large text, as otherwise the summary "
365
- "generated might not be optimal to start from.")
366
 
367
  # TODO: NEED TO CHECK ARTICLE TEXT INSTEAD OF ARTICLE NAME ALSO FREE INPUT OPTION
368
  selected_article = st.selectbox('Select an article or provide your own:',
@@ -374,12 +282,11 @@ article_text = st.text_area(
374
  height=150
375
  )
376
 
377
- st.markdown("Below you can find the generated summary for the article. The summaries of the example articles "
378
- "vary in quality, but are chosen as such. Based on some common errors, we will discuss possible "
379
- "methods to improve or rank the summaries in the following paragraphs. The idea is that in "
380
- "production, you could generate a set of summaries for the same article, with different "
381
- "parameters (or even different models). By using post-processing methods and metrics, "
382
- "we can detect some errors in summaries, and choose the best one to actually use.")
383
  if st.session_state.article_text:
384
  with st.spinner('Generating summary...'):
385
  # classify_comment(article_text, selected_model)
@@ -395,6 +302,8 @@ if is_valid_url(article_text):
395
  print("YES")
396
  else:
397
  print("NO")
 
 
398
  def render_svg(svg_file):
399
  with open(svg_file, "r") as f:
400
  lines = f.readlines()
@@ -408,11 +317,15 @@ def render_svg(svg_file):
408
 
409
  # ENTITY MATCHING PART
410
  st.header("Entity matching")
411
- st.markdown("**Named entity recognition** (NER) is the task of identifying and categorising key information ("
412
- "entities) in text. An entity can be a singular word or a series of words that consistently refers to the "
413
- "same thing. Common entity classes are person names, organisations, locations and so on. By applying NER "
414
- "to both the article and its summary, we can spot possible **hallucinations**. Hallucinations are words "
415
- "generated by the model that are not supported by the source input. ")
 
 
 
 
416
  with st.spinner("Calculating and matching entities..."):
417
  entity_match_html = highlight_entities(selected_article)
418
  st.write(entity_match_html, unsafe_allow_html=True)
@@ -424,31 +337,47 @@ with st.spinner("Calculating and matching entities..."):
424
 
425
  markdown_start_red = "<mark class=\"entity\" style=\"background: rgb(238, 135, 135);\">"
426
  markdown_start_green = "<mark class=\"entity\" style=\"background: rgb(121, 236, 121);\">"
427
- st.markdown("Here you can see what this looks like when we apply entity-matching on the summary (compared to the "
428
- "original article). Entities in this summary are marked " + green_text + " when the entity also "
429
- "exists in the article, while unmatched entities are marked " + red_text + ".",
 
 
 
 
 
 
 
430
  unsafe_allow_html=True)
431
  entity_specific_text = fetch_entity_specific_contents(selected_article)
432
- st.markdown(entity_specific_text)
 
 
 
433
 
434
  # DEPENDENCY PARSING PART
435
  st.header("Dependency comparison")
436
- st.markdown("**Dependency parsing** is the process in which the grammatical structure in a sentence is analysed, "
437
- "to find out related words as well as the type of the relationship between them. For the sentence “Jan’s "
438
- "wife is called Sarah” you would get the following dependency graph:")
 
439
 
440
  # TODO: I wonder why the first doesn't work but the second does (it doesn't show deps otherwise)
441
  # st.image("ExampleParsing.svg")
442
  st.write(render_svg('ExampleParsing.svg'), unsafe_allow_html=True)
443
  st.markdown("Here, “Jan” is the “poss” (possession modifier) of “wife”. If suddenly the summary would read “Jan’s "
444
- "husband…”, there would be a dependency in the summary that is non-existent in the article itself. "
445
- "However, it could be that such a new dependency is not per se correct, “The borders of Ukraine” have a "
446
- "different dependency between “borders and Ukraine” than “Ukraine’s borders”, while this would also be "
447
- "correct. So general matching between summary and article wont work.")
448
- st.markdown("There is however a simple method that we found has potential in post-processing. Based on empirical "
449
- "results, we have found that when there are specific kinds of dependencies in the summary that are not in "
450
- "the article, these specific types are often an indication of a wrongly constructed sentence. Let’s take "
451
- "a look at an example:")
 
 
 
 
 
452
  with st.spinner("Doing dependency parsing..."):
453
  summary_deps = check_dependency(False)
454
  article_deps = check_dependency(True)
@@ -461,22 +390,22 @@ with st.spinner("Doing dependency parsing..."):
461
  if total_unmatched_deps:
462
  for current_drawing_list in total_unmatched_deps:
463
  render_dependency_parsing(current_drawing_list)
464
- dep_spec_text = fetch_dependency_specific_contents(selected_article)
465
- st.markdown(dep_spec_text)
466
- soup = BeautifulSoup("Example text option with box", features="html.parser")
467
  HTML_WRAPPER = """<div style="overflow-x: auto; border: 1px solid #e6e9ef; border-radius: 0.25rem; padding: 1rem;
468
  margin-bottom: 2.5rem">{}</div> """
469
- st.write(HTML_WRAPPER.format(soup), unsafe_allow_html=True)
470
 
471
  # OUTRO/CONCLUSION
472
  st.header("Wrapping up")
473
  st.markdown("We have presented 2 methods that try to improve summaries via post-processing steps. Entity matching can "
474
- "be used to solve hallucinations, while checking if specific dependencies are matched between summary and "
475
- "article can be used to filter out some bad sentences (and thus worse summaries). Of course these are "
476
- "only basic methods which were empirically tested, but they are a start at actually making something good "
477
- "(???). (something about that we tested also RE and maybe other things).")
 
478
  st.markdown("####")
479
- st.markdown("Now based on these methods you can check summaries and whether they are “good” or “bad”. Below you can "
480
- "generate 5 different kind of summaries for the starting article (based on different model params) in "
481
- "which their ranks are estimated, and hopefully the best summary (read: the one that a human would prefer "
482
- "or indicate as the best one) will be at the top.")
 
1
  import random
2
  from typing import AnyStr
3
+ # import tensorflow_hub as hub
4
 
5
  import itertools
6
  import streamlit as st
7
+
8
  import torch.nn.parameter
9
  from bs4 import BeautifulSoup
10
  import numpy as np
 
17
  from custom_renderer import render_sentence_custom
18
  from flair.data import Sentence
19
  from flair.models import SequenceTagger
20
+ from sentence_transformers import SentenceTransformer
21
 
22
  import spacy
23
  from spacy import displacy
 
28
  import os
29
  from transformers_interpret import SequenceClassificationExplainer
30
 
31
+ # USE_model = hub.load("https://tfhub.dev/google/universal-sentence-encoder/4")
32
+ sentence_embedding_model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
 
34
  # Page setup
35
  st.set_page_config(
 
45
  )
46
 
47
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48
  def list_all_article_names() -> list:
49
  filenames = []
50
  for file in sorted(os.listdir('./sample-articles/')):
 
77
  return data
78
 
79
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
80
  def display_summary(article_name: str):
81
  summary_content = fetch_summary_contents(article_name)
82
  st.session_state.summary_output = summary_content
 
147
  # TODO: currently substring matching but probably should do embedding method or idk?
148
  if any(entity.lower() in substring_entity.lower() for substring_entity in entities_article):
149
  matched_entities.append(entity)
150
+ elif any(
151
+ np.inner(sentence_embedding_model.encode(entity), sentence_embedding_model.encode(art_entity)) > 0.9 for
152
+ art_entity in entities_article):
153
+ matched_entities.append(entity)
154
  else:
155
  unmatched_entities.append(entity)
156
  return matched_entities, unmatched_entities
 
250
 
251
  # INTRODUCTION
252
  st.header("Introduction")
253
+ st.markdown("""Recent work using transformers on large text corpora has shown great success when fine-tuned on
254
+ several different downstream NLP tasks. One such task is that of text summarization. The goal of text summarization
255
+ is to generate concise and accurate summaries from input document(s). There are 2 types of summarization: extractive
256
+ and abstractive. **Extractive summarization** merely copies informative fragments from the input,
257
+ whereas **abstractive summarization** may generate novel words. A good abstractive summary should cover principal
258
+ information in the input and has to be linguistically fluent. This blogpost will focus on this more difficult task of
259
+ abstractive summary generation.""")
260
 
261
  st.markdown("""To generate summaries we will use the [PEGASUS] (https://huggingface.co/google/pegasus-cnn_dailymail)
262
+ model, producing abstractive summaries from large articles. These summaries often contain sentences with different
263
+ kinds of errors. Rather than improving the core model, we will look into possible post-processing steps to improve
264
+ the generated summaries. By comparing contents of the summary with the source text, we come up with a factualness
265
+ metric, indicating the trustworthiness of the generated summary. Throughout this blog, we will also explain the
266
+ results for some methods on specific examples. These text blocks will be indicated and they change according to the
267
+ currently selected article.""")
268
 
269
  # GENERATING SUMMARIES PART
270
  st.header("Generating summaries")
271
  st.markdown("Let’s start by selecting an article text for which we want to generate a summary, or you can provide "
272
  "text yourself. Note that it’s suggested to provide a sufficiently large text, as otherwise the summary "
273
+ "generated from it might not be optimal, leading to suboptimal performance of the post-processing steps.")
274
 
275
  # TODO: NEED TO CHECK ARTICLE TEXT INSTEAD OF ARTICLE NAME ALSO FREE INPUT OPTION
276
  selected_article = st.selectbox('Select an article or provide your own:',
 
282
  height=150
283
  )
284
 
285
+ st.markdown("Below you can find the generated summary for the article. Based on empirical research, we will discuss "
286
+ "two main methods that detect some common errors. We can then score different summaries, to indicate how "
287
+ "factual a summary is for a given article. The idea is that in production, you could generate a set of "
288
+ "summaries for the same article, with different parameters (or even different models). By using "
289
+ "post-processing error detection, we can then select the best possible summary.")
 
290
  if st.session_state.article_text:
291
  with st.spinner('Generating summary...'):
292
  # classify_comment(article_text, selected_model)
 
302
  print("YES")
303
  else:
304
  print("NO")
305
+
306
+
307
  def render_svg(svg_file):
308
  with open(svg_file, "r") as f:
309
  lines = f.readlines()
 
317
 
318
  # ENTITY MATCHING PART
319
  st.header("Entity matching")
320
+ st.markdown("The first method we will discuss is called **Named Entity Recognition** (NER). NER is the task of "
321
+ "identifying and categorising key information (entities) in text. An entity can be a singular word or a "
322
+ "series of words that consistently refers to the same thing. Common entity classes are person names, "
323
+ "organisations, locations and so on. By applying NER to both the article and its summary, we can spot "
324
+ "possible **hallucinations**. Hallucinations are words generated by the model that are not supported by "
325
+ "the source input. In theory all entities in the summary (such as dates, locations and so on), "
326
+ "should also be present in the article. Thus we can extract all entities from the summary and compare "
327
+ "them to the entities of the original article, spotting potential hallucinations. The more unmatched "
328
+ "entities we find, the lower the factualness score of the summary. ")
329
  with st.spinner("Calculating and matching entities..."):
330
  entity_match_html = highlight_entities(selected_article)
331
  st.write(entity_match_html, unsafe_allow_html=True)
 
337
 
338
  markdown_start_red = "<mark class=\"entity\" style=\"background: rgb(238, 135, 135);\">"
339
  markdown_start_green = "<mark class=\"entity\" style=\"background: rgb(121, 236, 121);\">"
340
+ st.markdown("We call this technique “entity matching” and here you can see what this looks like when we apply "
341
+ "this method on the summary. Entities in the summary are marked " + green_text + " when the entity "
342
+ "also exists in the "
343
+ "article, "
344
+ "while unmatched "
345
+ "entities are "
346
+ "marked " +
347
+ red_text + ". Several of the example articles and their summaries indicate different errors we find "
348
+ "by using this technique. Based on which article you choose, we provide a short "
349
+ "explanation of the results below.",
350
  unsafe_allow_html=True)
351
  entity_specific_text = fetch_entity_specific_contents(selected_article)
352
+ soup = BeautifulSoup(entity_specific_text, features="html.parser")
353
+ HTML_WRAPPER = """<div style="overflow-x: auto; border: 1px solid #e6e9ef; border-radius: 0.25rem; padding: 1rem;
354
+ margin-bottom: 2.5rem">{}</div> """
355
+ st.write("💡👇 **Specific example explanation** 👇💡", HTML_WRAPPER.format(soup), unsafe_allow_html=True)
356
 
357
  # DEPENDENCY PARSING PART
358
  st.header("Dependency comparison")
359
+ st.markdown("The second method we use for post-processing is called **Dependency parsing**: the process in which the "
360
+ "grammatical structure in a sentence is analysed, to find out related words as well as the type of the "
361
+ "relationship between them. For the sentence “Jan’s wife is called Sarah” you would get the following "
362
+ "dependency graph:")
363
 
364
  # TODO: I wonder why the first doesn't work but the second does (it doesn't show deps otherwise)
365
  # st.image("ExampleParsing.svg")
366
  st.write(render_svg('ExampleParsing.svg'), unsafe_allow_html=True)
367
  st.markdown("Here, “Jan” is the “poss” (possession modifier) of “wife”. If suddenly the summary would read “Jan’s "
368
+ "husband…”, there would be a dependency in the summary that is non-existent in the article itself (namely "
369
+ "“Jan” is the “poss” of “husband”). However, often new dependencies are introduced in the summary that "
370
+ "are still correct. “The borders of Ukraine” have a different dependency between “borders” and “Ukraine” "
371
+ "than “Ukraine’s borders”, while both descriptions have the same meaning. So just matching all "
372
+ "dependencies between article and summary (as we did with entity matching) would not be a robust method.")
373
+ st.markdown("However, by empirical testing, we have found that there are certain dependencies which can be used for "
374
+ "such matching techniques. When unmatched, these specific dependencies are often an indication of a "
375
+ "wrongly constructed sentence. **Should I explain this more/better or is it enough that I explain by "
376
+ "example specific run throughs?**. We found 2(/3 TODO) common dependencies which, when present in the "
377
+ "summary but not in the article, are highly indicative of factualness errors. Furthermore, we only check "
378
+ "dependencies between an existing **entity** and its direct connections. Below we highlight all unmatched "
379
+ "dependencies that satisfy the discussed constraints. We also discuss the specific results for the "
380
+ "currently selected article.")
381
  with st.spinner("Doing dependency parsing..."):
382
  summary_deps = check_dependency(False)
383
  article_deps = check_dependency(True)
 
390
  if total_unmatched_deps:
391
  for current_drawing_list in total_unmatched_deps:
392
  render_dependency_parsing(current_drawing_list)
393
+ dep_specific_text = fetch_dependency_specific_contents(selected_article)
394
+ soup = BeautifulSoup(dep_specific_text, features="html.parser")
 
395
  HTML_WRAPPER = """<div style="overflow-x: auto; border: 1px solid #e6e9ef; border-radius: 0.25rem; padding: 1rem;
396
  margin-bottom: 2.5rem">{}</div> """
397
+ st.write("💡👇 **Specific example explanation** 👇💡", HTML_WRAPPER.format(soup), unsafe_allow_html=True)
398
 
399
  # OUTRO/CONCLUSION
400
  st.header("Wrapping up")
401
  st.markdown("We have presented 2 methods that try to improve summaries via post-processing steps. Entity matching can "
402
+ "be used to solve hallucinations, while dependency comparison can be used to filter out some bad "
403
+ "sentences (and thus worse summaries). These methods highlight the possibilities of post-processing "
404
+ "AI-made summaries, but are only a basic introduction. As the methods were empirically tested they are "
405
+ "definitely not sufficiently robust for general use-cases. (something about that we tested also RE and "
406
+ "maybe other things).")
407
  st.markdown("####")
408
+ st.markdown("Below we generated 5 different kind of summaries from the article in which their ranks are estimated, "
409
+ "and hopefully the best summary (read: the one that a human would prefer or indicate as the best one) "
410
+ "will be at the top. TODO: implement this (at the end I think) and also put something in the text with "
411
+ "the actual parameters or something? ")
dependency-specific-text/article11.txt CHANGED
@@ -1,4 +1,4 @@
1
- One of the dependencies that, when found in the summary but not in the article, indicates a possible error is the "poss" (possession modifier) dependency.
2
- In the image above, you can see the unmatched dependency that is found in the summary but not present in the article. For the "poss" dependency, we only check matches when the target word is "in", as it is here. U.S. is the entity here.
3
- For this specific example, it's obvious that the dependency of "in U.S." is not found in the article, as you can already see in the entity matching paragraph that U.S. is a hallucinated entity and doesn't occur in the article itself,
4
- so technically we don't need dependency comparison here to spot this particular error.
 
1
+ One of the dependencies that, when found in the summary but not in the article, indicates a possible error is the "pobj" (object of preposition) dependency.
2
+ Furthermore, we only match *pobj* dependencies when the target word is "in", as in this example.
3
+ In this case it's obvious that "in U.S." is not found in the article, as "U.S." is a hallucinated entity itself as discussed in the entity matching paragraph.
4
+ So technically we don't need dependency comparison to spot the error from this summary.
dependency-specific-text/article13.txt CHANGED
@@ -1,3 +1,9 @@
1
  One of the dependencies that, when found in the summary but not in the article, indicates a possible error is the "amod" (adjectival modifier) dependency.
2
- In the image above, you can see the unmatched dependency that is found in the summary but not present in the article. "First" is the entity here, and it's the adjectival modifier of the word "phone".
3
- However, this sentence is not factual, since the article talks about a **new** type of flagship phone, and not at all the **first** flagship phone. This is wrong, and the error was found by filtering on this specific kind of dependency.
 
 
 
 
 
 
 
1
  One of the dependencies that, when found in the summary but not in the article, indicates a possible error is the "amod" (adjectival modifier) dependency.
2
+ Applied to this summary, we have "First" as the entity, and it is the adjectival modifier of the word "phone".
3
+ And indeed, this unmatched dependency indicates an actual error here. The sentence is not factual, since the article talks about a **new** type of flagship phone,
4
+ and not the **first** flagship phone. This error was found by filtering on this specific kind of dependency. Empirical results showed that unmatched *amod* dependencies often suggest
5
+ that the summary sentence contains an error. <br> <br>
6
+ Another dependency that we use is the "pobj" (object of preposition) dependency.
7
+ Furthermore, we only match *pobj* dependencies when the target word is "in", as in this example.
8
+ In this case the sentence itself contains a factual error (because the article states "there's no word on a US release date yet").
9
+ However, this could have been found by entity matching already (as "January 18" is unmatched), and the unmatched dependency cannot be completely blamed for this error here.
dependency-specific-text/article16.txt ADDED
File without changes
dependency-specific-text/article4.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ One of the dependencies that, when found in the summary but not in the article, indicates a possible error is the "amod" (adjectival modifier) dependency.
2
+ Applied to this summary, we have "Democratic" as the entity, and it is the adjectival modifier of the word "member".
3
+ And indeed, this unmatched dependency indicates an actual error here. The sentence is not factual for two reasons. <br> <br>
4
+ First, the article talks about "democrats" and "members of the committee", which are two separate things. The summary combines those two in a way
5
+ that can be seen as not completely factual. Second, the statement itself was not made by a democrat (nor a member of the committee), and even though the dependency can't be
6
+ directly linked to this error, empirical results showed that unmatched *amod* dependencies often suggest
7
+ that the summary sentence is incorrect.
dependency-specific-text/article9.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ One of the dependencies that, when found in the summary but not in the article, indicates a possible error is the "pobj" (object of preposition) dependency.
2
+ Furthermore, we only match *pobj* dependencies when the target word is "in", as in this example.
3
+ The sentence here is not a factual error per se, but rather a readability issue. The "in" should be dropped to make the sentence correct.
4
+ For better examples with this specific dependency, try choosing another article. TODO: readability issue with the dependency graph for this specific issue
dependency-specific-text/biden.txt ADDED
File without changes
dependency-specific-text/protestors.txt ADDED
File without changes
entity-specific-text/article11.txt CHANGED
@@ -1,3 +1,4 @@
1
- For this summary, there are 2 unmatched entities: "The Mark Levinson" and "U.S". The first one
2
- is not actually a real error per se, but rather a "the" before "Mark Levinson" (TODO EXPLAIN BIT BETTER).
3
- The "U.S." however is a hallucinated entity not present in the article, and via this method this can be found.
 
 
1
+ As you can see, we have 1 unmatched entity: "U.S." is a hallucinated entity in the summary that does not exist in the article.
2
+ Deep learning based generation is [prone to hallucinate](https://arxiv.org/pdf/2202.03629.pdf) unintended text. These hallucinations degrade
3
+ system performance and fail to meet user expectations in many real-world scenarios. By applying entity matching, we can mitigate this problem
4
+ for the downstream task of summary generation.
entity-specific-text/article13.txt CHANGED
@@ -1,2 +1,5 @@
1
- For this summary, there are 2 unmatched entities: "January 18" and "U.S". January 18 is indeed a hallucinated entity, as there is no sentence containing this exact date. U.S. does occur in the article, but as "US" instead of "U.S.". This can be solved
2
- by comparing to a list of abbreviations (of embeddings :TODO?)
 
 
 
 
1
+ As you can see, we have 2 unmatched entities: "January 18" and "U.S.". The first one is a hallucinated entity in the summary that does not exist in the article.
2
+ Deep learning based generation is [prone to hallucinate](https://arxiv.org/pdf/2202.03629.pdf) unintended text. These hallucinations degrade
3
+ system performance and fail to meet user expectations in many real-world scenarios. By applying entity matching, we can mitigate this problem
4
+ for the downstream task of summary generation. U.S. **does** occur in the article, but as "US" instead of "U.S.". This could be solved
5
+ by comparing against a list of abbreviations or by using an abbreviation-specific embedder, but this is currently not implemented.
entity-specific-text/article16.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ As you can see, we have 1 unmatched entity: "Six9" is a hallucinated entity in the summary that does not exist in the article.
2
+ Deep learning based generation is [prone to hallucinate](https://arxiv.org/pdf/2202.03629.pdf) unintended text. These hallucinations degrade
3
+ system performance and fail to meet user expectations in many real-world scenarios. By applying entity matching, we can mitigate this problem
4
+ for the downstream task of summary generation.
entity-specific-text/article4.txt ADDED
File without changes
entity-specific-text/article9.txt ADDED
File without changes
entity-specific-text/biden.txt ADDED
File without changes
entity-specific-text/protestors.txt ADDED
File without changes
requirements.txt CHANGED
@@ -2,6 +2,7 @@ beautifulsoup4==4.10.0
2
  streamlit==1.2.0
3
  transformers==4.15.0
4
  transformers-interpret==0.5.2
 
5
  spacy==3.0.0
6
  spacy_streamlit==1.0.3
7
  flair
 
2
  streamlit==1.2.0
3
  transformers==4.15.0
4
  transformers-interpret==0.5.2
5
+ sentence-transformers==2.2.0
6
  spacy==3.0.0
7
  spacy_streamlit==1.0.3
8
  flair
{sample-articles → sample-articles-temp}/biden.txt RENAMED
File without changes
{sample-articles → sample-articles-temp}/protestors.txt RENAMED
File without changes
sample-articles/article4.txt ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Former White House chief of staff Mark Meadows will no longer cooperate with the House select committee investigating January 6 insurrection, according to a letter from his attorney to the panel, which was obtained by CNN on Tuesday.
2
+
3
+ "We agreed to provide thousands of pages of responsive documents and Mr. Meadows was willing to appear voluntarily, not under compulsion of the Select Committee's subpoena to him, for a deposition to answer questions about non-privileged matters. Now actions by the Select Committee have made such an appearance untenable," the letter from George J. Terwilliger II stated.
4
+
5
+ "In short, we now have every indication from the information supplied to us last Friday -- upon which Mr. Meadows could expect to be questioned -- that the Select Committee has no intention of respecting boundaries concerning Executive Privilege," Terwilliger added.
6
+
7
+ The committee said later Tuesday that it will move forward with a scheduled deposition with Meadows on Wednesday even though he said he no longer plans to cooperate.
8
+
9
+ By proceeding with the scheduled deposition, the committee is setting up a path to hold Meadows in criminal contempt.
10
+
11
+ "Tomorrow's deposition, which was scheduled at Mr. Meadows's request, will go forward as planned. If indeed Mr. Meadows refuses to appear, the Select Committee will be left no choice but to advance contempt proceedings and recommend that the body in which Mr. Meadows once served refer him for criminal prosecution," Democratic Rep. Bennie Thompson of Mississippi and GOP Rep. Liz Cheney of Wyoming, who lead the committee, said in a joint statement.
12
+
13
+ Thompson told CNN later Tuesday evening, "Obviously, we had hoped Mr. Meadows would continue to work with the committee. But obviously based on his lawyer's letter today and his plan to not show up for the deposition, that creates a different dynamic."
14
+
15
+ "As you know, we were prepared to go with contempt earlier, but we withheld it based on what we thought was an agreement that we'd work together. That has not been the case. So obviously, we will move forward with it," he said.
16
+
17
+ Although Thompson indicated criminal contempt was on the table, he made clear that the committee is weighing multiple options, including immunity, that could pave the way for it to get the information that it wants from Meadows.
18
+
19
+ "I think we're interested in getting the information. I think we will still want Mr. Meadows to cooperate. So we will look at all of our options at this point," he said.
20
+
21
+ Responding to the letter from Meadows' attorney, the committee made clear it needs to hear from the former White House chief of staff "about voluminous official records stored in his personal phone and email accounts, which were required to be turned over to the National Archives in accordance with the Presidential Records Act. "
22
+
23
+ A source familiar with the matter told CNN that among the 6,000 pages of documents Meadows has already provided to the committee are communications from January 6. It is still unclear who communicated that day with Meadows but the source said that "many people had Meadows' cell phone."
24
+
25
+ Democratic Rep. Pete Aguilar of California, who serves on the panel, told CNN that within the documents Meadows turned over is evidence that he was in communication with individuals involved in the planning of the rally on January 6 that preceded the riot.
26
+
27
+ "What I'll share is that we continue to learn and we continue to connect the dots," Aguilar said. "But individuals that were responsible for the planning of January 6 in the rally, Mr. Meadows was in communication with, and those are in the documents ... that he turned over himself."
28
+
29
+ Aguilar added that some of the records Meadows turned over, including text messages, were from his personal device.
30
+
31
+ Rep. Zoe Lofgren, a California Democrat and member of the committee, said on CNN's "The Lead with Jake Tapper" that the records include "volumes of material, including real time communication as the riot unfolded." Lofgren said the messages were shared "without an assertion of privilege," and criticized Meadows for then reversing his cooperation.
32
+
33
+ "The committee wants to ask him about some of that, and it's really untenable that all of a sudden at the last minute he's saying no. That somehow there's some reason why he can't talk about this," Lofgren said.
34
+
35
+ CNN first reported last week that Meadows had begun cooperating with the committee, handing over thousands of documents and agreeing to appear for an interview this week.
36
+
37
+ Meadows' about-face is due in part to learning over the weekend that the committee had "issued wide ranging subpoenas for information from a third party communications provider," the letter notes.
38
+
39
+ "As a result of careful and deliberate consideration of these factors, we now must decline the opportunity to appear voluntarily for a deposition," Terwilliger writes.
40
+
41
+ Terwilliger writes that Meadows would answer written questions "so that there might be both an orderly process and a clear record of questions and related assertions of privilege where appropriate."
42
+
43
+ Responding to Meadows' claim that the committee was ignoring his claims of executive privilege, Thompson and Cheney state that Meadows was willing to discuss details about Trump in his new book.
44
+
45
+ "Mark Meadows has informed the Select Committee that he does not intend to cooperate further with our investigation despite his apparent willingness to provide details about the facts and circumstances surrounding the January 6th attack, including conversations with President Trump, in the book he is now promoting and selling," they write.
46
+
47
+ The pair add that they have "numerous questions" for Meadows that have nothing to do with executive privilege.
48
+
49
+ Rep. Stephanie Murphy, a Florida Democrat who's also a member of the select committee, said Tuesday evening that while the panel will continue to do all it can to compel the testimony of witnesses like Meadows and Steve Bannon, members believe they'll be able to get the information they're looking for without their help.
50
+
51
+ "To be fair, it's only a very handful of people who want to risk jail time and fines for contempt of Congress who are obstructing our process," Murphy said.
52
+
53
+ "The vast majority of the people that we have reached out to are providing us with information, with evidence, with text messages, with emails, with details of conversations that they have been a party to. So these people are well within their right to not cooperate, but it's not as if we're not going to get to the information we need."
54
+
55
+ This story has been updated with additional developments Tuesday.
sample-articles/article9.txt ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Novak Djokovic has been granted permission to defend his Australian Open title
2
+
3
+ Australians have reacted angrily to news that tennis player Novak Djokovic will play in the Australian Open, after being exempted from vaccination rules.
4
+
5
+ All players and staff at the tournament must be vaccinated or have an exemption granted by an expert independent panel.
6
+
7
+ Djokovic has not spoken about his vaccination status, but last year said he was "opposed to vaccination".
8
+
9
+ Organisers say he has not been given special treatment. But Australians have criticised officials and the player.
10
+
11
+ The controversy comes as the country is seeing tens of thousands of Covid-19 cases for the first time after enduring some of the world's strictest restrictions.
12
+
13
+ Over 90% of Australia's over-16 population is fully vaccinated, but some Australians still cannot travel interstate or globally because of current restrictions.
14
+
15
+ Amid the row, Australian Prime Minister Scott Morrison said Djokovic would be required to present evidence upon arrival that he has a genuine medical exemption from vaccination, or he would be "on the next plane home".
16
+
17
+ "If that evidence is insufficient, then he won't be treated any different to anyone else and he'll be on the next plane home," the prime minister told reporters. "There should be no special rules for Novak Djokovic at all. None whatsoever."
18
+
19
+ Many Australians had previously accused the government of allowing the rich and famous to do as they please while ordinary people remained separated from sick and dying loved ones.
20
+
21
+ "I think it's a disgrace," Christine Wharton, who lives in Melbourne, where the Australian Open will be held, told ABC.
22
+
23
+ "We've all done the right thing, we've all gone out and got our jabs and our boosters and we have someone that has come from overseas and all of a sudden he's been exempt and can play and I think it's an absolute disgrace and I won't be watching it."
24
+
25
+ A&E doctor Stephen Parnis tweeted: "I don't care how good a tennis player he is. If he's refusing to get vaccinated, he shouldn't be allowed in. "If this exemption is true, it sends an appalling message to millions seeking to reduce #COVID19Aus risk to themselves & others."
26
+
27
+ The decision raised eyebrows with some other tennis players too. "I just think it's very interesting. That's all I'm going to say," Australian Alex de Minaur said.
28
+
29
+ Britain's Jamie Murray added: "I think if it was me that wasn't vaccinated I wouldn't be getting an exemption. You know, but well done to him for getting clear to come to Australia and compete."
30
+
31
+ The Australian Open begins on 17 January, and the event's chief executive Craig Tiley said 26 athletes had applied for medical exemptions. "A handful" had been granted, he said, under guidelines set by federal regulators.
32
+
33
+ "We made it extra difficult for anyone applying for an application to ensure it was the right process and to make sure the medical experts deal with it independently," he told Channel 9.
34
+
35
+ Applications for medical exemptions are being assessed anonymously by two separate panels, with inflammatory cardiac illness or another acute condition listed as valid reasons.
36
+
37
+ But it is also possible Djokovic has recently tested positive for the virus, which would allow him to defer taking the vaccine.
38
+
39
+ He has not revealed his vaccination status and said last April: "Personally I am opposed to vaccination and I wouldn't want to be forced by someone to take a vaccine in order to be able to travel."
40
+
41
+ On Tuesday he said on Instagram: "I've spent fantastic quality time with my loved ones over the break and today I'm heading down under with an exemption permission. Let's go 2022. I am ready to live and breathe tennis in the next few weeks of competition."
42
+
43
+ Media caption,
44
+ "Heartless" Queensland bars US couple from seeing dying father
45
+
46
+ Victoria state government minister Jaala Pulford acknowledged the decision was "frustrating and upsetting", but also denied that Djokovic had received special treatment. Both she and Mr Tiley urged Djokovic to give more information to the public.
47
+
48
+ "It'll certainly be helpful if Novak was to explain the conditions in which he's sought an exemption and granted an exemption but ultimately it's up to him," Mr Tiley said.
sample-summaries/article4.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ Former White House chief of staff Mark Meadows will no longer cooperate with the House select committee. The committee says it will move forward with a scheduled deposition with Meadows on Wednesday. By proceeding with the scheduled deposition, the committee is setting up a path to hold Meadows in criminal contempt. A source familiar with the matter told CNN that among the 6,000 pages of documents Meadows has already provided to the committee are communications from January 6. A Democratic member of the committee said Meadows' about-face is due in part to learning over the weekend that the committee had "issued wide ranging subpoenas for information from a third party communications provider".
sample-summaries/article9.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ Novak Djokovic has been granted permission to play in the Australian Open. All players and staff at the tournament must be vaccinated or have an exemption.. Djokovic has not spoken about his vaccination status, but last year said he was "opposed to vaccination" Australian Prime Minister Scott Morrison says there should be no special rules for Djokovic, but adds he would be "on the next plane home" if he did not have the right evidence. in Australia is seeing tens of thousands of Covid-19 cases for the first time after enduring some of the world's strictest restrictions.
sample-summaries/biden.txt ADDED
File without changes