tommasobaldi committed
Commit d33b093
1 Parent(s): edce3dc

add requirements.txt

Files changed (2)
  1. Summarizer.py +0 -56
  2. app.py +12 -15
Summarizer.py DELETED
@@ -1,56 +0,0 @@
-import nltk
-
-from sumy.nlp.stemmers import Stemmer
-from sumy.summarizers.lsa import LsaSummarizer
-from sumy.utils import get_stop_words
-from transformers import Pipeline
-
-class Summarizer:
-    DEFAULT_LANGUAGE = "english"
-    DEFAULT_SENTENCE_LENGTH = 15
-
-    def __init__(self, pipeline: Pipeline):
-        self.pipeline = pipeline
-        stemmer = Stemmer(Summarizer.DEFAULT_LANGUAGE)
-        self.lsa_summarizer = LsaSummarizer(stemmer)
-        self.lsa_summarizer.stop_words = get_stop_words(language=Summarizer.DEFAULT_LANGUAGE)
-
-    @staticmethod
-    def sentence_list(summarized_sentences) -> list:
-        summarized_list = []
-        for sentence in summarized_sentences:
-            summarized_list.append(sentence._text)
-        return summarized_list
-
-    @staticmethod
-    def join_sentences(summarized_sentences: list) -> str:
-        return " ".join([sentence for sentence in summarized_sentences])
-
-    @staticmethod
-    def split_sentences_by_token_length(summary_sentences: list, split_token_length: int) -> list:
-        accumulated_list = []
-        result_list = []
-        cumulative_token_length = 0
-        for sentence in summary_sentences:
-            token_list = [token for token in nltk.word_tokenize(sentence) if token not in ["."]]
-            token_length = len(token_list)
-            if token_length + cumulative_token_length > split_token_length and result_list:
-                accumulated_list.append(Summarizer.join_sentences(result_list))
-                result_list = [sentence]
-                cumulative_token_length = token_length
-            else:
-                result_list.append(sentence)
-                cumulative_token_length += token_length
-
-        if result_list:
-            accumulated_list.append(Summarizer.join_sentences(result_list))
-
-        return accumulated_list
-
-    def abstractive_summary(self, summary_sentences: list) -> list:
-        wrapped_sentences = Summarizer.split_sentences_by_token_length(summary_sentences, split_token_length=600)
-        summary_list = []
-        for result in self.pipeline(wrapped_sentences, min_length=32, max_length=512):
-            summary_list.append(result['summary_text'])
-
-        return summary_list
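
Note: the sentence chunking that the deleted class performed in split_sentences_by_token_length can be reproduced standalone. The sketch below copies that logic (greedily group sentences until the running token count would exceed the limit, then start a new chunk); the sample text and the helper name split_by_token_length are invented for illustration.

# Standalone sketch of the removed split_sentences_by_token_length logic.
# The example sentences are invented; only the chunking behaviour mirrors
# the deleted Summarizer class.
import nltk

nltk.download("punkt", quiet=True)

def split_by_token_length(sentences, limit):
    chunks, current, count = [], [], 0
    for sentence in sentences:
        tokens = [t for t in nltk.word_tokenize(sentence) if t != "."]
        if count + len(tokens) > limit and current:
            # Current chunk would overflow: flush it and start a new one.
            chunks.append(" ".join(current))
            current, count = [sentence], len(tokens)
        else:
            current.append(sentence)
            count += len(tokens)
    if current:
        chunks.append(" ".join(current))
    return chunks

sentences = nltk.sent_tokenize(
    "Streamlit apps run as Python scripts. The summarizer groups sentences "
    "into chunks. Each chunk is then passed to the transformers pipeline."
)
print(split_by_token_length(sentences, limit=10))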
app.py CHANGED
@@ -8,8 +8,6 @@ import validators
 from transformers import pipeline
 from validators import ValidationFailure
 
-from Summarizer import Summarizer
-
 
 def main() -> None:
     nltk.download("punkt")
@@ -58,12 +56,10 @@ def main() -> None:
         text = file.read()
         return text
 
-    summarizer: Summarizer = Summarizer(create_pipeline())
-
    if "target_text" not in st.session_state:
        st.session_state.target_text = ""
    if "sentence_lenght" not in st.session_state:
-        st.session_state.sentence_length = Summarizer.DEFAULT_SENTENCE_LENGTH
+        st.session_state.sentence_length = 15
    if "sample_choice" not in st.session_state:
        st.session_state.sentence_length = ""
 
@@ -89,18 +85,19 @@ def main() -> None:
 
     summarize_button = st.button(label="Try it!")
 
-    @st.cache(suppress_st_warning=True,
-              show_spinner=False,
-              allow_output_mutation=True,
-              hash_funcs={"torch.nn.parameter.Parameter": lambda _: None,
-                          "tokenizers.Tokenizer": lambda _: None,
-                          "tokenizers.AddedToken": lambda _: None,
-                          })
+    # @st.cache(suppress_st_warning=True,
+    #           show_spinner=False,
+    #           allow_output_mutation=True,
+    #           hash_funcs={"torch.nn.parameter.Parameter": lambda _: None,
+    #                       "tokenizers.Tokenizer": lambda _: None,
+    #                       "tokenizers.AddedToken": lambda _: None,
+    #                       }
+    #           )
 
 
-    def summary_from_cache(summary_sentence: tuple) -> tuple:
-        with st.spinner("Summarizing in progress..."):
-            return tuple(summarizer.abstractive_summary(list(summary_sentence)))
+    # def summary_from_cache(summary_sentence: tuple) -> tuple:
+    #     with st.spinner("Summarizing in progress..."):
+    #         return tuple(summarizer.abstractive_summary(list(summary_sentence)))
 
     if summarize_button:
         output = pipeline(st.session_state.target_text)
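
With the Summarizer wrapper and the cached summary_from_cache helper removed, app.py now passes the session text straight to the transformers pipeline. A minimal sketch of that direct call outside Streamlit, assuming a checkpoint such as sshleifer/distilbart-cnn-12-6 (the commit does not show which model create_pipeline() actually loads):

# Minimal sketch of the direct summarization call that replaces the cached
# Summarizer wrapper. The model checkpoint and sample text are assumptions
# for illustration only.
from transformers import pipeline

summarize = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6")
text = (
    "Streamlit turns Python scripts into shareable web apps. "
    "This demo summarizes pasted text or an uploaded document with a "
    "transformers summarization pipeline."
)
result = summarize(text, min_length=32, max_length=64)
print(result[0]["summary_text"])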