Spaces:
Runtime error
Runtime error
tommasobaldi
committed on
Commit
•
87fbf70
1
Parent(s):
69f90b2
working on text splitting
Browse files
app.py
CHANGED
@@ -97,29 +97,33 @@ def main() -> None:
|
|
97 |
# with st.spinner("Summarizing in progress..."):
|
98 |
# return tuple(summarizer.abstractive_summary(list(summary_sentence)))
|
99 |
|
100 |
-
def
|
101 |
-
|
102 |
|
103 |
-
|
104 |
-
|
105 |
-
|
|
|
106 |
for sentence in sentences:
|
107 |
-
|
108 |
-
|
109 |
-
|
110 |
-
|
|
|
|
|
111 |
else:
|
112 |
-
|
113 |
-
|
114 |
-
|
115 |
-
|
|
|
116 |
|
117 |
pipe = create_pipeline()
|
118 |
|
119 |
if summarize_button:
|
120 |
if target_text_input is not "":
|
121 |
with st.spinner("Summarizing in progress..."):
|
122 |
-
sentences =
|
123 |
for sentence in sentences:
|
124 |
st.text(sentence)
|
125 |
#output = pipe(sentence)
|
|
|
97 |
# with st.spinner("Summarizing in progress..."):
|
98 |
# return tuple(summarizer.abstractive_summary(list(summary_sentence)))
|
99 |
|
100 |
+
def join_sentences(sentences: list) -> str:
    """Concatenate sentences into one string separated by single spaces.

    Args:
        sentences: The sentence strings to join, in order.

    Returns:
        All sentences joined by ``" "``; an empty string when ``sentences``
        is empty.
    """
    # str.join consumes any iterable directly, so no intermediate copy of the
    # input is needed.
    return " ".join(sentences)
|
102 |
|
103 |
+
def split_sentences_by_token_length(sentences: list, split_token_length: int) -> list:
    """Pack consecutive sentences into chunks bounded by a token budget.

    Each sentence is tokenized with ``nltk.word_tokenize`` and its size is the
    number of resulting tokens other than ``"."``. Sentences are added, in
    order, to the chunk being built. When adding the next sentence would push
    the chunk's running token count above ``split_token_length`` and the chunk
    already holds at least one sentence, the chunk is closed and that sentence
    starts a new one. Sentences are never cut, so a sentence that alone
    exceeds the budget still ends up in a chunk.

    Args:
        sentences: The sentence strings to group, in order.
        split_token_length: Token budget a chunk may not exceed once it
            already contains a sentence.

    Returns:
        A list of strings, one per chunk, each produced by passing the
        chunk's sentences to ``join_sentences``.
    """
    chunks = []
    pending = []
    pending_tokens = 0

    for sentence in sentences:
        # Count every token except the bare full stop.
        n_tokens = sum(1 for tok in nltk.word_tokenize(sentence) if tok != ".")
        over_budget = pending_tokens + n_tokens > split_token_length

        if over_budget and pending:
            # Close the current chunk and let this sentence open the next one.
            chunks.append(join_sentences(pending))
            pending, pending_tokens = [sentence], n_tokens
            continue

        pending.append(sentence)
        pending_tokens += n_tokens

    # Flush whatever is left after the last sentence.
    if pending:
        chunks.append(join_sentences(pending))

    return chunks
|
120 |
|
121 |
pipe = create_pipeline()
|
122 |
|
123 |
if summarize_button:
|
124 |
if target_text_input is not "":
|
125 |
with st.spinner("Summarizing in progress..."):
|
126 |
+
sentences = split_sentences_by_token_length(target_text_input)
|
127 |
for sentence in sentences:
|
128 |
st.text(sentence)
|
129 |
#output = pipe(sentence)
|