Spaces:

ML-unipi
/

TermsOfServiceSummarization

Runtime error

tommasobaldi committed on Aug 28, 2022

Commit

c1aef33

•

1 Parent(s): f6ab2e2

working on text splitting

Files changed (1) hide show

app.py CHANGED Viewed

@@ -56,8 +56,8 @@ def main() -> None:
         for sentence in sentences:
             # token_list = [token for token in nltk.word_tokenize(sentence)]
-            token_list = tokenizer(sentence, max_length=1024, truncation=True, padding="max_length", return_tensors="pt")
-            token_length = len(token_list)
             if token_length + cumulative_token_length > split_token_length and result_list:
                 accumulated_lists.append(join_sentences(result_list))
                 result_list = [sentence]

         for sentence in sentences:
             # token_list = [token for token in nltk.word_tokenize(sentence)]
+            token_list = tokenizer(sentence, max_length=1024, truncation=True)
+            token_length = len(token_list["input_ids"])
             if token_length + cumulative_token_length > split_token_length and result_list:
                 accumulated_lists.append(join_sentences(result_list))
                 result_list = [sentence]