Spaces:
Runtime error
Runtime error
tommasobaldi
committed on
Commit
•
c1aef33
1
Parent(s):
f6ab2e2
working on text splitting
Browse files
app.py
CHANGED
@@ -56,8 +56,8 @@ def main() -> None:
|
|
56 |
|
57 |
for sentence in sentences:
|
58 |
# token_list = [token for token in nltk.word_tokenize(sentence)]
|
59 |
-
token_list = tokenizer(sentence, max_length=1024, truncation=True
|
60 |
-
token_length = len(token_list)
|
61 |
if token_length + cumulative_token_length > split_token_length and result_list:
|
62 |
accumulated_lists.append(join_sentences(result_list))
|
63 |
result_list = [sentence]
|
|
|
56 |
|
57 |
for sentence in sentences:
|
58 |
# token_list = [token for token in nltk.word_tokenize(sentence)]
|
59 |
+
token_list = tokenizer(sentence, max_length=1024, truncation=True)
|
60 |
+
token_length = len(token_list["input_ids"])
|
61 |
if token_length + cumulative_token_length > split_token_length and result_list:
|
62 |
accumulated_lists.append(join_sentences(result_list))
|
63 |
result_list = [sentence]
|