main.py CHANGED
@@ -85,6 +85,8 @@ async def generate_text(item: Item):
 
 def split_text_by_tokens(text, max_tokens=1024):
     # Tokenize the text
+    print("Tokenizing text...")
+
     tokens = tokenizer.tokenize(text)
     # Split into chunks of max_tokens
     for i in range(0, len(tokens), max_tokens):
@@ -95,7 +97,11 @@ def summarize_large_text(text):
     # Use the updated split_text_by_tokens function
     chunks = list(split_text_by_tokens(text, max_tokens=1024 - 10))  # Slight buffer to avoid edge cases
     summaries = []
+    print("Tokenization complete, summarizing chunks...")
+
     for chunk in chunks:
+        print("loop chunks...")
+
         # Check if chunk is within the token limit just to be sure
         chunk_tokens = tokenizer.encode(chunk)
         if len(chunk_tokens) > 1024:
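
For context, a minimal, self-contained sketch of the two functions this commit touches, assuming a Hugging Face transformers tokenizer and summarization pipeline. The checkpoint name, the body of the chunking loop, the over-limit branch, and the summarizer call are assumptions (the commit truncates before showing them) and are labeled as such in the comments.

# Minimal sketch of the chunked-summarization flow shown in the diff.
# Assumed and not taken from the commit: the checkpoint name, the
# yield-based loop body, the over-limit handling, and the pipeline call.
from transformers import AutoTokenizer, pipeline

MODEL = "facebook/bart-large-cnn"  # assumed checkpoint
tokenizer = AutoTokenizer.from_pretrained(MODEL)
summarizer = pipeline("summarization", model=MODEL)  # assumed pipeline

def split_text_by_tokens(text, max_tokens=1024):
    # Tokenize the text once, then yield decoded windows of at most
    # max_tokens tokens each (assumed loop body; the diff cuts off here).
    tokens = tokenizer.tokenize(text)
    for i in range(0, len(tokens), max_tokens):
        yield tokenizer.convert_tokens_to_string(tokens[i:i + max_tokens])

def summarize_large_text(text):
    # The 1024 - 10 buffer leaves headroom for the special tokens that
    # tokenizer.encode() adds on top of the raw chunk tokens.
    chunks = list(split_text_by_tokens(text, max_tokens=1024 - 10))
    summaries = []
    for chunk in chunks:
        # Check if chunk is within the token limit just to be sure
        chunk_tokens = tokenizer.encode(chunk)
        if len(chunk_tokens) > 1024:
            continue  # assumed handling; the diff ends at this branch
        summaries.append(summarizer(chunk)[0]["summary_text"])
    return " ".join(summaries)

Note that tokenizer.encode() includes special tokens by default, so a chunk of exactly 1024 raw tokens can re-encode to slightly more; that is likely why the original passes max_tokens=1024 - 10 and re-checks each chunk before summarizing.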