Marroco93 committed on
Commit
1aafe2e
1 Parent(s): a2d539a

no message

Browse files
Files changed (1) hide show
  1. main.py +6 -0
main.py CHANGED
@@ -85,6 +85,8 @@ async def generate_text(item: Item):
85
 
86
  def split_text_by_tokens(text, max_tokens=1024):
87
  # Tokenize the text
 
 
88
  tokens = tokenizer.tokenize(text)
89
  # Split into chunks of max_tokens
90
  for i in range(0, len(tokens), max_tokens):
@@ -95,7 +97,11 @@ def summarize_large_text(text):
95
  # Use the updated split_text_by_tokens function
96
  chunks = list(split_text_by_tokens(text, max_tokens=1024 - 10)) # Slight buffer to avoid edge cases
97
  summaries = []
 
 
98
  for chunk in chunks:
 
 
99
  # Check if chunk is within the token limit just to be sure
100
  chunk_tokens = tokenizer.encode(chunk)
101
  if len(chunk_tokens) > 1024:
 
85
 
86
  def split_text_by_tokens(text, max_tokens=1024):
87
  # Tokenize the text
88
+ print("Tokenizing text...")
89
+
90
  tokens = tokenizer.tokenize(text)
91
  # Split into chunks of max_tokens
92
  for i in range(0, len(tokens), max_tokens):
 
97
  # Use the updated split_text_by_tokens function
98
  chunks = list(split_text_by_tokens(text, max_tokens=1024 - 10)) # Slight buffer to avoid edge cases
99
  summaries = []
100
+ print("Tokenization complete, summarizing chunks...")
101
+
102
  for chunk in chunks:
103
+ print("loop chunks...")
104
+
105
  # Check if chunk is within the token limit just to be sure
106
  chunk_tokens = tokenizer.encode(chunk)
107
  if len(chunk_tokens) > 1024: