Marroco93 committed on
Commit
1aafe2e
1 Parent(s): a2d539a

no message

Browse files
Files changed (1) hide show
  1. main.py +6 -0
main.py CHANGED
@@ -85,6 +85,8 @@ async def generate_text(item: Item):
85
 
86
  def split_text_by_tokens(text, max_tokens=1024):
87
  # Tokenize the text
 
 
88
  tokens = tokenizer.tokenize(text)
89
  # Split into chunks of max_tokens
90
  for i in range(0, len(tokens), max_tokens):
@@ -95,7 +97,11 @@ def summarize_large_text(text):
95
  # Use the updated split_text_by_tokens function
96
  chunks = list(split_text_by_tokens(text, max_tokens=1024 - 10)) # Slight buffer to avoid edge cases
97
  summaries = []
 
 
98
  for chunk in chunks:
 
 
99
  # Check if chunk is within the token limit just to be sure
100
  chunk_tokens = tokenizer.encode(chunk)
101
  if len(chunk_tokens) > 1024:
 
85
 
86
  def split_text_by_tokens(text, max_tokens=1024):
87
  # Tokenize the text
88
+ print("Tokenizing text...")
89
+
90
  tokens = tokenizer.tokenize(text)
91
  # Split into chunks of max_tokens
92
  for i in range(0, len(tokens), max_tokens):
 
97
  # Use the updated split_text_by_tokens function
98
  chunks = list(split_text_by_tokens(text, max_tokens=1024 - 10)) # Slight buffer to avoid edge cases
99
  summaries = []
100
+ print("Tokenization complete, summarizing chunks...")
101
+
102
  for chunk in chunks:
103
+ print("loop chunks...")
104
+
105
  # Check if chunk is within the token limit just to be sure
106
  chunk_tokens = tokenizer.encode(chunk)
107
  if len(chunk_tokens) > 1024: