Marroco93 committed on
Commit
22a4b4f
1 Parent(s): 669af95

no message

Browse files
Files changed (1) hide show
  1. main.py +5 -5
main.py CHANGED
@@ -95,7 +95,7 @@ def preprocess_text(text: str) -> str:
95
  text = re.sub(r'[^\w\s]', '', text)
96
  return text
97
 
98
- def reduce_tokens(text: str) -> str:
99
  # Process the text with spaCy
100
  doc = nlp(text)
101
  # Select sentences that might be more important - this is a simple heuristic
@@ -103,10 +103,10 @@ def reduce_tokens(text: str) -> str:
103
  for sent in doc.sents:
104
  if any(tok.dep_ == 'ROOT' for tok in sent):
105
  important_sentences.append(sent.text)
106
- # Join selected sentences to form the reduced text
107
  reduced_text = ' '.join(important_sentences)
108
  # Tokenize the reduced text to count the tokens
109
- reduced_doc = nlp(reduced_text)
110
  token_count = len(reduced_doc)
111
  return reduced_text, token_count
112
 
@@ -116,8 +116,8 @@ async def summarize(request: TextRequest):
116
  processed_text = preprocess_text(request.text)
117
  reduced_text, token_count = reduce_tokens(processed_text)
118
  return {
119
- "token_count": token_count,
120
- "reduced_text": reduced_text
121
  }
122
 
123
  except Exception as e:
 
95
  text = re.sub(r'[^\w\s]', '', text)
96
  return text
97
 
98
+ def reduce_tokens(text: str):
99
  # Process the text with spaCy
100
  doc = nlp(text)
101
  # Select sentences that might be more important - this is a simple heuristic
 
103
  for sent in doc.sents:
104
  if any(tok.dep_ == 'ROOT' for tok in sent):
105
  important_sentences.append(sent.text)
106
+ # Join selected sentences to form the reduced text
107
  reduced_text = ' '.join(important_sentences)
108
  # Tokenize the reduced text to count the tokens
109
+ reduced_doc = nlp(reduced_text) # Ensure this line is correctly aligned
110
  token_count = len(reduced_doc)
111
  return reduced_text, token_count
112
 
 
116
  processed_text = preprocess_text(request.text)
117
  reduced_text, token_count = reduce_tokens(processed_text)
118
  return {
119
+ "reduced_text": reduced_text,
120
+ "token_count": token_count
121
  }
122
 
123
  except Exception as e: