pszemraj committed on
Commit
c9456c2
·
1 Parent(s): 55b49e6

💄 consolidate outname

Browse files

Signed-off-by: peter szemraj <peterszemraj@gmail.com>

Files changed (1) hide show
  1. utils.py +8 -8
utils.py CHANGED
@@ -107,16 +107,16 @@ def load_example_filenames(example_path: str or Path):
107
 
108
 
109
  def extract_keywords(
110
- text: str, num_keywords: int = 3, window_size: int = 5
111
  ) -> List[str]:
112
  """
113
  Extracts keywords from a text using a simplified TextRank algorithm.
114
 
115
  Args:
116
  text: The text to extract keywords from.
117
- num_keywords: The number of keywords to extract. Default is 5.
118
- window_size: The number of words considered for co-occurrence. Default is 5.
119
-
120
  Returns:
121
  A list of strings, where each string is a keyword extracted from the input text.
122
  """
@@ -155,8 +155,8 @@ def extract_keywords(
155
  final_keywords = []
156
  for keyword in keywords:
157
  if not any(fuzz.ratio(keyword, other) > 70 for other in final_keywords):
158
- final_keywords.append(keyword)
159
- logger.debug(f"Keywords (final):\t{final_keywords}")
160
  return final_keywords
161
 
162
 
@@ -177,10 +177,10 @@ def saves_summary(
177
  scores_text = "\n".join(sum_scores)
178
  full_summary = "\n".join(sum_text)
179
 
180
- keywords = "_".join(extract_keywords(full_summary))
181
  logger.debug(f"kw:\t{keywords}")
182
  outpath = (
183
- Path.cwd() / f"document_summary_{keywords}_{get_timestamp()}.txt"
184
  if outpath is None
185
  else Path(outpath)
186
  )
 
107
 
108
 
109
  def extract_keywords(
110
+ text: str, num_keywords: int = 3, window_size: int = 5, kw_max_len: int = 20
111
  ) -> List[str]:
112
  """
113
  Extracts keywords from a text using a simplified TextRank algorithm.
114
 
115
  Args:
116
  text: The text to extract keywords from.
117
+ num_keywords: The number of keywords to extract. Default: 3
118
+ window_size: The number of words considered for co-occurrence. Default: 5
119
+ kw_max_len: The maximum length of a keyword (truncate longer keywords to max). Default: 20
120
  Returns:
121
  A list of strings, where each string is a keyword extracted from the input text.
122
  """
 
155
  final_keywords = []
156
  for keyword in keywords:
157
  if not any(fuzz.ratio(keyword, other) > 70 for other in final_keywords):
158
+ final_keywords.append(keyword[:kw_max_len])
159
+ logger.debug(f"Keywords (max len. {kw_max_len}):\t{final_keywords}")
160
  return final_keywords
161
 
162
 
 
177
  scores_text = "\n".join(sum_scores)
178
  full_summary = "\n".join(sum_text)
179
 
180
+ keywords = "_".join(extract_keywords(full_summary, kw_max_len=4))
181
  logger.debug(f"kw:\t{keywords}")
182
  outpath = (
183
+ Path.cwd() / f"DocSummary_{keywords}_{get_timestamp()}.txt"
184
  if outpath is None
185
  else Path(outpath)
186
  )