Spaces:
Running
Running
consolidate outname
Browse files
Signed-off-by: peter szemraj <peterszemraj@gmail.com>
utils.py
CHANGED
@@ -107,16 +107,16 @@ def load_example_filenames(example_path: str or Path):
|
|
107 |
|
108 |
|
109 |
def extract_keywords(
|
110 |
-
text: str, num_keywords: int = 3, window_size: int = 5
|
111 |
) -> List[str]:
|
112 |
"""
|
113 |
Extracts keywords from a text using a simplified TextRank algorithm.
|
114 |
|
115 |
Args:
|
116 |
text: The text to extract keywords from.
|
117 |
-
num_keywords: The number of keywords to extract. Default
|
118 |
-
window_size: The number of words considered for co-occurrence. Default
|
119 |
-
|
120 |
Returns:
|
121 |
A list of strings, where each string is a keyword extracted from the input text.
|
122 |
"""
|
@@ -155,8 +155,8 @@ def extract_keywords(
|
|
155 |
final_keywords = []
|
156 |
for keyword in keywords:
|
157 |
if not any(fuzz.ratio(keyword, other) > 70 for other in final_keywords):
|
158 |
-
final_keywords.append(keyword)
|
159 |
-
logger.debug(f"Keywords (
|
160 |
return final_keywords
|
161 |
|
162 |
|
@@ -177,10 +177,10 @@ def saves_summary(
|
|
177 |
scores_text = "\n".join(sum_scores)
|
178 |
full_summary = "\n".join(sum_text)
|
179 |
|
180 |
-
keywords = "_".join(extract_keywords(full_summary))
|
181 |
logger.debug(f"kw:\t{keywords}")
|
182 |
outpath = (
|
183 |
-
Path.cwd() / f"
|
184 |
if outpath is None
|
185 |
else Path(outpath)
|
186 |
)
|
|
|
107 |
|
108 |
|
109 |
def extract_keywords(
|
110 |
+
text: str, num_keywords: int = 3, window_size: int = 5, kw_max_len: int = 20
|
111 |
) -> List[str]:
|
112 |
"""
|
113 |
Extracts keywords from a text using a simplified TextRank algorithm.
|
114 |
|
115 |
Args:
|
116 |
text: The text to extract keywords from.
|
117 |
+
num_keywords: The number of keywords to extract. Default: 3
|
118 |
+
window_size: The number of words considered for co-occurrence. Default: 5
|
119 |
+
kw_max_len: The maximum length of a keyword (truncate longer keywords to max). Default: 20
|
120 |
Returns:
|
121 |
A list of strings, where each string is a keyword extracted from the input text.
|
122 |
"""
|
|
|
155 |
final_keywords = []
|
156 |
for keyword in keywords:
|
157 |
if not any(fuzz.ratio(keyword, other) > 70 for other in final_keywords):
|
158 |
+
final_keywords.append(keyword[:kw_max_len])
|
159 |
+
logger.debug(f"Keywords (max len. {kw_max_len}):\t{final_keywords}")
|
160 |
return final_keywords
|
161 |
|
162 |
|
|
|
177 |
scores_text = "\n".join(sum_scores)
|
178 |
full_summary = "\n".join(sum_text)
|
179 |
|
180 |
+
keywords = "_".join(extract_keywords(full_summary, kw_max_len=4))
|
181 |
logger.debug(f"kw:\t{keywords}")
|
182 |
outpath = (
|
183 |
+
Path.cwd() / f"DocSummary_{keywords}_{get_timestamp()}.txt"
|
184 |
if outpath is None
|
185 |
else Path(outpath)
|
186 |
)
|