Remove unused transform
Browse files
- transforms_cased.py +0 -38
transforms_cased.py
CHANGED
@@ -17,7 +17,6 @@ __all__ = [
|
|
17 |
"DropWords",
|
18 |
"FilterPOS",
|
19 |
"FrequencyMinWordCount",
|
20 |
-
"FrequencyTopK",
|
21 |
"ReplaceSeparators",
|
22 |
"ToLowercase",
|
23 |
"ToSingular",
|
@@ -257,43 +256,6 @@ class FrequencyMinWordCount(BaseTextTransform):
|
|
257 |
return f"{self.__class__.__name__}(min_count={self.min_count})"
|
258 |
|
259 |
|
260 |
-
class FrequencyTopK(BaseTextTransform):
|
261 |
-
"""Keep only the top k most frequent words in the input text.
|
262 |
-
|
263 |
-
In case of a tie, all words with the same count as the last word are kept.
|
264 |
-
|
265 |
-
Args:
|
266 |
-
top_k (int): Number of top words to keep.
|
267 |
-
"""
|
268 |
-
|
269 |
-
def __init__(self, top_k: int) -> None:
|
270 |
-
super().__init__()
|
271 |
-
self.top_k = top_k
|
272 |
-
|
273 |
-
def __call__(self, text: str) -> str:
|
274 |
-
"""
|
275 |
-
Args:
|
276 |
-
text (str): Text to remove infrequent words from.
|
277 |
-
"""
|
278 |
-
if self.top_k < 1:
|
279 |
-
return text
|
280 |
-
|
281 |
-
words = text.split()
|
282 |
-
word_counts = {word: words.count(word) for word in words}
|
283 |
-
top_words = sorted(word_counts, key=word_counts.get, reverse=True)
|
284 |
-
|
285 |
-
# in case of a tie, keep all words with the same count
|
286 |
-
top_words = top_words[: self.top_k]
|
287 |
-
top_words = [word for word in top_words if word_counts[word] == word_counts[top_words[-1]]]
|
288 |
-
|
289 |
-
text = " ".join([word for word in words if word in top_words])
|
290 |
-
|
291 |
-
return text
|
292 |
-
|
293 |
-
def __repr__(self) -> str:
|
294 |
-
return f"{self.__class__.__name__}(top_k={self.top_k})"
|
295 |
-
|
296 |
-
|
297 |
class ReplaceSeparators(BaseTextTransform):
|
298 |
"""Replace underscores and dashes with spaces."""
|
299 |
|
|
|
17 |
"DropWords",
|
18 |
"FilterPOS",
|
19 |
"FrequencyMinWordCount",
|
|
|
20 |
"ReplaceSeparators",
|
21 |
"ToLowercase",
|
22 |
"ToSingular",
|
|
|
256 |
return f"{self.__class__.__name__}(min_count={self.min_count})"
|
257 |
|
258 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
259 |
class ReplaceSeparators(BaseTextTransform):
|
260 |
"""Replace underscores and dashes with spaces."""
|
261 |
|