# nosdigitalmedia's picture
# Attempt to set up application
# 4e0f321
from src.rule_based_system.Rule import Rule
from src.rule_based_system.TextLengthRule import TEXT_SIZE_LIMIT
from src.rule_based_system.Verdict import Verdict
class BadWordRule(Rule):
    """
    Rule that flags a comment when it contains a word from a bad-word list.

    The comment is truncated to TEXT_SIZE_LIMIT characters, split on
    whitespace, and each resulting token is compared verbatim against the
    configured bad words (exact match: no case folding and no punctuation
    stripping is applied here).

    The bad words are snapshotted into a set when the rule is constructed, so
    the list should not be mutated afterwards; build a new rule instead.

    Bad words obtained from corners of the internet you do not want to visit:
    - https://www.ensie.nl/scheldwoordenboek#
    - https://scheldwoorden.goedbegin.nl/
    - https://nl.wiktionary.org/wiki/Categorie:Scheldwoord_in_het_Nederlands
    - https://www.lannoo.be/sites/default/files/books/issuu/9789401453417.pdf
    - https://www.dutchmultimedia.nl/meest-verschrikkelijke-engelse-scheldwoorden/
    - https://www.dutchmultimedia.nl/scheldwoordenboek-1-000-den-nederlandse-scheldwoorden/
    - https://www.henkyspapiamento.com/10-papiaments-scheldwoorden-die-we-liever-niet-horen/
    - https://volkabulaire.nl/tag/scheldwoorden/
    - https://data.world/wordlists/dirty-naughty-obscene-and-otherwise-bad-words-in-dutch
    """

    bad_words = None
    # Hashable snapshot of ``bad_words`` used for constant-time membership
    # tests. ``None`` means "no snapshot available": lookups then fall back to
    # testing against ``bad_words`` directly (e.g. when a subclass skips
    # ``__init__`` or the word collection cannot be turned into a set).
    _bad_word_lookup = None

    def __init__(self, bad_words: list, strict: bool):
        """
        Store the word list and the strictness flag.

        :param bad_words: collection of words that must not appear in a comment
        :param strict: flag reported by is_strict(); it also selects the
            wording used by get_rule_description()
        """
        self.bad_words = bad_words
        self.strict = strict
        try:
            # A list scan costs O(len(bad_words)) per token; a set makes each
            # membership test O(1) on average.
            self._bad_word_lookup = frozenset(bad_words)
        except TypeError:
            # Not iterable (e.g. None) or contains unhashable entries: keep
            # the previous behaviour of testing against the raw container.
            self._bad_word_lookup = None

    def get_verdict(self, comment_text: str) -> Verdict:
        """
        Check the (truncated) comment for bad words.

        :param comment_text: raw comment text
        :return: Verdict built from a boolean that is True when no bad word
            was found, and the list of bad words that were found
        """
        # Only the first TEXT_SIZE_LIMIT characters are inspected.
        truncated = comment_text[0:TEXT_SIZE_LIMIT]
        found = self.find_bad_words(truncated.split())
        return Verdict(not found, found)

    def find_bad_words(self, text: list) -> list:
        """
        Return the tokens of ``text`` that are bad words.

        :param text: list of tokens to check
        :return: matching tokens in their original order; a token that occurs
            several times is reported several times
        """
        lookup = self._bad_word_lookup
        if lookup is None:
            lookup = self.bad_words
        return [word for word in text if word in lookup]

    def is_strict(self) -> bool:
        """Return the strictness flag given at construction time."""
        return self.strict

    def get_rule_description(self) -> str:
        """Return a description of the rule, worded by strictness."""
        qualifier = 'strictly' if self.is_strict() else 'ambiguous'
        return "Comment text contained %s inappropriate words" % qualifier