Spaces:

nosdigitalmedia
/

dutch-youth-comment-classifier

Running

App Files Files Community

dutch-youth-comment-classifier / src /rule_based_system /BadWordRule.py

nosdigitalmedia

Attempt to set up application

4e0f321 almost 2 years ago

raw

history blame contribute delete

1.81 kB

	from src.rule_based_system.Rule import Rule

	from src.rule_based_system.TextLengthRule import TEXT_SIZE_LIMIT
	from src.rule_based_system.Verdict import Verdict


	class BadWordRule(Rule):
	    """
	    Rule that flags a comment when it contains words from a configured list.

	    Matching is exact and per whitespace-separated token: a token is reported
	    only when it is equal to an entry of ``bad_words``. No case folding or
	    punctuation stripping is done here, so any such normalisation has to
	    happen before the text reaches this rule.

	    Bad words obtained from corners of the internet you do not want to visit:
	    - https://www.ensie.nl/scheldwoordenboek#
	    - https://scheldwoorden.goedbegin.nl/
	    - https://nl.wiktionary.org/wiki/Categorie:Scheldwoord_in_het_Nederlands
	    - https://www.lannoo.be/sites/default/files/books/issuu/9789401453417.pdf
	    - https://www.dutchmultimedia.nl/meest-verschrikkelijke-engelse-scheldwoorden/
	    - https://www.dutchmultimedia.nl/scheldwoordenboek-1-000-den-nederlandse-scheldwoorden/
	    - https://www.henkyspapiamento.com/10-papiaments-scheldwoorden-die-we-liever-niet-horen/
	    - https://volkabulaire.nl/tag/scheldwoorden/
	    - https://data.world/wordlists/dirty-naughty-obscene-and-otherwise-bad-words-in-dutch
	    """

	    # Class-level placeholder; every instance overwrites it in __init__.
	    bad_words = None

	    def __init__(self, bad_words: list, strict: bool):
	        """
	        Store the word list and the strictness flag.

	        :param bad_words: collection of words to look for; entries are
	            expected to be strings (they must be hashable).
	        :param strict: value returned by ``is_strict`` and used to pick the
	            wording in ``get_rule_description``.
	        """
	        self.bad_words = bad_words
	        self.strict = strict

	    def get_verdict(self, comment_text: str) -> Verdict:
	        """
	        Check a comment for bad words and wrap the outcome in a ``Verdict``.

	        Only the first ``TEXT_SIZE_LIMIT`` characters are inspected. The
	        ``Verdict`` is built from a boolean that is ``True`` when no bad word
	        was found, and from the list of matched tokens.

	        :param comment_text: raw comment text.
	        :return: the ``Verdict`` for this comment.
	        """
	        # Cap the amount of text scanned; anything past the limit is ignored.
	        truncated_text = comment_text[0:TEXT_SIZE_LIMIT]

	        # str.split() without arguments splits on any run of whitespace.
	        bad_words = self.find_bad_words(truncated_text.split())

	        return Verdict(len(bad_words) == 0, bad_words)

	    def find_bad_words(self, text: list) -> list:
	        """
	        Return the tokens of ``text`` that are listed in ``self.bad_words``.

	        Order and duplicates are preserved: a bad word that occurs twice in
	        ``text`` appears twice in the result.

	        :param text: tokens (words) to check.
	        :return: list of matching tokens, possibly empty.
	        """
	        known_bad_words = self.bad_words
	        # Hash the word list once per call so each token lookup is O(1)
	        # instead of a linear scan. Building it here rather than caching it
	        # in __init__ keeps later changes to ``self.bad_words`` effective.
	        if not isinstance(known_bad_words, (set, frozenset)):
	            known_bad_words = frozenset(known_bad_words)

	        return [word for word in text if word in known_bad_words]

	    def is_strict(self) -> bool:
	        """Return the strictness flag passed to the constructor."""
	        return self.strict

	    def get_rule_description(self) -> str:
	        """
	        Return a human-readable description of this rule.

	        The wording depends on the strictness flag: 'strictly' for a strict
	        rule, 'ambiguous' otherwise.
	        """
	        severity = 'strictly' if self.is_strict() else 'ambiguous'
	        return "Comment text contained %s inappropriate words" % severity