Spaces:

nosdigitalmedia
/

dutch-youth-comment-classifier

Sleeping

App Files Files Community

dutch-youth-comment-classifier / app.py

nosdigitalmedia

trigger rebuild

f484a27 over 1 year ago

raw

history blame

3.37 kB

	import gradio as gr
	from huggingface_hub import from_pretrained_fastai

	from src.start_up.start_up_rbs import create_weak_rbs, create_strong_rbs
	from src.start_up.start_up_gibberish import create_gibberish_detector

	# Module start-up: the objects below are created once at import time and
	# are reused by every call to predict().

	# Machine learning model: fastai learner fetched from the Hugging Face Hub.
	learner = from_pretrained_fastai("nosdigitalmedia/dutch-youth-comment-classifier")

	# Maps a 0/1 outcome onto the verdict label reported in the response.
	verdict_map = {0: 'Inappropriate', 1: 'Allowed'}

	# Rule based systems (RBS), one weak and one strong variant.
	weak_rbs = create_weak_rbs()
	strong_rbs = create_strong_rbs()

	# Gibberish detector.
	gibberish_detector = create_gibberish_detector()


	# Define function to judge comment
	def predict(input_text):
	    """Judge a comment with the model, the gibberish detector and both RBS.

	    Args:
	        input_text: The comment text to judge.

	    Returns:
	        A dict with the keys ``model``, ``gibberish``, ``weak_rbs`` and
	        ``strong_rbs`` (in that order). Each value is a dict with the keys
	        ``allowed`` (bool), ``verdict`` (the ``verdict_map`` label),
	        ``highlights`` and ``reasons``.
	    """
	    # Get model predictions; the first element is converted with int() once
	    # and reused for the flag, the verdict and the reasons.
	    predictions = learner.predict(input_text)
	    model_label = int(predictions[0])

	    # Pass through weak RBS
	    allows_weak, reasons_weak, highlights_weak = weak_rbs.allows(input_text)

	    # Pass through strong RBS
	    allows_strong, reasons_strong, highlights_strong = strong_rbs.allows(input_text)

	    # Pass through gibberish detector
	    is_gibberish_free = gibberish_detector.predict(input_text)

	    # Coerce the RBS flags to plain bool, the same way the model and
	    # gibberish flags are coerced, so every 'allowed' value has one type.
	    # NOTE(review): assumes allows() returns a truthy value when the comment
	    # is allowed, as the original use as the 'allowed' value implies.
	    weak_allowed = bool(allows_weak)
	    strong_allowed = bool(allows_strong)

	    # Construct json response
	    return {
	        'model': {
	            'allowed': bool(model_label),
	            'verdict': verdict_map[model_label],
	            'highlights': [],
	            'reasons': [] if model_label else ['Machine learning model does not approve'],
	        },
	        'gibberish': {
	            'allowed': bool(is_gibberish_free),
	            'verdict': verdict_map[int(is_gibberish_free)],
	            'highlights': [],
	            'reasons': [] if is_gibberish_free else ['Comment text contains gibberish'],
	        },
	        'weak_rbs': {
	            'allowed': weak_allowed,
	            'verdict': verdict_map[int(weak_allowed)],
	            'highlights': highlights_weak,
	            'reasons': reasons_weak,
	        },
	        'strong_rbs': {
	            'allowed': strong_allowed,
	            'verdict': verdict_map[int(strong_allowed)],
	            'highlights': highlights_strong,
	            'reasons': reasons_strong,
	        },
	    }


	# Set up app
	# The description contains HTML markup. The intro paragraph is closed before
	# the list starts (a <p> cannot contain an <ol>) and every <li> is closed,
	# so the markup is well-formed and no stray empty paragraph is produced.
	iface = gr.Interface(
	    fn=predict,
	    inputs="text",
	    outputs="json",
	    title="Dutch Youth Comment Classifier",
	    description=(
	        "<p>Input a comment and our model and rule based system will tell you if the comment is appropriate for "
	        "youth. Four systems are used to judge the comment: </p>"
	        "<ol> <li> <code>model</code> refers to the Deep Learning model trained on moderator decisions as provided <a href=\"https://huggingface.co/nosdigitalmedia/dutch-youth-comment-classifier\">here</a>. </li>"
	        "<li> <code>gibberish</code> refers to a markov-chain model predicting whether a comment is actual text or a random sequence of characters </li>"
	        "<li> <code>weak_rbs</code> refers to a rule-based system that flags ambiguous comments that should be checked by a human. </li>"
	        "<li> <code>strong_rbs</code> refers to a rule-based system that flags inappropriate comments that could be removed directly. </li> </ol>"
	    ),
	    # Example inputs offered in the UI; passed to predict() unchanged.
	    examples=[
	        "Ik zag gisteren een puppy",
	        "hljksdfghslkdjfghlsdkfghjslkdjhfg",
	        "mijn email adress is abc@hotmail.com",
	        "<H1>Dit is mijn website nu</H1>",
	        "mijn nicht is een zuster",
	        "jij bent een klootzak",
	    ],
	)
	# share=False: do not request a public share link.
	iface.launch(share=False)