nosdigitalmedia's picture
trigger rebuild
f484a27
raw
history blame
3.37 kB
import gradio as gr
from huggingface_hub import from_pretrained_fastai
from src.start_up.start_up_rbs import create_weak_rbs, create_strong_rbs
from src.start_up.start_up_gibberish import create_gibberish_detector
# Start-up: every object below is created once when the module is loaded and
# is then shared by all calls to `predict`.

# Machine learning model, loaded by its Hugging Face repo id.
learner = from_pretrained_fastai("nosdigitalmedia/dutch-youth-comment-classifier")

# Maps a 0/1 decision onto the verdict text shown in the response.
verdict_map = {0: 'Inappropriate', 1: 'Allowed'}

# Rule based systems (RBS): the weak one first, then the strong one.
weak_rbs, strong_rbs = create_weak_rbs(), create_strong_rbs()

# Detector used to tell real text apart from gibberish.
gibberish_detector = create_gibberish_detector()
# Define function to judge comment
def predict(input_text):
    """Judge a single comment with all four systems and report each verdict.

    The comment is passed to the machine learning model, the weak and strong
    rule based systems, and the gibberish detector. Each system gets its own
    entry in the returned dict with the keys ``allowed``, ``verdict``,
    ``highlights`` and ``reasons``.

    Args:
        input_text: The comment text to judge.

    Returns:
        A dict with the keys ``model``, ``gibberish``, ``weak_rbs`` and
        ``strong_rbs`` (in that order), one per system.
    """
    # Run every system on the raw comment text.
    model_output = learner.predict(input_text)
    weak_ok, weak_reasons, weak_spans = weak_rbs.allows(input_text)
    strong_ok, strong_reasons, strong_spans = strong_rbs.allows(input_text)
    no_gibberish = gibberish_detector.predict(input_text)

    # The first element of the model output is the predicted class as 0/1.
    model_label = int(model_output[0])

    # Assemble the JSON response; the model and the gibberish detector give
    # no highlights and at most one fixed reason each.
    return {
        'model': {
            'allowed': bool(model_label),
            'verdict': verdict_map[model_label],
            'highlights': [],
            'reasons': [] if model_label else ['Machine learning model does not approve'],
        },
        'gibberish': {
            'allowed': bool(no_gibberish),
            'verdict': verdict_map[int(no_gibberish)],
            'highlights': [],
            'reasons': [] if no_gibberish else ['Comment text contains gibberish'],
        },
        'weak_rbs': {
            'allowed': weak_ok,
            'verdict': verdict_map[weak_ok],
            'highlights': weak_spans,
            'reasons': weak_reasons,
        },
        'strong_rbs': {
            'allowed': strong_ok,
            'verdict': verdict_map[strong_ok],
            'highlights': strong_spans,
            'reasons': strong_reasons,
        },
    }
# Set up app: the explanatory HTML and the example comments are defined first,
# then handed to the Gradio interface, which is launched immediately.

# HTML shown under the title; it explains the four keys of the JSON output.
_DESCRIPTION_HTML = (
    "<p>Input a comment and our model and rule based system will tell you if the comment is appropriate for "
    "youth. Four systems are used to judge the comment: "
    "<ol> <li> <code>model</code> refers to the Deep Learning model trained on moderator decisions as provided <a href=\"https://huggingface.co/nosdigitalmedia/dutch-youth-comment-classifier\">here</a>. </li>"
    "<li> <code>gibberish</code> refers to a markov-chain model predicting whether a comment is actual text or a random sequence of characters </li>"
    "<li> <code>weak_rbs</code> refers to a rule-based system that flags ambiguous comments that should be checked by a human. </li>"
    "<li> <code>strong_rbs</code> refers to a rule-based system that flags inappropriate comments that could be removed directly.</ol></p>"
)

# Sample comments offered in the UI.
_EXAMPLE_COMMENTS = [
    "Ik zag gisteren een puppy",
    "hljksdfghslkdjfghlsdkfghjslkdjhfg",
    "mijn email adress is abc@hotmail.com",
    "<H1>Dit is mijn website nu</H1>",
    "mijn nicht is een zuster",
    "jij bent een klootzak",
]

# Text goes in, the dict returned by `predict` comes out rendered as JSON.
iface = gr.Interface(
    fn=predict,
    inputs="text",
    outputs="json",
    title="Dutch Youth Comment Classifier",
    description=_DESCRIPTION_HTML,
    examples=_EXAMPLE_COMMENTS,
)

# Start the app without requesting a public share link.
iface.launch(share=False)