|
import gradio as gr |
|
from huggingface_hub import from_pretrained_fastai |
|
|
|
from src.start_up.start_up_rbs import create_weak_rbs, create_strong_rbs |
|
from src.start_up.start_up_gibberish import create_gibberish_detector |
|
|
|
|
|
|
|
# Hugging Face Hub repository that hosts the fastai comment classifier.
_MODEL_REPO_ID = "nosdigitalmedia/dutch-youth-comment-classifier"

# Loaded once at import time so every request reuses the same learner.
learner = from_pretrained_fastai(_MODEL_REPO_ID)

# Human-readable label for each binary outcome. Boolean results can be used
# as keys too, because False/True hash and compare equal to 0/1.
verdict_map = {0: 'Inappropriate', 1: 'Allowed'}
|
|
|
|
|
# Rule-based systems, built once at start-up. Per the interface description
# further down, the weak one flags ambiguous comments for human review and
# the strong one flags comments that could be removed directly.
weak_rbs, strong_rbs = create_weak_rbs(), create_strong_rbs()

# Detector used to decide whether a comment is actual text or a random
# sequence of characters.
gibberish_detector = create_gibberish_detector()
|
|
|
|
|
|
|
def predict(input_text):
    """Judge a comment with all four moderation systems.

    Args:
        input_text: The comment text to evaluate.

    Returns:
        A dict keyed by system name (``model``, ``gibberish``, ``weak_rbs``
        and ``strong_rbs``). Every value is a dict with the keys ``allowed``,
        ``verdict`` (a label taken from ``verdict_map``), ``highlights`` and
        ``reasons``.
    """
    # Query every system first, in a fixed order, before assembling the report.
    model_output = learner.predict(input_text)
    weak_ok, weak_reasons, weak_highlights = weak_rbs.allows(input_text)
    strong_ok, strong_reasons, strong_highlights = strong_rbs.allows(input_text)
    gibberish_free = gibberish_detector.predict(input_text)

    # The first element of the learner output is cast to int and used as the
    # class index into verdict_map (0 = inappropriate, 1 = allowed).
    model_label = int(model_output[0])
    model_section = {
        'allowed': bool(model_label),
        'verdict': verdict_map[model_label],
        'highlights': [],
        'reasons': [] if model_label else ['Machine learning model does not approve'],
    }

    gibberish_ok = bool(gibberish_free)
    gibberish_section = {
        'allowed': gibberish_ok,
        'verdict': verdict_map[int(gibberish_free)],
        'highlights': [],
        'reasons': [] if gibberish_ok else ['Comment text contains gibberish'],
    }

    # The rule-based systems already supply their own reasons and highlights.
    weak_section = {
        'allowed': weak_ok,
        'verdict': verdict_map[weak_ok],
        'highlights': weak_highlights,
        'reasons': weak_reasons,
    }
    strong_section = {
        'allowed': strong_ok,
        'verdict': verdict_map[strong_ok],
        'highlights': strong_highlights,
        'reasons': strong_reasons,
    }

    return {
        'model': model_section,
        'gibberish': gibberish_section,
        'weak_rbs': weak_section,
        'strong_rbs': strong_section,
    }
|
|
|
|
|
|
|
# HTML-formatted explanation of the four systems reported by predict().
_DESCRIPTION = (
    "<p>Input a comment and our model and rule based system will tell you if the comment is appropriate for "
    "youth. Four systems are used to judge the comment: "
    "<ol> <li> <code>model</code> refers to the Deep Learning model trained on moderator decisions as provided <a href=\"https://huggingface.co/nosdigitalmedia/dutch-youth-comment-classifier\">here</a>. </li>"
    "<li> <code>gibberish</code> refers to a markov-chain model predicting whether a comment is actual text or a random sequence of characters </li>"
    "<li> <code>weak_rbs</code> refers to a rule-based system that flags ambiguous comments that should be checked by a human. </li>"
    "<li> <code>strong_rbs</code> refers to a rule-based system that flags inappropriate comments that could be removed directly.</ol></p>"
)

# Sample comments offered in the interface.
_EXAMPLES = [
    "Ik zag gisteren een puppy",
    "hljksdfghslkdjfghlsdkfghjslkdjhfg",
    "mijn email adress is abc@hotmail.com",
    "<H1>Dit is mijn website nu</H1>",
    "mijn nicht is een zuster",
    "jij bent een klootzak",
]

# Text in, JSON report out: the dict returned by predict() is shown as-is.
iface = gr.Interface(
    fn=predict,
    inputs="text",
    outputs="json",
    title="Dutch Youth Comment Classifier",
    description=_DESCRIPTION,
    examples=_EXAMPLES,
)

# share=False: no public share link is requested.
iface.launch(share=False)
|
|
|
|