import gradio as gr
from huggingface_hub import from_pretrained_fastai

from src.start_up.start_up_gibberish import create_gibberish_detector
from src.start_up.start_up_rbs import create_weak_rbs, create_strong_rbs

# Start up modules
# Initiate model
learner = from_pretrained_fastai("nosdigitalmedia/dutch-youth-comment-classifier")
verdict_map = {
    0: 'Inappropriate',
    1: 'Allowed'
}

# Initiate rule based systems (RBS)
weak_rbs = create_weak_rbs()
strong_rbs = create_strong_rbs()

# Initiate gibberish detector
gibberish_detector = create_gibberish_detector()


# Define function to judge comment
def predict(input_text: str) -> dict:
    """Judge a comment with the model, the gibberish detector and both RBS.

    Args:
        input_text: The comment text entered in the Gradio text box.

    Returns:
        A JSON-serialisable dict with the keys ``model``, ``gibberish``,
        ``weak_rbs`` and ``strong_rbs``. Each maps to a dict holding
        ``allowed``, ``verdict`` (a value of ``verdict_map``),
        ``highlights`` and ``reasons``.
    """
    # Get model predictions; only the first element of the result is used.
    # Assumes it converts to the int label 0 or 1 - TODO confirm.
    predictions = learner.predict(input_text)
    model_label = int(predictions[0])

    # Pass through weak RBS
    allows_weak, reasons_weak, highlights_weak = weak_rbs.allows(input_text)

    # Pass through strong RBS
    allows_strong, reasons_strong, highlights_strong = strong_rbs.allows(input_text)

    # Pass through gibberish detector
    is_gibberish_free = gibberish_detector.predict(input_text)

    # Construct json response
    return {
        'model': {
            'allowed': bool(model_label),
            'verdict': verdict_map[model_label],
            'highlights': [],
            'reasons': [] if model_label else ['Machine learning model does not approve'],
        },
        'gibberish': {
            'allowed': bool(is_gibberish_free),
            'verdict': verdict_map[int(is_gibberish_free)],
            'highlights': [],
            'reasons': [] if is_gibberish_free else ['Comment text contains gibberish'],
        },
        # The RBS verdicts index verdict_map directly with the value returned
        # by allows(); this presumably is a bool (True == 1, False == 0).
        'weak_rbs': {
            'allowed': allows_weak,
            'verdict': verdict_map[allows_weak],
            'highlights': highlights_weak,
            'reasons': reasons_weak,
        },
        'strong_rbs': {
            'allowed': allows_strong,
            'verdict': verdict_map[allows_strong],
            'highlights': highlights_strong,
            'reasons': reasons_strong,
        },
    }


# Set up app
# NOTE(review): the paragraph/list markup in `description` was reconstructed
# from a flattened copy of this file. The word "here" presumably carried a
# hyperlink whose target is not visible - restore it from version control.
iface = gr.Interface(
    fn=predict,
    inputs="text",
    outputs="json",
    title="Dutch Youth Comment Classifier",
    description=(
        "<p>Input a comment and our model and rule based system will tell you if the comment is appropriate for "
        "youth. Four systems are used to judge the comment: </p>"
        "<ol>"
        "<li>model refers to the Deep Learning model trained on moderator decisions as provided here.</li>"
        "<li>gibberish refers to a markov-chain model predicting whether a comment is actual text or a random "
        "sequence of characters</li>"
        "<li>weak_rbs refers to a rule-based system that flags ambiguous comments that should be checked by a "
        "human.</li>"
        "<li>strong_rbs refers to a rule-based system that flags inappropriate comments that could be removed "
        "directly.</li>"
        "</ol>"
    ),
    examples=[
        "Ik zag gisteren een puppy",
        "hljksdfghslkdjfghlsdkfghjslkdjhfg",
        "mijn email adress is abc@hotmail.com",
        # NOTE(review): this example appears to have lost surrounding markup
        # (presumably a hyperlink) - verify against version control.
        "Dit is mijn website nu",
        "mijn nicht is een zuster",
        "jij bent een klootzak",
    ],
)

iface.launch(share=False)