Spaces:

CodeMax
/

hatespeech

Runtime error

App Files Community

hm-auch committed on May 29, 2022

Commit

b642a67

1 Parent(s): 86814fc

update classifier and demonstrator-code

Browse files

Files changed (8) hide show

README.md +1 -1
app.py +44 -31
hscommon.py +0 -13
{save/modelV1 → result/model}/keras_metadata.pb +2 -2
{save/modelV1 → result/model}/saved_model.pb +2 -2
{save/modelV1 → result/model}/variables/variables.data-00000-of-00001 +2 -2
result/model/variables/variables.index +0 -0
save/modelV1/variables/variables.index +0 -0

README.md CHANGED Viewed

@@ -10,4 +10,4 @@ pinned: false
 license: afl-3.0
 ---
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces#reference

 license: afl-3.0
 ---
+Check out the configuration reference at https://huggingface.co/docs/hub/spaces#reference

app.py CHANGED Viewed

@@ -1,44 +1,57 @@
 import transformers
-import hscommon
 import gradio as gr
 import tensorflow as tf
-from official.nlp import optimization  # to create AdamW optimizer
-MODEL_DIRECTORY = 'save/modelV1'
 PRETRAINED_MODEL_NAME = 'dbmdz/bert-base-german-cased'
 TOKENIZER = transformers.BertTokenizer.from_pretrained(PRETRAINED_MODEL_NAME)
-MAX_SEQUENCE_LENGTH = 256
-EPOCHS = 2
-OPTIMIZER = 'adamw'
-INIT_LR = 3e-5
-LOSS = tf.keras.losses.BinaryCrossentropy(from_logits=False)
-METRICS = tf.metrics.BinaryAccuracy()
-def compile_model(model):
-    steps_per_epoch = 10
-    num_train_steps = steps_per_epoch * EPOCHS
-    num_warmup_steps = int(0.1*num_train_steps)
-    optimizer = optimization.create_optimizer(
-        init_lr=INIT_LR,
-        num_train_steps=steps_per_epoch,
-        num_warmup_steps=num_warmup_steps,
-        optimizer_type=OPTIMIZER
     )
-    model.compile(optimizer=optimizer, loss=LOSS, metrics=[METRICS])
-    return model
-hs_detection_model = tf.keras.models.load_model(MODEL_DIRECTORY, compile=False)
-compile_model(hs_detection_model)
 def inference(sentence):
-    encoded_sentence = hscommon.encode([sentence], TOKENIZER, MAX_SEQUENCE_LENGTH)
-    predicition = hs_detection_model.predict(encoded_sentence.values())
-    return predicition
 input_sentence_text = gr.inputs.Textbox(placeholder="Hier den Satz eingeben, der Hassrede enthalten kann.")
-iface = gr.Interface(fn=inference, inputs=input_sentence_text, outputs="text")
-iface.launch()

 import transformers
 import gradio as gr
 import tensorflow as tf
+MODEL_DIRECTORY = './result/model'
 PRETRAINED_MODEL_NAME = 'dbmdz/bert-base-german-cased'
 TOKENIZER = transformers.BertTokenizer.from_pretrained(PRETRAINED_MODEL_NAME)
+MAX_SEQUENCE_LENGTH = 300
+def encode(sentences, tokenizer, sequence_length):
+    return tokenizer.batch_encode_plus(
+        sentences,
+        max_length=sequence_length, # set the length of the sequences
+        add_special_tokens=True, # add [CLS] and [SEP] tokens
+        return_attention_mask=True,
+        return_token_type_ids=False, # not needed for this type of ML task
+        pad_to_max_length=True, # add 0 pad tokens to the sequences less than max_length
+        return_tensors='tf'
     )
+hs_detection_model = tf.keras.models.load_model(MODEL_DIRECTORY, compile=True)
 def inference(sentence):
+    encoded_sentence = encode([sentence], TOKENIZER, MAX_SEQUENCE_LENGTH)
+    return hs_detection_model.predict(encoded_sentence.values())
+title = "HS-Detector Demonstrator"
+description = """
+<center>
+<p>Dataset: germeval18_hasoc19_rp21_combi_dataset (17,7% HS)</p>
+<p>Das bisher beste Modell basierend auf Bert nach 2 Epochen und max. 300 Token pro Eintrag fine-tuning mit folgenden Evaluationsergebnissen:</p>
+Accuracy: 0.8794712286158631<br/>
+Balanced Accuracy: 0.7561891312100413<br/>
+Binary F1-Score: 0.6249999999999999<br/>
+Binary Precision: 0.6994584837545126<br/>
+Binary Recall: 0.564868804664723<br/>
+Weighted F1-Score: 0.8742843536656945<br/>
+Weighted Precision: 0.8722794361456155<br/>
+Weighted Recall: 0.8794712286158631<br/>
+Macro F1-Score: 0.7765982087708463<br/>
+Macro Precision: 0.80455672371745<br/>
+Macro Recall: 0.7561891312100413<br/>
+MCC score: 0.558655967312084<br/>
+AUROC score: 0.7561891312100413<br/>
+<img src="https://huggingface.co/spaces/course-demos/Rick_and_Morty_QA/resolve/main/rick.png" width=200px>
+</center>
+"""
+article = "Die Eingaben werden nicht geloggt. Klassifikator einfach ausprobieren."
 input_sentence_text = gr.inputs.Textbox(placeholder="Hier den Satz eingeben, der Hassrede enthalten kann.")
+ui = gr.Interface(fn=inference, inputs=input_sentence_text, outputs="text", title = title, description = description, article = article)
+ui.launch()

hscommon.py DELETED Viewed

@@ -1,13 +0,0 @@
-def encode(sentences, tokenizer, sequence_length):
-    return tokenizer.batch_encode_plus(
-        sentences,
-        max_length=sequence_length, # set the length of the sequences
-        add_special_tokens=True, # add [CLS] and [SEP] tokens
-        return_attention_mask=True,
-        return_token_type_ids=False, # not needed for this type of ML task
-        pad_to_max_length=True, # add 0 pad tokens to the sequences less than max_length
-        return_tensors='tf'
-    )

{save/modelV1 → result/model}/keras_metadata.pb RENAMED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5972b15e2cc31b72e34a938e1d9614cf6a9d2b1872abbc654453ab5f613693a2
-size 155363

 version https://git-lfs.github.com/spec/v1
+oid sha256:805488d800d068dbc81f561789b35c1fe524012434890af914e94166ac17497d
+size 154871

{save/modelV1 → result/model}/saved_model.pb RENAMED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ccb2afb1ffb3c867ae345e7d43af0e20f3f476c83cb86c838a07bf703c216fa4
-size 6651882

 version https://git-lfs.github.com/spec/v1
+oid sha256:01b6489c30792f60f29d622bd7b0e3985fa863165907c8fded7fb2b1029cc421
+size 6564579

{save/modelV1 → result/model}/variables/variables.data-00000-of-00001 RENAMED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7940b2394fa52153c18be7b6007423ca74bb4d346b1f808c5ca94e4951090140
-size 1319391849

 version https://git-lfs.github.com/spec/v1
+oid sha256:879c45b816164b7f5919fd5c42ec35d34f1eb266ee3b28a7c210bcb23f5a6d86
+size 1319386304

result/model/variables/variables.index ADDED Viewed

Binary file (40.7 kB). View file

save/modelV1/variables/variables.index DELETED Viewed

Binary file (40.6 kB)