Spaces:

CodeMax
/

hatespeech

Runtime error

App Files Files Community

hm-auch committed on Jun 13, 2022

Commit

072c906

1 Parent(s): b642a67

update classifier and demonstrator-code

Browse files

Files changed (12) hide show

README.md +1 -1
app.py +53 -34
gradio_queue.db +0 -0
gradio_queue.db-journal +0 -0
{result/model → model_1}/keras_metadata.pb +0 -0
{result/model → model_1}/saved_model.pb +0 -0
{result/model → model_1}/variables/variables.data-00000-of-00001 +0 -0
{result/model → model_1}/variables/variables.index +0 -0
model_2/keras_metadata.pb +3 -0
model_2/saved_model.pb +3 -0
model_2/variables/variables.data-00000-of-00001 +3 -0
model_2/variables/variables.index +0 -0

README.md CHANGED Viewed

@@ -1,5 +1,5 @@
 ---
-title: Hatespeech
 emoji: 😻
 colorFrom: yellow
 colorTo: red

 ---
+title: Hassrede
 emoji: 😻
 colorFrom: yellow
 colorTo: red

app.py CHANGED Viewed

@@ -1,13 +1,9 @@
 import transformers
 import gradio as gr
 import tensorflow as tf
-MODEL_DIRECTORY = './result/model'
-PRETRAINED_MODEL_NAME = 'dbmdz/bert-base-german-cased'
-TOKENIZER = transformers.BertTokenizer.from_pretrained(PRETRAINED_MODEL_NAME)
-MAX_SEQUENCE_LENGTH = 300
 def encode(sentences, tokenizer, sequence_length):
     return tokenizer.batch_encode_plus(
         sentences,
@@ -19,39 +15,62 @@ def encode(sentences, tokenizer, sequence_length):
         return_tensors='tf'
     )
-hs_detection_model = tf.keras.models.load_model(MODEL_DIRECTORY, compile=True)
-def inference(sentence):
-    encoded_sentence = encode([sentence], TOKENIZER, MAX_SEQUENCE_LENGTH)
-    return hs_detection_model.predict(encoded_sentence.values())
-title = "HS-Detector Demonstrator"
 description = """
 <center>
-<p>Dataset: germeval18_hasoc19_rp21_combi_dataset (17,7% HS)</p>
-<p>Das bisher beste Modell basierend auf Bert nach 2 Epochen und max. 300 Token pro Eintrag fine-tuning mit folgenden Evaluationsergebnissen:</p>
-Accuracy: 0.8794712286158631<br/>
-Balanced Accuracy: 0.7561891312100413<br/>
-Binary F1-Score: 0.6249999999999999<br/>
-Binary Precision: 0.6994584837545126<br/>
-Binary Recall: 0.564868804664723<br/>
-Weighted F1-Score: 0.8742843536656945<br/>
-Weighted Precision: 0.8722794361456155<br/>
-Weighted Recall: 0.8794712286158631<br/>
-Macro F1-Score: 0.7765982087708463<br/>
-Macro Precision: 0.80455672371745<br/>
-Macro Recall: 0.7561891312100413<br/>
-MCC score: 0.558655967312084<br/>
-AUROC score: 0.7561891312100413<br/>
-<img src="https://huggingface.co/spaces/course-demos/Rick_and_Morty_QA/resolve/main/rick.png" width=200px>
 </center>
 """
-article = "Die Eingaben werden nicht geloggt. Klassifikator einfach ausprobieren."
-input_sentence_text = gr.inputs.Textbox(placeholder="Hier den Satz eingeben, der Hassrede enthalten kann.")
-ui = gr.Interface(fn=inference, inputs=input_sentence_text, outputs="text", title = title, description = description, article = article)
-ui.launch()

 import transformers
 import gradio as gr
+import numpy as np
 import tensorflow as tf
 def encode(sentences, tokenizer, sequence_length):
     return tokenizer.batch_encode_plus(
         sentences,
         return_tensors='tf'
     )
+hs_detection_model_1 = tf.keras.models.load_model('./model_1', compile=True)  # loaded once at module level; path is relative to the working directory
+hs_detection_model_2 = tf.keras.models.load_model('./model_2', compile=True)  # second model, scored side by side with model_1 in model_inference
+_TOKENIZER_CACHE = {}
+def _get_tokenizer(pretrained_name):
+    """Return the BertTokenizer for *pretrained_name*, loading it at most once per process."""
+    if pretrained_name not in _TOKENIZER_CACHE:
+        _TOKENIZER_CACHE[pretrained_name] = transformers.BertTokenizer.from_pretrained(pretrained_name)
+    return _TOKENIZER_CACHE[pretrained_name]
+def model_inference(sentence):
+    """Run *sentence* through both hate-speech models.
+
+    Args:
+        sentence: The input text to classify.
+
+    Returns:
+        A 2-tuple of dicts, ``({'Hassrede': p1}, {'Hassrede': p2})``, where
+        ``p1`` / ``p2`` are the first value of the flattened prediction of
+        ``hs_detection_model_1`` / ``hs_detection_model_2`` as a Python float.
+    """
+    # Tokenizers are fetched through the cache: calling from_pretrained() on
+    # every request re-reads the vocabulary each time for no benefit.
+    # Each model gets the tokenizer of its own checkpoint (cased / uncased);
+    # 300 and 512 are handed to encode() as its sequence_length argument.
+    encoded_model1_sentence = encode([sentence], _get_tokenizer('dbmdz/bert-base-german-cased'), 300)
+    encoded_model2_sentence = encode([sentence], _get_tokenizer('dbmdz/bert-base-german-uncased'), 512)
+    predictions_1 = hs_detection_model_1.predict(encoded_model1_sentence.values()).flatten()
+    predictions_2 = hs_detection_model_2.predict(encoded_model2_sentence.values()).flatten()
+    return {'Hassrede': float(predictions_1[0])}, {'Hassrede': float(predictions_2[0])}
+title = "HS-Detector Demonstrator (deutsch)"  # passed to gr.Interface as title
description = """
+<div style="float: none; overflow: hidden;">
+<div style="display:block; width:100%;">
<center>
+<div style="width:50%; float: left; display: inline-block;">
+    <h2>Ausgangsmodell</h2>
+    <p>Modell: Bert ('dbmdz/bert-base-german-cased')</p>
+    <p>Dataset: germeval18_hasoc19_rp21_combi_dataset <br/> (77.161 Einträge mit einem Hassrede-Anteil von 17,7%)</p>
+    <p>Fine-Tuning Parameter: 2 Epochen, 300 Token pro Eintrag, 2e-5 LR</p>
+    Evaluationsergebnisse:
+    Balanced Accuracy: 0.756
+    (Accuracy: 0.880)
+    Binary F1-Score: 0.625
+    Binary Precision: 0.699
+    Binary Recall: 0.565
+    MCC score: 0.559
+    AUROC score: 0.756
+</div>
+<div style="width:50%; float: left; display: inline-block;">
+    <h2>Challenger-Modell</h2>
+    <p>Modell: Bert ('dbmdz/bert-base-german-uncased')</p>
+    <p>Dataset: germeval18_hasoc19_rp21_combi_dataset_no-url_no-address  <br/> (~77.161 Einträge mit einem Hassrede-Anteil von 17,7%)</p>
+    <p>Fine-Tuning Parameter: 2 Epochen, 512 Token pro Eintrag, 2e-5 LR</p>
+    Evaluationsergebnisse:
+    Balanced Accuracy: 0.749
+    (Accuracy: 0.867)
+    Binary F1-Score: 0.602
+    Binary Precision: 0.642
+    Binary Recall: 0.567
+    MCC score: 0.524
+    AUROC score: 0.749
+</div>
</center>
+</div>
+</div>
"""  # HTML passed to gr.Interface as description: one 50%-wide column per model (checkpoint, dataset, fine-tuning parameters, evaluation results)
+# <p>Dataset: germeval18_hasoc19_rp21_glasebach22_combi_dataset_no-addr.csv <br/> (84.239 Einträge mit einem Hassrede-Anteil von 18,2%)</p>
+article = """Die Eingaben werden nicht geloggt. Klassifikator einfach ausprobieren.
+Unter dem Button 'Ersteller' kann inspiziert werden, welche Satz-Bestandteile für die Modelle vermutlich entscheident waren.
+Dabei werden automatisiert Satzteile verändert und die Auswirkungen auf die jeweils abgefragten Predictions beobachtet."""  # passed to gr.Interface as article
+input_sentence_text = gr.inputs.Textbox(lines=5, placeholder="Geben Sie hier den Satz ein, der von den Modellen auf Hassrede geprüft werden soll.")  # five-line text input for the sentence to classify
+output_predictions = [gr.outputs.Label(label="Prediction of initial model", num_top_classes=1), gr.outputs.Label(label="Prediction of challenging model", num_top_classes=1)]  # one Label per model, in the order model_inference returns its two dicts
+ui = gr.Interface(fn=model_inference, inputs=input_sentence_text, outputs=output_predictions, title=title, article=article, description=description, interpretation="default",  # wires model_inference to the components defined above
+                  flagging_options=["incorrect", "ambiguous", "other"])  # flagging choices handed to gr.Interface
+ui.launch(enable_queue=True)  # start the app with enable_queue switched on

gradio_queue.db ADDED Viewed

File without changes

gradio_queue.db-journal ADDED Viewed

Binary file (512 Bytes). View file

{result/model → model_1}/keras_metadata.pb RENAMED Viewed

File without changes

{result/model → model_1}/saved_model.pb RENAMED Viewed

File without changes

{result/model → model_1}/variables/variables.data-00000-of-00001 RENAMED Viewed

File without changes

{result/model → model_1}/variables/variables.index RENAMED Viewed

File without changes

model_2/keras_metadata.pb ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b8af43660950e7ee4747371bb148060c46c696cf2141dcccfa48b02fe15d51f6
+size 154814

model_2/saved_model.pb ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f32916473730c6bcf6302fc51828059364bc2d8ccc5d19fac4f633ac47f15073
+size 6564579

model_2/variables/variables.data-00000-of-00001 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:338d6f22e3fa2f63a391c300d0ba96774d08ff685986f1006c55177d11656004
+size 1319386304

model_2/variables/variables.index ADDED Viewed

Binary file (40.7 kB). View file