hm-auch committed on
Commit
b642a67
1 Parent(s): 86814fc

update classifier and demonstrator-code

Browse files
README.md CHANGED
@@ -10,4 +10,4 @@ pinned: false
10
  license: afl-3.0
11
  ---
12
 
13
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces#reference
 
10
  license: afl-3.0
11
  ---
12
 
13
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces#reference
app.py CHANGED
@@ -1,44 +1,57 @@
1
  import transformers
2
- import hscommon
3
 
4
  import gradio as gr
5
  import tensorflow as tf
6
 
7
- from official.nlp import optimization # to create AdamW optimizer
8
-
9
- MODEL_DIRECTORY = 'save/modelV1'
10
  PRETRAINED_MODEL_NAME = 'dbmdz/bert-base-german-cased'
11
  TOKENIZER = transformers.BertTokenizer.from_pretrained(PRETRAINED_MODEL_NAME)
12
- MAX_SEQUENCE_LENGTH = 256
13
- EPOCHS = 2
14
- OPTIMIZER = 'adamw'
15
- INIT_LR = 3e-5
16
- LOSS = tf.keras.losses.BinaryCrossentropy(from_logits=False)
17
- METRICS = tf.metrics.BinaryAccuracy()
18
-
19
- def compile_model(model):
20
- steps_per_epoch = 10
21
- num_train_steps = steps_per_epoch * EPOCHS
22
- num_warmup_steps = int(0.1*num_train_steps)
23
-
24
- optimizer = optimization.create_optimizer(
25
- init_lr=INIT_LR,
26
- num_train_steps=steps_per_epoch,
27
- num_warmup_steps=num_warmup_steps,
28
- optimizer_type=OPTIMIZER
29
  )
30
 
31
- model.compile(optimizer=optimizer, loss=LOSS, metrics=[METRICS])
32
- return model
33
-
34
- hs_detection_model = tf.keras.models.load_model(MODEL_DIRECTORY, compile=False)
35
- compile_model(hs_detection_model)
36
 
37
  def inference(sentence):
38
- encoded_sentence = hscommon.encode([sentence], TOKENIZER, MAX_SEQUENCE_LENGTH)
39
- predicition = hs_detection_model.predict(encoded_sentence.values())
40
- return predicition
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
41
 
42
  input_sentence_text = gr.inputs.Textbox(placeholder="Hier den Satz eingeben, der Hassrede enthalten kann.")
43
- iface = gr.Interface(fn=inference, inputs=input_sentence_text, outputs="text")
44
- iface.launch()
 
1
  import transformers
 
2
 
3
  import gradio as gr
4
  import tensorflow as tf
5
 
6
+ MODEL_DIRECTORY = './result/model'
 
 
7
  PRETRAINED_MODEL_NAME = 'dbmdz/bert-base-german-cased'
8
  TOKENIZER = transformers.BertTokenizer.from_pretrained(PRETRAINED_MODEL_NAME)
9
+ MAX_SEQUENCE_LENGTH = 300
10
+
11
+ def encode(sentences, tokenizer, sequence_length):
12
+ return tokenizer.batch_encode_plus(
13
+ sentences,
14
+ max_length=sequence_length, # set the length of the sequences
15
+ add_special_tokens=True, # add [CLS] and [SEP] tokens
16
+ return_attention_mask=True,
17
+ return_token_type_ids=False, # not needed for this type of ML task
18
+ pad_to_max_length=True, # add 0 pad tokens to the sequences less than max_length
19
+ return_tensors='tf'
 
 
 
 
 
 
20
  )
21
 
22
+ hs_detection_model = tf.keras.models.load_model(MODEL_DIRECTORY, compile=True)
 
 
 
 
23
 
24
  def inference(sentence):
25
+ encoded_sentence = encode([sentence], TOKENIZER, MAX_SEQUENCE_LENGTH)
26
+ return hs_detection_model.predict(encoded_sentence.values())
27
+
28
+
29
+ title = "HS-Detector Demonstrator"
30
+ description = """
31
+ <center>
32
+ <p>Dataset: germeval18_hasoc19_rp21_combi_dataset (17,7% HS)</p>
33
+ <p>Das bisher beste Modell basierend auf Bert nach 2 Epochen und max. 300 Token pro Eintrag fine-tuning mit folgenden Evaluationsergebnissen:</p>
34
+
35
+ Accuracy: 0.8794712286158631<br/>
36
+ Balanced Accuracy: 0.7561891312100413<br/>
37
+ Binary F1-Score: 0.6249999999999999<br/>
38
+ Binary Precision: 0.6994584837545126<br/>
39
+ Binary Recall: 0.564868804664723<br/>
40
+ Weighted F1-Score: 0.8742843536656945<br/>
41
+ Weighted Precision: 0.8722794361456155<br/>
42
+ Weighted Recall: 0.8794712286158631<br/>
43
+ Macro F1-Score: 0.7765982087708463<br/>
44
+ Macro Precision: 0.80455672371745<br/>
45
+ Macro Recall: 0.7561891312100413<br/>
46
+ MCC score: 0.558655967312084<br/>
47
+ AUROC score: 0.7561891312100413<br/>
48
+
49
+ <img src="https://huggingface.co/spaces/course-demos/Rick_and_Morty_QA/resolve/main/rick.png" width=200px>
50
+ </center>
51
+ """
52
+
53
+ article = "Die Eingaben werden nicht geloggt. Klassifikator einfach ausprobieren."
54
 
55
  input_sentence_text = gr.inputs.Textbox(placeholder="Hier den Satz eingeben, der Hassrede enthalten kann.")
56
+ ui = gr.Interface(fn=inference, inputs=input_sentence_text, outputs="text", title = title, description = description, article = article)
57
+ ui.launch()
hscommon.py DELETED
@@ -1,13 +0,0 @@
1
-
2
-
3
-
4
- def encode(sentences, tokenizer, sequence_length):
5
- return tokenizer.batch_encode_plus(
6
- sentences,
7
- max_length=sequence_length, # set the length of the sequences
8
- add_special_tokens=True, # add [CLS] and [SEP] tokens
9
- return_attention_mask=True,
10
- return_token_type_ids=False, # not needed for this type of ML task
11
- pad_to_max_length=True, # add 0 pad tokens to the sequences less than max_length
12
- return_tensors='tf'
13
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
{save/modelV1 → result/model}/keras_metadata.pb RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5972b15e2cc31b72e34a938e1d9614cf6a9d2b1872abbc654453ab5f613693a2
3
- size 155363
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:805488d800d068dbc81f561789b35c1fe524012434890af914e94166ac17497d
3
+ size 154871
{save/modelV1 → result/model}/saved_model.pb RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ccb2afb1ffb3c867ae345e7d43af0e20f3f476c83cb86c838a07bf703c216fa4
3
- size 6651882
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:01b6489c30792f60f29d622bd7b0e3985fa863165907c8fded7fb2b1029cc421
3
+ size 6564579
{save/modelV1 → result/model}/variables/variables.data-00000-of-00001 RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7940b2394fa52153c18be7b6007423ca74bb4d346b1f808c5ca94e4951090140
3
- size 1319391849
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:879c45b816164b7f5919fd5c42ec35d34f1eb266ee3b28a7c210bcb23f5a6d86
3
+ size 1319386304
result/model/variables/variables.index ADDED
Binary file (40.7 kB). View file
 
save/modelV1/variables/variables.index DELETED
Binary file (40.6 kB)