Spaces:
Runtime error
Runtime error
hm-auch
committed on
Commit
·
b642a67
1
Parent(s):
86814fc
update classifier and demonstrator-code
Browse files
- README.md +1 -1
- app.py +44 -31
- hscommon.py +0 -13
- {save/modelV1 → result/model}/keras_metadata.pb +2 -2
- {save/modelV1 → result/model}/saved_model.pb +2 -2
- {save/modelV1 → result/model}/variables/variables.data-00000-of-00001 +2 -2
- result/model/variables/variables.index +0 -0
- save/modelV1/variables/variables.index +0 -0
README.md
CHANGED
@@ -10,4 +10,4 @@ pinned: false
|
|
10 |
license: afl-3.0
|
11 |
---
|
12 |
|
13 |
-
Check out the configuration reference at https://huggingface.co/docs/hub/spaces#reference
|
|
|
10 |
license: afl-3.0
|
11 |
---
|
12 |
|
13 |
+
Check out the configuration reference at https://huggingface.co/docs/hub/spaces#reference
|
app.py
CHANGED
@@ -1,44 +1,57 @@
|
|
1 |
import transformers
|
2 |
-
import hscommon
|
3 |
|
4 |
import gradio as gr
|
5 |
import tensorflow as tf
|
6 |
|
7 |
-
|
8 |
-
|
9 |
-
MODEL_DIRECTORY = 'save/modelV1'
|
10 |
PRETRAINED_MODEL_NAME = 'dbmdz/bert-base-german-cased'
|
11 |
TOKENIZER = transformers.BertTokenizer.from_pretrained(PRETRAINED_MODEL_NAME)
|
12 |
-
MAX_SEQUENCE_LENGTH =
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
optimizer = optimization.create_optimizer(
|
25 |
-
init_lr=INIT_LR,
|
26 |
-
num_train_steps=steps_per_epoch,
|
27 |
-
num_warmup_steps=num_warmup_steps,
|
28 |
-
optimizer_type=OPTIMIZER
|
29 |
)
|
30 |
|
31 |
-
|
32 |
-
return model
|
33 |
-
|
34 |
-
hs_detection_model = tf.keras.models.load_model(MODEL_DIRECTORY, compile=False)
|
35 |
-
compile_model(hs_detection_model)
|
36 |
|
37 |
def inference(sentence):
|
38 |
-
encoded_sentence =
|
39 |
-
|
40 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
41 |
|
42 |
input_sentence_text = gr.inputs.Textbox(placeholder="Hier den Satz eingeben, der Hassrede enthalten kann.")
|
43 |
-
|
44 |
-
|
|
|
1 |
import transformers
|
|
|
2 |
|
3 |
import gradio as gr
|
4 |
import tensorflow as tf
|
5 |
|
6 |
+
MODEL_DIRECTORY = './result/model'
|
|
|
|
|
7 |
PRETRAINED_MODEL_NAME = 'dbmdz/bert-base-german-cased'
|
8 |
TOKENIZER = transformers.BertTokenizer.from_pretrained(PRETRAINED_MODEL_NAME)
|
9 |
+
MAX_SEQUENCE_LENGTH = 300
|
10 |
+
|
11 |
+
def encode(sentences, tokenizer, sequence_length):
|
12 |
+
return tokenizer.batch_encode_plus(
|
13 |
+
sentences,
|
14 |
+
max_length=sequence_length, # set the length of the sequences
|
15 |
+
add_special_tokens=True, # add [CLS] and [SEP] tokens
|
16 |
+
return_attention_mask=True,
|
17 |
+
return_token_type_ids=False, # not needed for this type of ML task
|
18 |
+
pad_to_max_length=True, # add 0 pad tokens to the sequences less than max_length
|
19 |
+
return_tensors='tf'
|
|
|
|
|
|
|
|
|
|
|
|
|
20 |
)
|
21 |
|
22 |
+
hs_detection_model = tf.keras.models.load_model(MODEL_DIRECTORY, compile=True)
|
|
|
|
|
|
|
|
|
23 |
|
24 |
def inference(sentence):
|
25 |
+
encoded_sentence = encode([sentence], TOKENIZER, MAX_SEQUENCE_LENGTH)
|
26 |
+
return hs_detection_model.predict(encoded_sentence.values())
|
27 |
+
|
28 |
+
|
29 |
+
title = "HS-Detector Demonstrator"
|
30 |
+
description = """
|
31 |
+
<center>
|
32 |
+
<p>Dataset: germeval18_hasoc19_rp21_combi_dataset (17,7% HS)</p>
|
33 |
+
<p>Das bisher beste Modell basierend auf Bert nach 2 Epochen und max. 300 Token pro Eintrag fine-tuning mit folgenden Evaluationsergebnissen:</p>
|
34 |
+
|
35 |
+
Accuracy: 0.8794712286158631<br/>
|
36 |
+
Balanced Accuracy: 0.7561891312100413<br/>
|
37 |
+
Binary F1-Score: 0.6249999999999999<br/>
|
38 |
+
Binary Precision: 0.6994584837545126<br/>
|
39 |
+
Binary Recall: 0.564868804664723<br/>
|
40 |
+
Weighted F1-Score: 0.8742843536656945<br/>
|
41 |
+
Weighted Precision: 0.8722794361456155<br/>
|
42 |
+
Weighted Recall: 0.8794712286158631<br/>
|
43 |
+
Macro F1-Score: 0.7765982087708463<br/>
|
44 |
+
Macro Precision: 0.80455672371745<br/>
|
45 |
+
Macro Recall: 0.7561891312100413<br/>
|
46 |
+
MCC score: 0.558655967312084<br/>
|
47 |
+
AUROC score: 0.7561891312100413<br/>
|
48 |
+
|
49 |
+
<img src="https://huggingface.co/spaces/course-demos/Rick_and_Morty_QA/resolve/main/rick.png" width=200px>
|
50 |
+
</center>
|
51 |
+
"""
|
52 |
+
|
53 |
+
article = "Die Eingaben werden nicht geloggt. Klassifikator einfach ausprobieren."
|
54 |
|
55 |
input_sentence_text = gr.inputs.Textbox(placeholder="Hier den Satz eingeben, der Hassrede enthalten kann.")
|
56 |
+
ui = gr.Interface(fn=inference, inputs=input_sentence_text, outputs="text", title = title, description = description, article = article)
|
57 |
+
ui.launch()
|
hscommon.py
DELETED
@@ -1,13 +0,0 @@
|
|
1 |
-
|
2 |
-
|
3 |
-
|
4 |
-
def encode(sentences, tokenizer, sequence_length):
|
5 |
-
return tokenizer.batch_encode_plus(
|
6 |
-
sentences,
|
7 |
-
max_length=sequence_length, # set the length of the sequences
|
8 |
-
add_special_tokens=True, # add [CLS] and [SEP] tokens
|
9 |
-
return_attention_mask=True,
|
10 |
-
return_token_type_ids=False, # not needed for this type of ML task
|
11 |
-
pad_to_max_length=True, # add 0 pad tokens to the sequences less than max_length
|
12 |
-
return_tensors='tf'
|
13 |
-
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
{save/modelV1 → result/model}/keras_metadata.pb
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:805488d800d068dbc81f561789b35c1fe524012434890af914e94166ac17497d
|
3 |
+
size 154871
|
{save/modelV1 → result/model}/saved_model.pb
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:01b6489c30792f60f29d622bd7b0e3985fa863165907c8fded7fb2b1029cc421
|
3 |
+
size 6564579
|
{save/modelV1 → result/model}/variables/variables.data-00000-of-00001
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:879c45b816164b7f5919fd5c42ec35d34f1eb266ee3b28a7c210bcb23f5a6d86
|
3 |
+
size 1319386304
|
result/model/variables/variables.index
ADDED
Binary file (40.7 kB). View file
|
|
save/modelV1/variables/variables.index
DELETED
Binary file (40.6 kB)
|
|