Spaces:

CodeMax
/

hatespeech

Running

App Files Files Community

hatespeech / app.py

hm-auch

fixed font-color in huggingface theme

502440e about 2 years ago

raw

history blame contribute delete

No virus

3.74 kB

	import transformers

	import gradio as gr
	import numpy as np
	import tensorflow as tf

	def encode(sentences, tokenizer, sequence_length):
	    """Tokenize *sentences* into fixed-length TensorFlow model inputs.

	    Args:
	        sentences: Batch of raw input strings.
	        tokenizer: Tokenizer object exposing ``batch_encode_plus``.
	        sequence_length: Number of tokens every encoded sequence is padded
	            or truncated to.

	    Returns:
	        Whatever ``tokenizer.batch_encode_plus`` returns for the batch,
	        requested with an attention mask, without token type ids, and
	        with ``return_tensors='tf'``.
	    """
	    return tokenizer.batch_encode_plus(
	        sentences,
	        max_length=sequence_length,  # set the length of the sequences
	        add_special_tokens=True,  # add [CLS] and [SEP] tokens
	        return_attention_mask=True,
	        return_token_type_ids=False,  # not needed for this type of ML task
	        # `pad_to_max_length=True` is deprecated; this is its explicit
	        # replacement: pad shorter sequences up to max_length.
	        padding='max_length',
	        # Request truncation explicitly instead of relying on the
	        # tokenizer's implicit fallback, so longer inputs are cut to
	        # max_length.
	        truncation=True,
	        return_tensors='tf',
	    )

	# Load both saved Keras models once, when the module is executed, so that
	# requests reuse them. './model_1' is loaded first, then './model_2'.
	hs_detection_model_1, hs_detection_model_2 = (
	    tf.keras.models.load_model(model_dir, compile=True)
	    for model_dir in ('./model_1', './model_2')
	)

	# Tokenizer instances keyed by checkpoint name, so that each checkpoint is
	# loaded on first use only instead of on every request.
	_TOKENIZER_CACHE = {}


	def _get_tokenizer(checkpoint):
	    """Return the BertTokenizer for *checkpoint*, loading it on first use."""
	    tokenizer = _TOKENIZER_CACHE.get(checkpoint)
	    if tokenizer is None:
	        tokenizer = transformers.BertTokenizer.from_pretrained(checkpoint)
	        _TOKENIZER_CACHE[checkpoint] = tokenizer
	    return tokenizer


	def model_inference(sentence):
	    """Score one sentence with both hate-speech detection models.

	    The sentence is encoded separately for each model: the cased German
	    BERT tokenizer with 300 tokens for model 1, and the uncased one with
	    512 tokens for model 2.

	    Args:
	        sentence: The text to classify.

	    Returns:
	        A pair of dicts, one per model, each mapping the label
	        ``'Hassrede'`` to the first flattened prediction value as a float.
	    """
	    encoded_model1_sentence = encode(
	        [sentence], _get_tokenizer('dbmdz/bert-base-german-cased'), 300)
	    encoded_model2_sentence = encode(
	        [sentence], _get_tokenizer('dbmdz/bert-base-german-uncased'), 512)
	    predictions_1 = hs_detection_model_1.predict(encoded_model1_sentence.values()).flatten()
	    predictions_2 = hs_detection_model_2.predict(encoded_model2_sentence.values()).flatten()
	    return {'Hassrede': float(predictions_1[0])}, {'Hassrede': float(predictions_2[0])}

	# Heading text; passed as `title=` to gr.Interface below.
	title = "HS-Detector Demonstrator (deutsch)"
	# HTML snippet passed as `description=` to gr.Interface below. It lays out
	# two 50%-wide, left-floated columns, one for the initial model
	# ("Ausgangsmodell") and one for the challenger model. Each column lists
	# the BERT checkpoint, the dataset, the fine-tuning parameters and the
	# evaluation results.
	description = """
	<div style="float: none; overflow: hidden;">
	<div style="display:block; width:100%;">
	<center>
	<div style="width:50%; float: left; display: inline-block;">
	<h2>Ausgangsmodell</h2>
	<p>Modell: Bert ('dbmdz/bert-base-german-cased')</p>
	<p>Dataset: germeval18_hasoc19_rp21_combi_dataset <br/> (77.161 Einträge mit einem Hassrede-Anteil von 17,7%)</p>
	<p>Fine-Tuning Parameter: 2 Epochen, 300 Token pro Eintrag, 2e-5 LR</p>

	Evaluationsergebnisse:
	Balanced Accuracy: 0.756
	(Accuracy: 0.880)
	Binary F1-Score: 0.625
	Binary Precision: 0.699
	Binary Recall: 0.565
	MCC score: 0.559
	AUROC score: 0.756
	</div>
	<div style="width:50%; float: left; display: inline-block;">
	<h2>Challenger-Modell</h2>
	<p>Modell: Bert ('dbmdz/bert-base-german-uncased')</p>
	<p>Dataset: germeval18_hasoc19_rp21_combi_dataset_no-url_no-address <br/> (~77.161 Einträge mit einem Hassrede-Anteil von 17,7%)</p>
	<p>Fine-Tuning Parameter: 2 Epochen, 512 Token pro Eintrag, 2e-5 LR</p>

	Evaluationsergebnisse:
	Balanced Accuracy: 0.749
	(Accuracy: 0.867)
	Binary F1-Score: 0.602
	Binary Precision: 0.642
	Binary Recall: 0.567
	MCC score: 0.524
	AUROC score: 0.749
	</div>
	</center>
	</div>
	</div>
	"""
	# <p>Dataset: germeval18_hasoc19_rp21_glasebach22_combi_dataset_no-addr.csv <br/> (84.239 Einträge mit einem Hassrede-Anteil von 18,2%)</p>
	# Text passed as `article=` to gr.Interface below. It tells users that
	# inputs are not logged and how the interpretation feature perturbs parts
	# of the sentence. The misspelling "entscheident" is corrected to
	# "entscheidend".
	article = """Die Eingaben werden nicht geloggt. Klassifikator einfach ausprobieren.
	Unter dem Button 'Ersteller' kann inspiziert werden, welche Satz-Bestandteile für die Modelle vermutlich entscheidend waren.
	Dabei werden automatisiert Satzteile verändert und die Auswirkungen auf die jeweils abgefragten Predictions beobachtet."""

	# Multi-line text box in which the user enters the sentence to classify.
	input_sentence_text = gr.inputs.Textbox(
	    lines=5,
	    placeholder="Geben Sie hier den Satz ein, der von den Modellen auf Hassrede geprüft werden soll.",
	)

	# One label widget per model, in the order in which model_inference
	# returns its two results.
	output_predictions = [
	    gr.outputs.Label(label=caption, num_top_classes=1)
	    for caption in (
	        "Prediction of initial model",
	        "Prediction of challenging model",
	    )
	]

	# Assemble the demo from the widgets and the texts defined above.
	ui = gr.Interface(
	    fn=model_inference,
	    inputs=input_sentence_text,
	    outputs=output_predictions,
	    title=title,
	    description=description,
	    article=article,
	    interpretation="default",
	    flagging_options=["incorrect", "ambiguous", "other"],
	    theme="huggingface",
	    # Forces elements with the `confidence` class to black text.
	    css=".confidence {color: black !important}",
	)

	# Start the app with request queueing enabled.
	ui.launch(enable_queue=True)