TRHatePrint / app.py
gokceuludogan's picture
Update app.py
be22fa8 verified
import gradio as gr
from transformers import pipeline, AutoTokenizer
from turkish_lm_tuner import T5ForClassification
import os
# Retrieve Hugging Face authentication token from environment variables
hf_auth_token = os.getenv('HF_AUTH_TOKEN')
print(hf_auth_token)
# Example inputs for the different tasks
binary_classification_examples = [["Yahudi terörüne karşı protestolar kararlılıkla devam ediyor."]]
categorization_examples = [["Ermeni zulmü sırasında hayatını kaybeden kadınlar anısına dikilen anıt ziyarete açıldı."]]
target_detection_examples = [["Dün 5 bin suriyeli enik doğmuştur zaten Türkiyede 5 bin suriyelinin gitmesi çok çok az"]]
# Application description and citation placeholder
APP_DESCRIPTION = """
## Hate Speech Detection in Turkish News
This tool performs hate speech detection across several tasks, including binary classification, categorization, and target detection. Choose a model and input text to analyze its hatefulness, categorize it, or detect targets of hate speech.
"""
APP_CITATION = """
For citation, please refer to the tool's documentation.
"""
binary_mapping = {
'LABEL_0': 'non-hateful',
'LABEL_1': 'hateful',
}
category_mapping = {
'LABEL_0': 'non-hateful',
'LABEL_1': 'symbolization',
'LABEL_2': 'exaggeration/generalization/attribution/distortion',
'LABEL_3': 'swearing/insult/defamation/dehumanization',
'LABEL_4': 'threat of enmity/war/attack/murder/harm',
}
def inference_t5(input_text, selected_model):
model = T5ForClassification.from_pretrained("gokceuludogan/turna_tr_hateprint_w0.1_new_") #_b128")
tokenizer = AutoTokenizer.from_pretrained("gokceuludogan/turna_tr_hateprint_w0.1_new_") #_b128")
return model(**tokenizer(input_text, return_tensors='pt')).logits
# Functions for model-based tasks
def perform_binary_classification(input_text, selected_model):
if (selected_model is not None) and ('turna' in selected_model):
return inference_t5(input_text, selected_model)
model = pipeline(model=f'gokceuludogan/{selected_model}')
return binary_mapping.get(model(input_text)[0]['label'], 'error')
def perform_categorization(input_text):
model = pipeline(model=f'gokceuludogan/berturk_tr_hateprint_cat_w0.1_b128')
return category_mapping.get(model(input_text)[0]['label'], 'error')
def perform_target_detection(input_text):
model = pipeline(model='gokceuludogan/turna_generation_tr_hateprint_target')
return model(input_text)[0]['generated_text']
def perform_multi_detection(input_text):
model = pipeline(model='gokceuludogan/turna_generation_tr_hateprint_multi')
return model(input_text)[0]['generated_text']
# Gradio interface
with gr.Blocks(theme="abidlabs/Lime") as hate_speech_demo:
# Main description
with gr.Tab("About"):
gr.Markdown(APP_DESCRIPTION)
# Binary Classification Tab
with gr.Tab("Binary Classification"):
gr.Markdown("Analyze the hatefulness of a given text using selected models.")
with gr.Column():
model_choice_binary = gr.Radio(
choices=[
"turna_tr_hateprint_w0.1_new_",
"berturk_tr_hateprint_w0.1",
],
label="Select Model",
value="turna_tr_hateprint"
)
text_input_binary = gr.Textbox(label="Input Text")
classify_button = gr.Button("Analyze")
classification_output = gr.Textbox(label="Classification Result")
classify_button.click(
perform_binary_classification,
inputs=[text_input_binary, model_choice_binary],
outputs=classification_output
)
# Hate Speech Categorization Tab
with gr.Tab("Hate Speech Categorization"):
gr.Markdown("Categorize the hate speech type in the provided text.")
with gr.Column():
text_input_category = gr.Textbox(label="Input Text")
categorize_button = gr.Button("Categorize")
categorization_output = gr.Textbox(label="Categorization Result")
categorize_button.click(
perform_categorization,
inputs=[text_input_category],
outputs=categorization_output
)
# Target Detection Tab
with gr.Tab("Target Detection"):
gr.Markdown("Detect the targets of hate speech in the provided text.")
with gr.Column():
text_input_target = gr.Textbox(label="Input Text")
target_button = gr.Button("Detect Targets")
target_output = gr.Textbox(label="Target Detection Result")
target_button.click(
perform_target_detection,
inputs=[text_input_target],
outputs=target_output
)
# Multi Detection Tab
with gr.Tab("Multi Detection"):
gr.Markdown("Detect hate speech, its category, and its targets in the text.")
with gr.Column():
text_input_multi = gr.Textbox(label="Input Text")
multi_button = gr.Button("Detect All")
multi_output = gr.Textbox(label="Multi Detection Result")
multi_button.click(
perform_multi_detection,
inputs=[text_input_multi],
outputs=multi_output
)
# Citation Section
gr.Markdown(APP_CITATION)
# Launch the application
hate_speech_demo.launch()