"""Streamlit app that classifies input text as hate speech with a saved LSTM model."""

import re
import subprocess

import nltk
import numpy as np
import streamlit as st
from keras.models import load_model
from nltk.corpus import stopwords
from nltk.tokenize import TweetTokenizer
from nltk.tokenize import word_tokenize
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer

# Download NLTK stopwords if not already downloaded
try:
    nltk.data.find('corpora/stopwords')
except LookupError:
    nltk.download('stopwords')

# Download NLTK punkt tokenizer if not already downloaded
try:
    nltk.data.find('tokenizers/punkt/PY3/english.pickle')
except LookupError:
    nltk.download('punkt')

# Load the LSTM model
model_path = "./my_model.h5"  # Set your model path here

# Built once instead of on every clean_text() call.
_STOP_WORDS = frozenset(stopwords.words('english'))
_MENTION_RE = re.compile(r'@\w+')
_URL_RE = re.compile(r'http\S+')
_DIGITS_RE = re.compile(r'\d+')
_NON_LETTER_RE = re.compile(r'[^a-zA-Z\s]')

# Streamlit re-runs the script on every interaction; cache_resource keeps the
# loaded model alive across reruns.  Older Streamlit releases do not provide
# it, in which case the loader is left undecorated (reloads on each click).
_cache_resource = getattr(st, "cache_resource", lambda func: func)


@_cache_resource
def load_lstm_model(model_path):
    """Load and return the Keras model stored at *model_path*."""
    return load_model(model_path)


def clean_text(text):
    """Return *text* normalised for the classifier.

    Steps, in order: drop @mentions and URLs, drop English stopwords, tokenize
    with TweetTokenizer, strip '#', drop tokens of length <= 2, lower-case,
    then remove digits and any remaining non-letter characters.

    NOTE(review): mentions/URLs used to be removed *after* word_tokenize(),
    which separates '@' from the handle and breaks URLs apart, so those
    patterns rarely matched.  Stopwords were also compared case-sensitively.
    If the model was trained on text cleaned the old way, confirm that the
    corrected cleaning does not hurt accuracy (or retrain).
    """
    # Strip mentions and URLs from the raw text, while they are still intact.
    text = _MENTION_RE.sub('', text)
    text = _URL_RE.sub('', text)

    # Remove stopwords (case-insensitive: the final text is lower-cased).
    words = word_tokenize(text)
    filtered_words = [word for word in words if word.lower() not in _STOP_WORDS]

    # Tokenize using TweetTokenizer
    tokens = TweetTokenizer(preserve_case=True).tokenize(' '.join(filtered_words))

    # Remove hashtag symbols
    tokens = [token.replace('#', '') for token in tokens]

    # Remove short words and lower-case the rest
    text = ' '.join(token.lower() for token in tokens if len(token) > 2)

    # Remove digits, then every remaining non-letter character
    text = _DIGITS_RE.sub('', text)
    text = _NON_LETTER_RE.sub('', text)
    return text


def preprocess_text(text, tokenizer=None, maxlen=100):
    """Clean *text* and convert it to a padded integer sequence.

    Args:
        text: Raw input string.
        tokenizer: Optional fitted Keras ``Tokenizer``.  Pass the tokenizer
            that was fitted on the training corpus so word indices match the
            model's vocabulary.
        maxlen: Length the sequence is padded/truncated to.

    Returns:
        The array produced by ``pad_sequences`` for the single input text.
    """
    cleaned_text = clean_text(text)

    if tokenizer is None:
        # NOTE(review): fitting a fresh Tokenizer on the input alone assigns
        # indices from this one text, which presumably do not correspond to
        # the indices the model was trained with.  Kept as the fallback for
        # backward compatibility; supply the training tokenizer instead.
        tokenizer = Tokenizer()
        tokenizer.fit_on_texts([cleaned_text])

    text_sequences = tokenizer.texts_to_sequences([cleaned_text])
    return pad_sequences(text_sequences, maxlen=maxlen)


# Function to predict hate speech
def predict_hate_speech(text, lstm_model, tokenizer=None):
    """Return ``lstm_model.predict`` output for *text*.

    Args:
        text: Raw input string.
        lstm_model: Object exposing ``predict`` (the loaded Keras model).
        tokenizer: Optional fitted tokenizer forwarded to ``preprocess_text``.
    """
    padded_sequences = preprocess_text(text, tokenizer=tokenizer)
    return lstm_model.predict(padded_sequences)


# Main function to run the Streamlit app
def main():
    """Render the UI and report whether the entered text is hate speech."""
    # Set up Streamlit UI
    st.title("Hate Speech Detection")
    st.write("Enter text below to detect hate speech:")
    input_text = st.text_area("Input Text", "")

    if not st.button("Detect Hate Speech"):
        return

    # Whitespace-only input carries no content to classify.
    if not input_text.strip():
        st.warning("Please enter some text")
        return

    lstm_model = load_lstm_model(model_path)
    prediction = predict_hate_speech(input_text, lstm_model)

    # Index of the highest score in the first (only) prediction row;
    # index 1 is treated as the hate-speech class.
    max_index = np.argmax(np.array(prediction[0]))
    if max_index == 1:
        st.error("Hate Speech Detected")
    else:
        st.success("No Hate Speech Detected")


# Run the app
if __name__ == "__main__":
    main()