Spaces:
Sleeping
Sleeping
Upload 3 files
Browse files- app.py +112 -0
- my_model.h5 +3 -0
- requirements.txt +4 -0
app.py
ADDED
@@ -0,0 +1,112 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
from keras.models import load_model
|
3 |
+
import nltk
|
4 |
+
import re
|
5 |
+
from nltk.tokenize import TweetTokenizer
|
6 |
+
from tensorflow.keras.preprocessing.text import Tokenizer
|
7 |
+
from tensorflow.keras.preprocessing.sequence import pad_sequences
|
8 |
+
import subprocess
|
9 |
+
import numpy as np
|
10 |
+
|
11 |
+
# Ensure required NLTK resources are present (download on first run only).
def _ensure_nltk_resource(lookup_path, package_name):
    """Download NLTK *package_name* only if *lookup_path* is not installed."""
    try:
        nltk.data.find(lookup_path)
    except LookupError:
        nltk.download(package_name)


_ensure_nltk_resource('corpora/stopwords', 'stopwords')
# 'tokenizers/punkt' is the documented lookup path; the previous
# 'tokenizers/punkt/PY3/english.pickle' path depended on NLTK's internal
# pickle layout and breaks when that layout changes.
_ensure_nltk_resource('tokenizers/punkt', 'punkt')

# These imports rely on the resources downloaded above being available
# when the functions are actually called.
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
|
28 |
+
|
29 |
+
# Path to the trained LSTM model (HDF5 format). Set your model path here.
model_path = "./my_model.h5"


def load_lstm_model(model_path):
    """Load and return the Keras LSTM model stored at *model_path*."""
    lstm_model = load_model(model_path)
    return lstm_model
|
34 |
+
|
35 |
+
|
36 |
+
|
37 |
+
def clean_text(text):
    """Normalize a raw tweet/string for the model.

    Pipeline: strip Twitter @usernames and URLs from the raw text,
    remove English stopwords (case-insensitively), re-tokenize with
    TweetTokenizer, strip '#' symbols, lowercase and drop short tokens,
    then remove digits and any remaining non-alphabetic characters.

    Returns the cleaned text as a single space-separated string.
    """
    # Remove Twitter usernames and URLs on the RAW text. Previously these
    # regexes ran after word_tokenize had already split 'http://...' and
    # '@user' into fragments, so the patterns no longer matched and the
    # fragments leaked into the output.
    text = re.sub(r'@\w+', '', text)
    text = re.sub(r'http\S+', '', text)

    # Remove stopwords. Compare lowercased: the NLTK stopword list is all
    # lowercase, and lowercasing only happened later in the old pipeline,
    # so capitalized stopwords ("The", "And", ...) survived filtering.
    stop_words = set(stopwords.words('english'))
    words = word_tokenize(text)
    filtered_words = [word for word in words if word.lower() not in stop_words]

    # Tokenize using TweetTokenizer (handles hashtags, emoticons, etc.).
    tokenizer = TweetTokenizer(preserve_case=True)
    tokens = tokenizer.tokenize(' '.join(filtered_words))

    # Remove hashtag symbols.
    tokens = [word.replace('#', '') for word in tokens]

    # Lowercase and drop short words (fewer than 3 characters).
    text = ' '.join([word.lower() for word in tokens if len(word) > 2])

    # Remove digits.
    text = re.sub(r'\d+', '', text)

    # Keep only ASCII letters and whitespace.
    text = re.sub(r'[^a-zA-Z\s]', '', text)

    return text
|
66 |
+
|
67 |
+
def preprocess_text(text):
    """Clean *text* and convert it to a padded integer sequence (length 100).

    FIXME(review): a brand-new Tokenizer is fitted on the single input
    string, so word indices are assigned in order of first appearance
    (1, 2, 3, ...) and bear no relation to the vocabulary the LSTM was
    trained with. The tokenizer fitted at training time should be saved
    (e.g. to JSON) and loaded here instead; as written, the indices fed
    to the model do not match its learned embedding, so predictions are
    unreliable.
    """
    # Clean the text
    cleaned_text = clean_text(text)

    # Tokenize and pad sequences
    token = Tokenizer()
    token.fit_on_texts([cleaned_text])
    text_sequences = token.texts_to_sequences([cleaned_text])
    # maxlen=100 — presumably the sequence length used at training time;
    # TODO confirm against the training script.
    padded_sequences = pad_sequences(text_sequences, maxlen=100)

    return padded_sequences
|
78 |
+
|
79 |
+
# Run the model on a single piece of text.
def predict_hate_speech(text, lstm_model):
    """Preprocess *text* and return the model's raw prediction array."""
    model_input = preprocess_text(text)
    return lstm_model.predict(model_input)
|
85 |
+
|
86 |
+
# Main function to run the Streamlit app
def main():
    """Render the Streamlit UI and classify the entered text on demand."""
    st.title("Hate Speech Detection")
    st.write("Enter text below to detect hate speech:")
    input_text = st.text_area("Input Text", "")

    if st.button("Detect Hate Speech"):
        # .strip() so whitespace-only input is treated as empty rather
        # than being sent to the model.
        if input_text.strip():
            # Load the model (reloaded on every click; fine for a demo,
            # but st.cache_resource would avoid the repeated disk read).
            lstm_model = load_lstm_model(model_path)
            # Predict hate speech
            prediction = predict_hate_speech(input_text, lstm_model)
            # model.predict already returns a numpy array — take the
            # argmax of the first (only) row directly.
            max_index = np.argmax(prediction[0])
            if max_index == 1:
                # Class index 1 == hate speech.
                st.error("Hate Speech Detected")
            else:
                st.success("No Hate Speech Detected")
        else:
            st.warning("Please enter some text")


# Run the app
if __name__ == "__main__":
    main()
|
my_model.h5
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:16cac2f352b17d0cac372fa35e56d49363b58e9a2c8a15f54cdf227009419567
|
3 |
+
size 9365784
|
requirements.txt
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
streamlit
|
2 |
+
keras
|
3 |
+
nltk
|
4 |
+
tensorflow
|