Spaces:

attaelahi
/

sms-spam-classification

Runtime error

App Files Files Community

attaelahi committed on Nov 16, 2023

Commit

5f27c76

1 Parent(s): 0f26191

Update app.py

Browse files

Files changed (1) hide show

app.py +97 -22

app.py CHANGED Viewed

@@ -1,31 +1,106 @@
-# Filename: app.py
 import streamlit as st
-from transformers import pipeline
-# Load a different text classification model for spam detection
-classifier = pipeline("text-classification", model="mrm8488/bert-tiny-finetuned-sms-spam-detection")
-def main():
-    st.title("Spam Detection App")
-    # Text input for the user to enter a message
-    user_input = st.text_input("Enter a message:")
-    if st.button("Check for Spam"):
-        if user_input:
-            # Use the loaded model to classify the user's input
-            result = classifier(user_input)[0]
-            # Display the result
-            st.write(f"**Result:** {result['label']} (Confidence: {result['score']:.2%})")
-            # Show a message based on the classification
-            if result['label'] == 'spam':
-                st.error("This message is classified as spam.")
-            else:
-                st.success("This message is not spam.")
         else:
-            st.warning("Please enter a message before checking for spam.")
-if __name__ == "__main__":
-    main()

 import streamlit as st
+import pandas as pd
+from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments
+import torch
+import os
+# Directory the fine-tuning section writes the model and tokenizer into.
+FINE_TUNED_MODEL_DIR = "./fine_tuned_sms_spam_model"
+# Single checkpoint identifier shared by the tokenizer and the model loader.
+PRETRAINED_CHECKPOINT = "wesleyacheng/sms-spam-classification-with-bert"
+tokenizer = AutoTokenizer.from_pretrained(PRETRAINED_CHECKPOINT)
+model = AutoModelForSequenceClassification.from_pretrained(PRETRAINED_CHECKPOINT)
+# Title of the Streamlit page.
+st.title("SMS Spam Classification")
+def classify_spam_or_ham(text):
+    """Classify one message as "Spam" or "Not-Spam" with the loaded model.
+
+    Args:
+        text: Message to classify; handed to the module-level tokenizer.
+
+    Returns:
+        "Spam" when the logit at index 1 is strictly greater than the logit
+        at index 0 for the first sequence, otherwise "Not-Spam".
+    """
+    # Encode as PyTorch tensors, with padding and truncation enabled.
+    encoded = tokenizer(text, return_tensors="pt", padding=True, truncation=True)
+    # Inference only, so gradient tracking is switched off.
+    with torch.no_grad():
+        logits = model(**encoded).logits
+    first_row = logits[0]
+    if first_row[1] > first_row[0]:
+        return "Spam"
+    return "Not-Spam"
+# Heading on the main page; written unconditionally on every script run.
+st.write("Single SMS Example:")
+# Function to classify a single SMS
+def classify_single_sms(text):
+    """Classify one value and write the message and its prediction to the page.
+
+    Args:
+        text: Value to classify. Anything that is not a ``str`` is skipped
+            with a warning instead of being sent to the classifier.
+    """
+    # Guard first: only strings are classified.
+    if not isinstance(text, str):
+        st.warning("Skipping non-text data.")
+        return
+    label = classify_spam_or_ham(text)
+    for line in (f"SMS: {text}", f"Prediction: {label}", "--------"):
+        st.write(line)
+# CSV upload section: read the uploaded file, let the user pick the SMS
+# column, and classify every value in it.
+st.sidebar.header("Upload CSV File")
+uploaded_file = st.sidebar.file_uploader("Upload a CSV file with SMS messages:", type=["csv"])
+# Bind these up front so later sections that read them (the fine-tuning
+# branch) never hit a NameError when no file was uploaded or the read failed.
+df = None
+selected_column = None
+if uploaded_file is not None:
+    st.sidebar.write("Classifying SMS messages in the uploaded file...")
+    try:
+        df = pd.read_csv(uploaded_file, encoding='latin1')  # Specify the appropriate encoding
+    except UnicodeDecodeError:
+        st.sidebar.error("Error: Unable to decode the CSV file. Please make sure it is in the correct encoding.")
+    except (pd.errors.EmptyDataError, pd.errors.ParserError) as read_error:
+        # Empty or malformed CSVs are reported in the sidebar instead of crashing the app.
+        st.sidebar.error(f"Error: Unable to parse the CSV file: {read_error}")
+    else:
+        # Allow the user to select the column containing SMS messages
+        selected_column = st.sidebar.selectbox("Select the SMS column:", df.columns)
+        if df[selected_column].dtype == "object":
+            st.write("Classifications:")
+            for sms_text in df[selected_column]:
+                classify_single_sms(sms_text)
+            # Report completion only when classification actually ran.
+            st.sidebar.write("Classification completed!")
         else:
+            st.sidebar.error("Selected column does not contain text data and cannot be tokenized.")
+# Sidebar form for classifying one hand-typed message.
+st.sidebar.write("Or classify a single SMS:")
+user_input = st.sidebar.text_area("Enter an SMS message:")
+classify_clicked = st.sidebar.button("Classify")
+if classify_clicked and user_input:
+    classify_single_sms(user_input)
+elif classify_clicked:
+    # Button pressed with an empty text area.
+    st.sidebar.warning("Please enter an SMS message.")
+# Fine-tuning section: train on the selected CSV column and save the result.
+st.write("Or fine-tune the model:")
+if st.button("Fine-Tune Model"):
+    # Training requires an uploaded file, a chosen column, and object dtype.
+    can_fine_tune = (
+        uploaded_file is not None
+        and selected_column
+        and df[selected_column].dtype == "object"
+    )
+    if can_fine_tune:
+        # NOTE(review): the training data is the raw selected column; no
+        # tokenization or labels are attached here - confirm that Trainer
+        # accepts this input before relying on this branch.
+        fine_tune_texts = df[selected_column]
+        finetune_args = TrainingArguments(
+            output_dir=FINE_TUNED_MODEL_DIR,
+            overwrite_output_dir=True,
+            per_device_train_batch_size=8,
+            num_train_epochs=3,
+        )
+        trainer = Trainer(
+            model=model,
+            args=finetune_args,
+            train_dataset=fine_tune_texts,
+        )
+        trainer.train()
+        # Save the model first, then the tokenizer, into the same directory.
+        for artifact in (model, tokenizer):
+            artifact.save_pretrained(FINE_TUNED_MODEL_DIR)
+        st.write("Model has been fine-tuned and saved.")
+    elif not uploaded_file:
+        st.warning("Please upload a CSV file before fine-tuning.")
+    elif not selected_column:
+        st.warning("Please select the SMS column before fine-tuning.")
+    else:
+        st.warning("The selected column does not contain text data and cannot be used for fine-tuning.")