# Module 1: Import necessary packages
import streamlit as st
import pandas as pd
import requests
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
from sklearn.svm import LinearSVC
from sklearn.naive_bayes import MultinomialNB
from streamlit_lottie import st_lottie
import warnings

warnings.filterwarnings("ignore")

# Set page configuration (must be the first Streamlit command)
page_icon = ":metro:"  # emojis: https://www.webfx.com/tools/emoji-cheat-sheet/
layout = "wide"
page_title = "Fake News Detection"
st.set_page_config(page_title=page_title, page_icon=page_icon, layout=layout)

# Module 2: Load the dataset
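# The CSV is expected to provide 'text' and 'label' columns; REAL maps to 0 and anything else to 1.
# @st.cache_data keeps the parsed DataFrame in memory so the file is read only once per session.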
@st.cache_data
def load_data():
    data = pd.read_csv("fake_or_real_news.csv")
    data['fake'] = data['label'].apply(lambda x: 0 if x == 'REAL' else 1)
    return data

# Module 3: Select Vectorizer and Classifier
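# Both vectorizers drop English stop words and ignore terms appearing in more than 70% of
# documents (max_df=0.7); LinearSVC and MultinomialNB are fast, standard baselines for
# sparse text features.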
def select_model():
    vectorizer_type = st.sidebar.selectbox("Select Vectorizer", ["TF-IDF", "Bag of Words"])
    classifier_type = st.sidebar.selectbox("Select Classifier", ["Linear SVM", "Naive Bayes"])
    
    vectorizer = None
    if vectorizer_type == "TF-IDF":
        vectorizer = TfidfVectorizer(stop_words='english', max_df=0.7)
    elif vectorizer_type == "Bag of Words":
        vectorizer = CountVectorizer(stop_words='english', max_df=0.7)
    
    classifier = None
    if classifier_type == "Linear SVM":
        classifier = LinearSVC()
    elif classifier_type == "Naive Bayes":
        classifier = MultinomialNB()
    
    return vectorizer, classifier

# Module 4: Train the model (no caching here)
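# The vectorizer is fitted here and reused in main() to transform the user's input, so both
# share the same fitted vocabulary. Training re-runs on every "Check" click.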
def train_model(data, vectorizer, classifier):
    x_vectorized = vectorizer.fit_transform(data['text'])
    clf = classifier.fit(x_vectorized, data['fake'])
    return clf

# Module 5: Streamlit app
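# Flow: load the dataset, let the user pick a vectorizer/classifier in the sidebar,
# then train on the full dataset and classify the pasted article when "Check" is clicked.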
def main():
    # Streamlit app
    st.title(page_title + " " + page_icon)

    # st_lottie (from streamlit_lottie) expects the animation as parsed JSON, so fetch it first
    lottie_url = "https://lottie.host/bd0c4818-c5a6-4e42-b407-746bc448c2c7/ipVUdgFncO.json"
    lottie_response = requests.get(lottie_url)
    if lottie_response.status_code == 200:
        st_lottie(lottie_response.json(), width=200, height=200)

    # --- HIDE STREAMLIT STYLE ---
    hide_st_style = """
    <style>
    #MainMenu {visibility: hidden;}
    footer {visibility: hidden;}
    header {visibility: hidden;}
    </style>
    """
    st.markdown(hide_st_style, unsafe_allow_html=True)

    # Load data
    data = load_data()
    
    # Select vectorizer and classifier
    vectorizer, classifier = select_model()
    
    # Text input for user to input news article
    user_input = st.text_area("Enter your news article here:")
    
    # When user submits the input
    if st.button("Check"):
        # Guard against empty input before training and predicting
        if not user_input.strip():
            st.warning("Please enter a news article to check.")
            return
        # Train the model
        clf = train_model(data, vectorizer, classifier)
        
        # Vectorize the user input
        input_vectorized = vectorizer.transform([user_input])
        
        # Predict the label of the input
        prediction = clf.predict(input_vectorized)
        
        # Convert prediction to integer for interpretation
        result = int(prediction[0])
        
        # Display the result
        if result == 1:
            st.error("This news article is likely fake!")
        else:
            st.success("This news article seems to be real.")

# Run the Streamlit app
if __name__ == "__main__":
    main()

st.markdown("**Created with enthusiasm by SuperSam**")