# File size: 6,923 Bytes
# a02b141
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
import streamlit as st
import pandas as pd
import yfinance as yf
import joblib
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
import numpy as np
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from PIL import Image

# Image assets: the main-page banner, the sidebar banner, and one logo per
# supported stock (all shown by main()).
banner, banner1 = Image.open("StocKnock.png"), Image.open("StocKnock2.png")
Tesla, NVDA, Nio = Image.open('Tesla.png'), Image.open('Nvidia.png'), Image.open('Nio.png')

# Pipeline whose predict() yields the next closing price.
model_pipeline = joblib.load('model_LinReg.pkl')

# Pickled SARIMA model objects, keyed by ticker symbol.
sarima_models = {}
sarima_models['TSLA'] = joblib.load('SARIMAX_model_TSLA.pkl')
sarima_models['NVDA'] = joblib.load('SARIMAX_model_NVDA.pkl')
sarima_models['NIO'] = joblib.load('SARIMAX_model_NIO.pkl')

# Single VADER analyzer instance shared by analyze_sentiment().
sia = SentimentIntensityAnalyzer()

def analyze_sentiment(text):
    """Score *text* with the module-level VADER analyzer.

    Returns the result of ``sia.polarity_scores(text)`` unchanged; callers in
    this file read its 'pos', 'neg', 'neu' and 'compound' entries.
    """
    scores = sia.polarity_scores(text)
    return scores

def categorize_sentiment(compound_score):
    """Translate a compound sentiment score into a coarse label.

    A score of 0.05 or more is 'Positive', a score of -0.05 or less is
    'Negative', and anything else is 'Neutral'.
    """
    if compound_score >= 0.05:
        return 'Positive'
    if compound_score <= -0.05:
        return 'Negative'
    return 'Neutral'

def get_stock_data(ticker):
    """Download one year of daily price data for *ticker* through yfinance.

    A full year is requested so the SARIMA forecast has more history to use.
    Returns the downloaded frame, or None when the download comes back empty.
    """
    history = yf.download(ticker, period='1y', interval='1d')
    return None if history.empty else history

def create_input_df(company, headlines):
    """Build the single-row feature frame passed to the prediction pipeline.

    Args:
        company: Company display name; one of 'Tesla', 'Nvidia' or 'NIO'.
        headlines: Headline strings to score with VADER. May be empty, in
            which case every averaged sentiment score is 0.0 (category
            'Neutral') instead of raising ZeroDivisionError.

    Returns:
        ``None`` when the company is not recognised, when no price data could
        be downloaded, or when the downloaded data has no rows dated 2024.
        Otherwise a tuple ``(input_df, stock_data_2024)``: a one-row DataFrame
        holding the latest 2024 OHLCV values, the headline count and the
        averaged sentiment scores, plus the 2024 slice of the price history.
        Callers must check for ``None`` before unpacking.
    """
    company_ticker = {'Tesla': 'TSLA', 'Nvidia': 'NVDA', 'NIO': 'NIO'}
    ticker = company_ticker.get(company)
    if not ticker:
        return None

    stock_data = get_stock_data(ticker)
    if stock_data is None:
        return None

    # Only rows dated 2024 are kept. Because the download covers just the
    # trailing year, this slice shrinks and eventually becomes empty as time
    # moves on. NOTE(review): confirm the hard-coded year is still intended.
    stock_data_2024 = stock_data[stock_data.index.year == 2024]
    if stock_data_2024.empty:
        return None

    latest_stock = stock_data_2024.iloc[-1]

    # Score every headline once, then average each VADER component.
    sentiments = [analyze_sentiment(headline) for headline in headlines]
    num_headlines = len(sentiments)

    averages = {}
    for key in ('pos', 'neg', 'neu', 'compound'):
        total = sum(scores[key] for scores in sentiments)
        # Nothing to average without headlines; fall back to 0.0 rather than
        # dividing by zero.
        averages[key] = total / num_headlines if num_headlines else 0.0

    data = {
        'Company_ID': [ticker],
        'Open': [latest_stock['Open']],
        'High': [latest_stock['High']],
        'Low': [latest_stock['Low']],
        'Close': [latest_stock['Close']],
        'Volume': [latest_stock['Volume']],
        'news_count': [num_headlines],
        'positive': [averages['pos']],
        'negative': [averages['neg']],
        'neutral': [averages['neu']],
        'compound': [averages['compound']],
        # The category is derived from the averaged compound score.
        'sentiment_category': [categorize_sentiment(averages['compound'])],
    }

    return pd.DataFrame(data), stock_data_2024

def predict_stock_price(company, headlines):
    """Predict the next close for *company* and chart a 30-step SARIMA forecast.

    Args:
        company: Company display name understood by ``create_input_df``.
        headlines: Headlines to feed into the sentiment features; at most 10.

    Returns:
        A message string: either the predicted next closing price or a
        description of why no prediction could be made.

    Side effects:
        Writes the input DataFrame and a Plotly chart to the Streamlit page.
    """
    if len(headlines) > 10:
        return "Please provide up to 10 headlines."

    # create_input_df returns a bare None on failure, so it must not be
    # tuple-unpacked directly (that would raise TypeError instead of showing
    # the message below).
    features = create_input_df(company, headlines)
    input_df, stock_data_2024 = features if features is not None else (None, None)
    if input_df is None:
        return "Invalid company selected or no data available for 2024."

    st.write("Input DataFrame:")
    st.write(input_df)  # Display the input DataFrame for debugging

    # Predict the next closing price
    predicted_next_close = model_pipeline.predict(input_df)[0]

    # Look up the SARIMA model that matches the ticker in the feature row
    ticker = input_df['Company_ID'][0]
    sarima_model = sarima_models.get(ticker)
    if sarima_model is None:
        return "SARIMA model not available for the selected company."

    # Historical series shown on the chart.
    # NOTE(review): relies on the download exposing an 'Adj Close' column;
    # confirm against the installed yfinance version.
    history = stock_data_2024['Adj Close']

    # Forecast future prices, passing the predicted close as exog.
    # NOTE(review): a single exog value is supplied for 30 steps; verify this
    # matches how the pickled models were fitted.
    forecast_steps = 30
    forecast_with_predicted = sarima_model.forecast(steps=forecast_steps, exog=[predicted_next_close])

    # Plot the results
    fig = make_subplots(rows=1, cols=1)

    # Historical data
    fig.add_trace(go.Scatter(x=history.index, y=history, mode='lines', name='Historical Data'))

    # Predicted next close price, placed one calendar day after the last row
    predicted_date = history.index[-1] + pd.Timedelta(days=1)
    fig.add_trace(go.Scatter(x=[predicted_date], y=[predicted_next_close], mode='markers', name='Predicted Next Close'))

    # Forecast line, starting the day after the predicted point
    forecast_index = [predicted_date + pd.Timedelta(days=i) for i in range(1, forecast_steps + 1)]
    fig.add_trace(go.Scatter(x=forecast_index, y=forecast_with_predicted, mode='lines', name='Forecast'))

    fig.update_layout(title=f"SARIMA Forecast for {company}", xaxis_title="Date", yaxis_title="Price")

    st.plotly_chart(fig)

    return f"Predicted Next Close Price: {predicted_next_close}"

def main():
    """Render the StocKnock page and run a prediction when requested.

    Draws the sidebar (banner, description, supported-stock logos) and the
    main form. When the "Predict" button is pressed, the text area content is
    split into one headline per non-blank line and handed to
    ``predict_stock_price``; if no headline was entered, an error is shown and
    no prediction is attempted.
    """
    st.sidebar.image(banner1, use_column_width=True)
    st.sidebar.title("**StocKnock**")
    st.sidebar.write("Welcome to **StocKnock**, where we use sentiment analysis on social media to predict stock prices. Join us for smarter investing!")
    st.sidebar.title("What model do we use?")
    st.sidebar.write("We utilize **Linear Regression** to predict the stock for the next day and **Sarimax** to forecast future stock prices, including the predicted results.")
    st.sidebar.title("Stocks you can predict")
    st.sidebar.write("For the time being, these are the stock that you can predict!")
    st.sidebar.image(Tesla, use_column_width=True)
    st.sidebar.image(NVDA, use_column_width=True)
    st.sidebar.image(Nio, use_column_width=True)
    st.image(banner, use_column_width=True)
    st.title("Stock Price Prediction App")
    st.write("Select a company and provide up to 10 headlines to predict the next stock price based on tweets.")

    company_options = ['Tesla', 'Nvidia', 'NIO']
    company = st.selectbox("Select Company", company_options, key="company_select")

    headlines = st.text_area("Enter Headlines (up to 10 headlines)", key="headlines_input")

    if st.button("Predict", key="predict_button"):
        # One headline per line; blank or whitespace-only lines are not
        # headlines and must not inflate the count or the sentiment average.
        headline_list = [line.strip() for line in headlines.splitlines() if line.strip()]
        if not headline_list:
            st.error("Please enter headlines.")
            # Stop here: predicting without any headline is meaningless.
            return

        prediction = predict_stock_price(company, headline_list)
        st.success(prediction)


if __name__ == "__main__":
    main()