Spaces:

Reaumur
/

StocKnock

Sleeping

File size: 6,923 Bytes

a02b141

import streamlit as st
import pandas as pd
import yfinance as yf
import joblib
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
import numpy as np
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from PIL import Image

# Branding artwork: `banner` heads the main page, `banner1` heads the sidebar
banner, banner1 = Image.open("StocKnock.png"), Image.open("StocKnock2.png")

# Company logos listed in the sidebar
Tesla, NVDA, Nio = (
    Image.open(logo_file)
    for logo_file in ('Tesla.png', 'Nvidia.png', 'Nio.png')
)

# Pipeline used to predict the next closing price
model_pipeline = joblib.load('model_LinReg.pkl')

# One pre-fitted SARIMA model per supported ticker, keyed by ticker symbol
sarima_models = {
    ticker: joblib.load(model_file)
    for ticker, model_file in (
        ('TSLA', 'SARIMAX_model_TSLA.pkl'),
        ('NVDA', 'SARIMAX_model_NVDA.pkl'),
        ('NIO', 'SARIMAX_model_NIO.pkl'),
    )
}

# Shared VADER analyzer, created once and reused for every headline
sia = SentimentIntensityAnalyzer()

def analyze_sentiment(text):
    """Score ``text`` with the module-level VADER analyzer.

    Returns whatever ``sia.polarity_scores`` produces; callers in this file
    read its 'pos', 'neg', 'neu' and 'compound' entries.
    """
    scores = sia.polarity_scores(text)
    return scores

def categorize_sentiment(compound_score):
    """Map a compound sentiment score to a text label.

    Scores of 0.05 or more are 'Positive', scores of -0.05 or less are
    'Negative', and everything else is 'Neutral'.
    """
    label = 'Neutral'
    if compound_score >= 0.05:
        label = 'Positive'
    elif compound_score <= -0.05:
        label = 'Negative'
    return label

def get_stock_data(ticker):
    """Download one year of daily price history for ``ticker``.

    Args:
        ticker: Ticker symbol passed to ``yf.download`` (e.g. 'TSLA').

    Returns:
        The downloaded DataFrame with single-level column names, or ``None``
        when the download produced no rows.
    """
    # One year of history is requested for better SARIMA forecasting context.
    # auto_adjust is pinned to False so the unadjusted OHLC columns and the
    # separate 'Adj Close' column (read elsewhere in this file) do not depend
    # on the library default.
    # NOTE(review): newer yfinance releases default auto_adjust to True and
    # drop 'Adj Close' -- confirm against the installed version.
    stock_data = yf.download(
        ticker, period='1y', interval='1d', auto_adjust=False
    )
    if stock_data is None or stock_data.empty:
        return None

    # Some yfinance versions return (field, ticker) MultiIndex columns even
    # for a single ticker; keep only the field level so that lookups such as
    # row['Open'] yield scalars rather than one-element Series.
    if isinstance(stock_data.columns, pd.MultiIndex):
        stock_data.columns = stock_data.columns.get_level_values(0)

    return stock_data

def create_input_df(company, headlines, year=2024):
    """Build the single-row feature frame fed to the prediction pipeline.

    The row combines the most recent daily bar of ``year`` for the selected
    company with the average VADER scores of ``headlines``.

    Args:
        company: Display name of the company ('Tesla', 'Nvidia' or 'NIO').
        headlines: Sequence of headline strings to score.
        year: Calendar year the price history is restricted to. Defaults to
            2024, the value that was previously hard-coded.

    Returns:
        ``(input_df, stock_data_for_year)`` on success, or a bare ``None``
        when the company is unknown, the download returned nothing, or no
        rows fall inside ``year``.
    """
    company_ticker = {'Tesla': 'TSLA', 'Nvidia': 'NVDA', 'NIO': 'NIO'}
    ticker = company_ticker.get(company)
    if not ticker:
        return None

    stock_data = get_stock_data(ticker)
    if stock_data is None:
        return None

    # Keep only the rows whose index date falls in the requested year
    stock_data_2024 = stock_data[stock_data.index.year == year]
    if stock_data_2024.empty:
        return None

    latest_stock = stock_data_2024.iloc[-1]

    # Score every headline once, then average each component
    sentiments = [analyze_sentiment(headline) for headline in headlines]
    num_headlines = len(sentiments)

    def _average(component):
        # With no headlines there is nothing to average; fall back to 0.0
        # instead of raising ZeroDivisionError.
        if not num_headlines:
            return 0.0
        return sum(scores[component] for scores in sentiments) / num_headlines

    avg_pos_score = _average('pos')
    avg_neg_score = _average('neg')
    avg_neu_score = _average('neu')
    avg_compound_score = _average('compound')

    # Column order matches the original frame layout
    data = {
        'Company_ID': [ticker],
        'Open': [latest_stock['Open']],
        'High': [latest_stock['High']],
        'Low': [latest_stock['Low']],
        'Close': [latest_stock['Close']],
        'Volume': [latest_stock['Volume']],
        'news_count': [len(headlines)],
        'positive': [avg_pos_score],
        'negative': [avg_neg_score],
        'neutral': [avg_neu_score],
        'compound': [avg_compound_score],
        # Category is derived from the average compound score
        'sentiment_category': [categorize_sentiment(avg_compound_score)],
    }

    return pd.DataFrame(data), stock_data_2024

def predict_stock_price(company, headlines):
    """Predict the next close for ``company`` and plot a 30-step forecast.

    Builds the feature row from the latest price bar and the headline
    sentiment, runs the regression pipeline, then draws the price history,
    the predicted point and the SARIMA forecast with Streamlit/Plotly.

    Args:
        company: Display name of the company ('Tesla', 'Nvidia' or 'NIO').
        headlines: Sequence of one to ten headline strings.

    Returns:
        A message string: the predicted price on success, otherwise a
        description of why no prediction was made.
    """
    if not headlines:
        # Nothing to score; without this guard the sentiment averaging
        # downstream would divide by zero.
        return "Please enter at least one headline."
    if len(headlines) > 10:
        return "Please provide up to 10 headlines."

    # create_input_df signals failure with a bare None rather than a tuple,
    # so the result must be checked before it is unpacked.
    result = create_input_df(company, headlines)
    input_df, stock_data_2024 = result if result is not None else (None, None)
    if input_df is None:
        return "Invalid company selected or no data available for 2024."

    st.write("Input DataFrame:")
    st.write(input_df)  # Display the input DataFrame for debugging

    # Predict the next closing price
    predicted_next_close = model_pipeline.predict(input_df)[0]

    # Pick the SARIMA model that matches the ticker in the feature row
    ticker = input_df['Company_ID'].iloc[0]
    sarima_model = sarima_models.get(ticker)
    if sarima_model is None:
        return "SARIMA model not available for the selected company."

    # Prefer 'Adj Close'; fall back to 'Close' when the download did not
    # include it.
    # NOTE(review): yfinance omits 'Adj Close' when prices are auto-adjusted
    # -- confirm against the installed version.
    if 'Adj Close' in stock_data_2024.columns:
        price_column = 'Adj Close'
    else:
        price_column = 'Close'
    history = stock_data_2024[price_column]

    # Forecast future prices, passing the predicted close as exog.
    # NOTE(review): a single exog value is supplied for 30 steps; verify
    # that the saved models were fitted in a way that accepts this.
    forecast_steps = 30
    forecast_with_predicted = sarima_model.forecast(
        steps=forecast_steps, exog=[predicted_next_close]
    )

    # Plot the results
    fig = make_subplots(rows=1, cols=1)

    # Historical data
    fig.add_trace(go.Scatter(
        x=history.index, y=history, mode='lines', name='Historical Data'
    ))

    # Predicted next close price, placed one calendar day after the last bar
    predicted_date = history.index[-1] + pd.Timedelta(days=1)
    fig.add_trace(go.Scatter(
        x=[predicted_date], y=[predicted_next_close],
        mode='markers', name='Predicted Next Close'
    ))

    # Forecast line, drawn on the calendar days following the predicted date
    forecast_index_with_predicted = [
        predicted_date + pd.Timedelta(days=i)
        for i in range(1, forecast_steps + 1)
    ]
    fig.add_trace(go.Scatter(
        x=forecast_index_with_predicted, y=forecast_with_predicted,
        mode='lines', name='Forecast'
    ))

    fig.update_layout(title=f"SARIMA Forecast for {company}", xaxis_title="Date", yaxis_title="Price")

    st.plotly_chart(fig)

    return f"Predicted Next Close Price: {predicted_next_close}"

def main():
    """Render the StocKnock page and run a prediction on request.

    Draws the sidebar and the main form, and when "Predict" is pressed
    passes the non-blank headline lines to ``predict_stock_price`` and shows
    the returned message.
    """
    st.sidebar.image(banner1, use_column_width=True)
    st.sidebar.title("**StocKnock**")
    st.sidebar.write("Welcome to **StocKnock**, where we use sentiment analysis on social media to predict stock prices. Join us for smarter investing!")
    st.sidebar.title("What model do we use?")
    st.sidebar.write("We utilize **Linear Regression** to predict the stock for the next day and **Sarimax** to forecast future stock prices, including the predicted results.")
    st.sidebar.title("Stocks you can predict")
    st.sidebar.write("For the time being, these are the stock that you can predict!")
    st.sidebar.image(Tesla, use_column_width=True)
    st.sidebar.image(NVDA, use_column_width=True)
    st.sidebar.image(Nio, use_column_width=True)
    st.image(banner, use_column_width=True)
    st.title("Stock Price Prediction App")
    st.write("Select a company and provide up to 10 headlines to predict the next stock price based on tweets.")

    company_options = ['Tesla', 'Nvidia', 'NIO']
    company = st.selectbox("Select Company", company_options, key="company_select")

    headlines = st.text_area("Enter Headlines (up to 10 headlines)", key="headlines_input")

    if st.button("Predict", key="predict_button"):
        # One headline per line; blank or whitespace-only lines are dropped
        # so they are neither scored nor counted.
        headline_list = [
            line.strip() for line in headlines.splitlines() if line.strip()
        ]
        if not headline_list:
            st.error("Please enter headlines.")
            # Stop here: there is nothing to predict from.
            return

        prediction = predict_stock_price(company, headline_list)
        # predict_stock_price reports both results and problems as plain
        # strings; only the result message starts with this prefix, so
        # anything else is shown as an error instead of a success.
        if prediction.startswith("Predicted Next Close Price"):
            st.success(prediction)
        else:
            st.error(prediction)

# Call main() only when this module is run as the main program,
# not when it is imported.
if __name__ == "__main__":
    main()