# Gradio app that forecasts stock closing prices from pre-trained per-ticker
# models (loaded with skops) and historical daily CSV data.
import os
import gradio as gr
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import skops.io as sio
from io import BytesIO


class StockPredictor:
    """
    Load per-ticker prediction models, prepare historical stock data, and
    forecast closing prices.

    Attributes
    ----------
    models : dict
        Mapping of ticker symbol to the model loaded for it.
    data_dir : str
        Directory containing one ``<ticker>.csv`` file per ticker.

    Methods
    -------
    load_models(model_dir):
        Loads every ``.skops`` model found in a directory.
    load_stock_data(ticker):
        Loads a ticker's CSV and derives the model features.
    forecast(ticker, days):
        Predicts closes for the history and for ``days`` future dates, and
        renders a plot of the most recent rows.
    """

    # Feature columns, in the order they are handed to ``model.predict``.
    FEATURES = ("year", "month", "day", "ma_5", "ma_10", "lag_5", "lag_10")
    # Rows dated before this day are discarded.
    START_DATE = "2000-01-01"
    # Maximum number of rows (history + forecast) returned and plotted.
    PLOT_ROWS = 60

    def __init__(self, model_dir="model/SKLearn_Models", data_dir="data"):
        """
        Load all models from ``model_dir`` and remember ``data_dir``.

        Parameters
        ----------
        model_dir : str
            Directory containing the trained ``.skops`` models.
        data_dir : str
            Directory containing the historical stock data CSV files.
        """
        self.models = self.load_models(model_dir)
        self.data_dir = data_dir

    def load_models(self, model_dir):
        """
        Load every ``.skops`` file in ``model_dir``.

        The ticker is the part of the file name before the first underscore
        (with the ``.skops`` extension removed first, so ``AAPL.skops`` and
        ``AAPL_model.skops`` both map to ``AAPL``).

        Parameters
        ----------
        model_dir : str
            Directory containing the trained models.

        Returns
        -------
        dict
            Mapping of ticker symbol to loaded model, in sorted file order.

        Raises
        ------
        OSError
            If ``model_dir`` cannot be listed (e.g. it does not exist).
        """
        suffix = ".skops"
        models = {}
        # os.listdir order is arbitrary; sorting keeps the ticker order (and
        # which file wins on a duplicate ticker) deterministic.
        for file_name in sorted(os.listdir(model_dir)):
            if not file_name.endswith(suffix):
                continue
            ticker = file_name[: -len(suffix)].split("_")[0]
            # NOTE(review): depending on the installed skops version,
            # sio.load may need an explicit ``trusted=`` argument for
            # non-default types - confirm. Only load files you trust.
            models[ticker] = sio.load(os.path.join(model_dir, file_name))
        return models

    def load_stock_data(self, ticker):
        """
        Load ``<data_dir>/<ticker>.csv`` and derive the model features.

        The CSV must provide ``date`` and ``close`` columns. Rows before
        ``START_DATE`` are dropped, the rest are sorted by date, and the
        calendar, moving-average and lag features are added. Rows containing
        any NaN (including the warm-up rows of the 10-row windows) are
        removed.

        Parameters
        ----------
        ticker : str
            Stock ticker symbol.

        Returns
        -------
        pandas.DataFrame
            Processed historical data; may be empty when fewer than eleven
            usable rows exist.

        Raises
        ------
        FileNotFoundError
            If no CSV file exists for ``ticker``.
        """
        csv_path = os.path.join(self.data_dir, f"{ticker}.csv")
        data = pd.read_csv(csv_path)
        data["date"] = pd.to_datetime(data["date"])

        # sort_values returns a new frame, so the column assignments below
        # never write into a filtered slice of the raw CSV frame.
        data = data[data["date"] >= self.START_DATE].sort_values("date")

        # Calendar features.
        data["year"] = data["date"].dt.year
        data["month"] = data["date"].dt.month
        data["day"] = data["date"].dt.day

        # Moving averages and lagged closes over 5 and 10 rows.
        close = data["close"]
        data["ma_5"] = close.rolling(window=5).mean()
        data["ma_10"] = close.rolling(window=10).mean()
        data["lag_5"] = close.shift(5)
        data["lag_10"] = close.shift(10)

        # Return an independent copy so callers can add columns safely.
        return data.dropna().copy()

    def forecast(self, ticker, days):
        """
        Forecast closing prices for ``ticker`` over the next ``days`` dates.

        The model first predicts every historical row, then future dates are
        predicted one at a time with each prediction fed back into the
        moving-average and lag features. Future dates are consecutive
        calendar days starting the day after the last historical date.

        Parameters
        ----------
        ticker : str
            Stock ticker symbol.
        days : int
            Number of days to forecast. Float values (as UI sliders may
            deliver) are truncated to int.

        Returns
        -------
        tuple
            ``(table, image)``. ``table`` is a DataFrame holding the last
            ``PLOT_ROWS`` rows of ``date``, ``close`` and
            ``predicted_close``; ``image`` is the rendered plot as a numpy
            array. When the ticker has no model or no usable data,
            ``table`` is a one-row DataFrame with an ``Error`` column and
            ``image`` is ``None``.

        Raises
        ------
        FileNotFoundError
            If a model exists for ``ticker`` but its CSV file does not.
        """
        model = self.models.get(ticker)
        # Identity check: estimators may define __len__, so truthiness is
        # not a reliable "was a model found" test.
        if model is None:
            return pd.DataFrame({"Error": ["Model not found"]}), None

        days = int(days)

        data = self.load_stock_data(ticker)
        if data.empty:
            # Nothing to seed the forecast with; report it the same way as a
            # missing model instead of failing on iloc[-1] further down.
            return pd.DataFrame({"Error": ["No data available"]}), None

        # In-sample predictions for the historical rows.
        data["predicted_close"] = model.predict(data[list(self.FEATURES)])

        future_dates = pd.date_range(
            start=data["date"].max() + pd.Timedelta(days=1), periods=days
        )
        prediction_df = pd.DataFrame(
            {
                "date": future_dates,
                "predicted_close": self._predict_future(
                    model, data, future_dates
                ),
            }
        )

        # History followed by forecast, trimmed to the most recent rows.
        combined_df = pd.concat(
            [data[["date", "close", "predicted_close"]], prediction_df],
            ignore_index=True,
        )
        plot_data = combined_df.tail(self.PLOT_ROWS)

        return plot_data, self._render_plot(plot_data, ticker, days)

    def _predict_future(self, model, data, future_dates):
        """Predict one close per future date, feeding predictions back in."""
        features = list(self.FEATURES)
        # Start from the last historical feature row and roll it forward.
        row = data[features].iloc[-1].copy()
        window_5 = data["close"].iloc[-5:].tolist()
        window_10 = data["close"].iloc[-10:].tolist()
        predictions = []

        for date in future_dates:
            row["year"] = date.year
            row["month"] = date.month
            row["day"] = date.day

            # The oldest entry of a full window is the close 5 (or 10) rows
            # before the date being predicted. With a short history the lag
            # keeps the value from the last historical row.
            if len(window_5) >= 5:
                row["lag_5"] = window_5[-5]
            if len(window_10) >= 10:
                row["lag_10"] = window_10[-10]

            prediction = model.predict(
                pd.DataFrame([row], columns=features)
            )[0]
            predictions.append(prediction)

            # Slide both windows forward, keeping at most 5 / 10 entries.
            window_5.append(prediction)
            del window_5[:-5]
            window_10.append(prediction)
            del window_10[:-10]

            row["ma_5"] = np.mean(window_5)
            row["ma_10"] = np.mean(window_10)

        return predictions

    @staticmethod
    def _render_plot(plot_data, ticker, days):
        """Draw actual vs. predicted closes and return the PNG as an array."""
        # Rows with an actual close are history; the rest are forecast rows.
        history_rows = int(plot_data["close"].notna().sum())

        # Use an explicit figure (not pyplot's implicit current figure) and
        # always close it, even if plotting or saving fails.
        fig, ax = plt.subplots(figsize=(14, 7))
        try:
            ax.plot(plot_data["date"], plot_data["close"], label="Actual")
            ax.plot(
                plot_data["date"],
                plot_data["predicted_close"],
                label="Predicted",
            )
            ax.set_xlabel("Date")
            ax.set_ylabel("Stock Price")
            ax.set_title(
                f"Last {history_rows} Days Actual and Next {days} Days "
                f"Prediction for {ticker}"
            )
            ax.legend()
            ax.grid(True)
            ax.tick_params(axis="x", labelrotation=45)

            # Round-trip through an in-memory PNG to obtain a numpy image.
            buf = BytesIO()
            fig.savefig(buf, format="png")
            buf.seek(0)
            return np.asarray(plt.imread(buf, format="png"))
        finally:
            plt.close(fig)


def create_gradio_interface(stock_predictor):
    """
    Build the Gradio UI that exposes ``stock_predictor.forecast``.

    Parameters
    ----------
    stock_predictor : StockPredictor
        Predictor whose ``models`` keys populate the ticker dropdown and
        whose ``forecast`` method handles each request.

    Returns
    -------
    gradio.Interface
        Interface taking a ticker and a day count, and producing a table
        and an image.
    """
    # Inputs: one dropdown choice per loaded model, and a 1-30 day horizon.
    ticker_input = gr.Dropdown(
        label="Select Ticker",
        choices=list(stock_predictor.models),
    )
    horizon_input = gr.Slider(
        label="Number of Days for Forecasting",
        minimum=1,
        maximum=30,
        step=1,
    )

    # Outputs mirror the (DataFrame, image array) pair returned by forecast.
    table_output = gr.DataFrame(headers=["date", "close", "predicted_close"])
    plot_output = gr.Image(type="numpy")

    return gr.Interface(
        fn=stock_predictor.forecast,
        inputs=[ticker_input, horizon_input],
        outputs=[table_output, plot_output],
        title="Stock Price Forecasting",
        description="Select a ticker and number of days to forecast stock prices.",
    )


if __name__ == "__main__":
    # Script entry point: load the models and point the predictor at the
    # data directory, then build the Gradio interface around it.
    stock_predictor = StockPredictor(
        data_dir="data/Cleaned_Kaggle_NASDAQ_Daily_Data",
        model_dir="model/SKLearn_Models",
    )
    iface = create_gradio_interface(stock_predictor)

    # Start serving the app.
    iface.launch()