import os
from io import BytesIO

import gradio as gr
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import skops.io as sio

# Feature columns, in the order they are handed to ``model.predict``.
_FEATURES = ["year", "month", "day", "ma_5", "ma_10"]

# Maximum number of most recent historical rows shown next to the forecast.
_HISTORY_ROWS = 30


class StockPredictor:
    """
    Load per-ticker prediction models, prepare historical data, and forecast
    closing prices.

    Attributes
    ----------
    models : dict
        Loaded models keyed by ticker symbol.
    data_dir : str
        Directory containing the historical stock data CSV files
        (one ``<ticker>.csv`` per ticker).

    Methods
    -------
    load_models(model_dir):
        Loads the models from the specified directory.
    load_stock_data(ticker):
        Loads and processes historical stock data from a CSV file.
    forecast(ticker, days):
        Forecasts stock prices for the specified ticker and number of days.
    """

    def __init__(self, model_dir="model/SKLearn_Models", data_dir="data"):
        """
        Load every model found in ``model_dir`` and remember ``data_dir``.

        Parameters
        ----------
        model_dir : str
            Directory containing the trained models.
        data_dir : str
            Directory containing the historical stock data CSV files.
        """
        self.models = self.load_models(model_dir)
        self.data_dir = data_dir

    def load_models(self, model_dir: str) -> dict:
        """
        Load every ``*.skops`` file in ``model_dir``.

        The ticker is taken from the file name: everything before the first
        underscore. Files are visited in sorted order so that, if two files
        share a ticker prefix, the one that wins is deterministic (the last
        in sorted order).

        Parameters
        ----------
        model_dir : str
            Directory containing the trained models.

        Returns
        -------
        dict
            Loaded models keyed by ticker symbol.
        """
        models = {}
        for file_name in sorted(os.listdir(model_dir)):
            if not file_name.endswith(".skops"):
                continue
            ticker = file_name.split("_")[0]
            # NOTE(review): depending on the installed skops version, loading
            # may require an explicit ``trusted=`` argument -- confirm.
            models[ticker] = sio.load(os.path.join(model_dir, file_name))
        return models

    def load_stock_data(self, ticker: str) -> pd.DataFrame:
        """
        Read ``<data_dir>/<ticker>.csv`` and derive the model features.

        The CSV must provide ``date`` and ``close`` columns. Rows dated before
        2000-01-01 are discarded, the rest are sorted by date, and the columns
        ``year``, ``month``, ``day``, ``ma_5`` and ``ma_10`` are added. Rows
        containing a NaN in any column are dropped, which includes the leading
        rows whose rolling windows are still incomplete.

        Parameters
        ----------
        ticker : str
            Stock ticker symbol.

        Returns
        -------
        pandas.DataFrame
            Processed historical stock data (original index labels are kept).
        """
        csv_path = os.path.join(self.data_dir, f"{ticker}.csv")
        data = pd.read_csv(csv_path)
        data["date"] = pd.to_datetime(data["date"])

        # Take an explicit copy of the filtered rows: adding columns to a
        # boolean-mask slice is the pattern pandas flags as chained assignment.
        data = data.loc[data["date"] >= "2000-01-01"].copy()
        data = data.sort_values("date")

        data["year"] = data["date"].dt.year
        data["month"] = data["date"].dt.month
        data["day"] = data["date"].dt.day
        data["ma_5"] = data["close"].rolling(window=5).mean()
        data["ma_10"] = data["close"].rolling(window=10).mean()

        return data.dropna()

    def forecast(self, ticker: str, days: int) -> tuple:
        """
        Forecast closing prices for ``ticker`` over the next ``days`` days.

        Future dates are consecutive calendar days starting the day after the
        last historical date. Predictions are generated one day at a time and
        each prediction is fed back into the 5- and 10-value moving averages
        used for the following day.

        Parameters
        ----------
        ticker : str
            Stock ticker symbol.
        days : int
            Number of days to forecast. Integer-valued floats (as a UI slider
            may deliver) are accepted and converted.

        Returns
        -------
        tuple
            ``(table, image)``. ``table`` is a DataFrame with the columns
            ``date``, ``actual_close`` and ``predicted_close`` holding up to
            30 recent actual rows followed by the predicted rows; ``image`` is
            the rendered chart as a numpy array. When no model exists for
            ``ticker`` or no usable history remains, ``table`` is a one-row
            DataFrame with an ``Error`` column and ``image`` is ``None``.
        """
        model = self.models.get(ticker)
        # Compare against None explicitly: an estimator object may define
        # ``__len__`` (pipelines and ensembles do), which makes a plain
        # truthiness test unreliable.
        if model is None:
            return pd.DataFrame({"Error": ["Model not found"]}), None

        days = int(days)

        data = self.load_stock_data(ticker)
        if data.empty:
            # Nothing left after filtering / dropping incomplete rows, so there
            # is no last feature row to start the forecast from.
            return pd.DataFrame({"Error": ["Not enough historical data"]}), None

        future_dates = pd.date_range(
            start=data["date"].max() + pd.Timedelta(days=1), periods=days
        )
        predictions = self._predict_closes(model, data, future_dates)
        prediction_df = pd.DataFrame(
            {"date": future_dates, "predicted_close": predictions}
        )

        actual_df = (
            data[["date", "close"]]
            .iloc[-_HISTORY_ROWS:]
            .rename(columns={"close": "actual_close"})
        )
        plot_data = pd.concat([actual_df, prediction_df], ignore_index=True)

        img = self._render_plot(actual_df, prediction_df, ticker, days)
        return plot_data, img

    @staticmethod
    def _predict_closes(model, data, future_dates) -> list:
        """Predict one close per future date, feeding predictions back into the moving averages."""
        feature_row = data[_FEATURES].iloc[-1].copy()
        # Recent closes followed by the predictions made so far; the moving
        # averages are taken over the tail of this list.
        closes = data["close"].iloc[-10:].tolist()

        predictions = []
        for date in future_dates:
            feature_row["year"] = date.year
            feature_row["month"] = date.month
            feature_row["day"] = date.day

            model_input = pd.DataFrame([feature_row], columns=_FEATURES)
            predicted = model.predict(model_input)[0]
            predictions.append(predicted)

            closes.append(predicted)
            feature_row["ma_5"] = np.mean(closes[-5:])
            feature_row["ma_10"] = np.mean(closes[-10:])
        return predictions

    @staticmethod
    def _render_plot(actual_df, prediction_df, ticker, days):
        """Draw actual and predicted closes on one chart and return it as a numpy image array."""
        fig, ax = plt.subplots(figsize=(14, 7))
        try:
            # Plot each frame directly instead of splitting a combined frame at
            # row 30, so short histories are not mislabelled.
            ax.plot(actual_df["date"], actual_df["actual_close"], label="Actual")
            ax.plot(
                prediction_df["date"],
                prediction_df["predicted_close"],
                label="Predicted",
            )
            ax.set_xlabel("Date")
            ax.set_ylabel("Stock Price")
            ax.set_title(
                f"Last 30 Days Actual and Next {days} Days Prediction for {ticker}"
            )
            ax.legend()
            ax.grid(True)
            ax.tick_params(axis="x", labelrotation=45)

            buf = BytesIO()
            fig.savefig(buf, format="png")
            buf.seek(0)
            return np.array(plt.imread(buf))
        finally:
            # Always release the figure, even if plotting or saving fails.
            plt.close(fig)


def create_gradio_interface(stock_predictor):
    """
    Build the Gradio interface around ``stock_predictor.forecast``.

    The ticker dropdown is populated from the keys of
    ``stock_predictor.models``; the slider selects 1 to 30 forecast days.

    Parameters
    ----------
    stock_predictor : StockPredictor
        Instance of the StockPredictor class.

    Returns
    -------
    gradio.Interface
        The Gradio interface.
    """
    tickers = list(stock_predictor.models)
    dropdown = gr.Dropdown(choices=tickers, label="Select Ticker")
    slider = gr.Slider(
        minimum=1,
        maximum=30,
        step=1,
        label="Number of Days for Forecasting",
    )
    outputs = [
        gr.DataFrame(headers=["date", "actual_close", "predicted_close"]),
        gr.Image(type="numpy"),
    ]
    return gr.Interface(
        fn=stock_predictor.forecast,
        inputs=[dropdown, slider],
        outputs=outputs,
        title="Stock Price Forecasting",
        description="Select a ticker and number of days to forecast stock prices.",
    )


if __name__ == "__main__":
    # Initialize StockPredictor and create Gradio interface
    stock_predictor = StockPredictor(
        model_dir="model/SKLearn_Models",
        data_dir="data/Cleaned_Kaggle_NASDAQ_Daily_Data",
    )
    iface = create_gradio_interface(stock_predictor)

    # Launch the app
    iface.launch()