import os

import gradio as gr
import joblib
import numpy as np
import pandas as pd
import spaces
import torch
import wandb
import yfinance as yf
from datasets import Dataset
from huggingface_hub import HfApi, hf_hub_download, login, upload_file
from peft import LoraConfig, get_peft_model
from transformers import Trainer, TrainingArguments

from configuration_stockllama import StockLlamaConfig
from modeling_stockllama import StockLlamaForForecasting

hf_api = HfApi()
HF_TOKEN = os.getenv("HF_TOKEN")
WANDB_TOKEN = os.getenv("WANDB_TOKEN")
login(token=HF_TOKEN)
wandb.login(key=WANDB_TOKEN)


class Scaler:
    """Min-max scaler that maps a fitted series onto a fixed feature range."""

    def __init__(self, feature_range):
        self.feature_range = feature_range
        self.min_df = None
        self.max_df = None

    def fit(self, df: pd.Series):
        self.min_df = df.min()
        self.max_df = df.max()

    def transform(self, df: pd.Series) -> pd.Series:
        min_val, max_val = self.feature_range
        scaled_df = (df - self.min_df) / (self.max_df - self.min_df)
        return scaled_df * (max_val - min_val) + min_val

    def inverse_transform(self, X: np.ndarray) -> np.ndarray:
        # Undo `transform`: map values in `feature_range` back to the
        # original data scale captured by `fit`.
        min_val, max_val = self.feature_range
        return (X - min_val) / (max_val - min_val) * (self.max_df - self.min_df) + self.min_df
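
# Quick round-trip sanity check for Scaler (illustrative values only; cheap
# enough to run at import time): `transform` maps the fitted series onto
# [10, 100], and `inverse_transform` recovers the original values.
_scaler_check = Scaler((10, 100))
_scaler_check.fit(pd.Series([1.0, 2.0, 3.0]))
assert np.allclose(_scaler_check.transform(pd.Series([1.0, 2.0, 3.0])), [10.0, 55.0, 100.0])
assert np.allclose(_scaler_check.inverse_transform(np.array([10.0, 55.0, 100.0])), [1.0, 2.0, 3.0])
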
def check_existing_model(stock_symbol, start_date, end_date):
    # Mirror the repo id used when the tuned model is pushed below;
    # `repo_exists` avoids listing every model on the Hub.
    repo_id = f"StockLlama/StockLlama-tuned-{stock_symbol}-{start_date}_{end_date}"
    return hf_api.repo_exists(repo_id)


@spaces.GPU(duration=300)
def train_stock_model(stock_symbol, start_date, end_date, feature_range=(10, 100),
                      data_seq_length=256, epochs=10, batch_size=16, learning_rate=2e-4):
    repo_id = f"StockLlama/StockLlama-tuned-{stock_symbol}-{start_date}_{end_date}"
    if check_existing_model(stock_symbol, start_date, end_date):
        return f"Model for {stock_symbol} from {start_date} to {end_date} already exists."

    try:
        stock_data = yf.download(stock_symbol, start=start_date, end=end_date, progress=False)
    except Exception as e:
        return f"Error downloading data for {stock_symbol}: {e}"

    data = stock_data["Close"]
    scaler = Scaler(feature_range)
    scaler.fit(data)
    scaled_data = scaler.transform(data)

    # Sliding windows: each input is `data_seq_length` consecutive scaled
    # prices and the label is the price that follows the window.
    seq = [np.array(scaled_data[i:i + data_seq_length])
           for i in range(len(scaled_data) - data_seq_length)]
    target = [np.array(scaled_data[i + data_seq_length:i + data_seq_length + 1])
              for i in range(len(scaled_data) - data_seq_length)]
    seq_tensors = [torch.tensor(s, dtype=torch.float32) for s in seq]
    target_tensors = [t[0] for t in target]

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = StockLlamaForForecasting.from_pretrained("StockLlama/StockLlama").to(device)

    config = LoraConfig(
        r=64,
        lora_alpha=32,
        target_modules=["q_proj", "v_proj", "o_proj", "k_proj"],
        lora_dropout=0.05,
        bias="none",
        task_type="CAUSAL_LM",
    )
    model = get_peft_model(model, config)

    dataset = Dataset.from_dict({"input_ids": seq_tensors, "label": target_tensors})
    dataset.push_to_hub(repo_id)

    trainer = Trainer(
        model=model,
        train_dataset=dataset,
        args=TrainingArguments(
            per_device_train_batch_size=batch_size,
            gradient_accumulation_steps=4,
            num_train_epochs=epochs,
            warmup_steps=5,
            save_steps=10,
            learning_rate=learning_rate,
            fp16=True,
            logging_steps=1,
            push_to_hub=True,
            report_to="wandb",
            optim="adamw_torch",
            weight_decay=0.01,
            lr_scheduler_type="linear",
            seed=3407,
            output_dir=f"StockLlama/StockLlama-LoRA-{stock_symbol}-{start_date}_{end_date}",
        ),
    )
    trainer.train()

    # Fold the LoRA adapters back into the base weights before publishing.
    model = model.merge_and_unload()
    model.push_to_hub(repo_id)

    # Ship the fitted scaler alongside the model so predictions can be
    # mapped back to the original price scale.
    scaler_path = "scaler.joblib"
    joblib.dump(scaler, scaler_path)
    upload_file(
        path_or_fileobj=scaler_path,
        path_in_repo=f"scalers/{scaler_path}",
        repo_id=repo_id,
    )
    return f"Training completed and model saved for {stock_symbol} from {start_date} to {end_date}."


@spaces.GPU(duration=300)
def gradio_train_stock_model(stock_symbol, start_date, end_date, feature_range_min,
                             feature_range_max, data_seq_length, epochs, batch_size,
                             learning_rate):
    return train_stock_model(
        stock_symbol=stock_symbol,
        start_date=start_date,
        end_date=end_date,
        feature_range=(feature_range_min, feature_range_max),
        data_seq_length=data_seq_length,
        epochs=epochs,
        batch_size=batch_size,
        learning_rate=learning_rate,
    )


iface = gr.Interface(
    fn=gradio_train_stock_model,
    inputs=[
        gr.Textbox(label="Stock Symbol", value="BTC-USD"),
        gr.Textbox(label="Start Date", value="2023-01-01"),
        gr.Textbox(label="End Date", value="2024-08-24"),
        gr.Slider(minimum=0, maximum=100, step=1, label="Feature Range Min", value=10),
        gr.Slider(minimum=0, maximum=100, step=1, label="Feature Range Max", value=100),
        gr.Slider(minimum=1, maximum=512, step=1, label="Data Sequence Length", value=256),
        gr.Slider(minimum=1, maximum=50, step=1, label="Epochs", value=10),
        gr.Slider(minimum=1, maximum=64, step=1, label="Batch Size", value=16),
        gr.Slider(minimum=1e-5, maximum=1e-1, step=1e-5, label="Learning Rate", value=2e-4),
    ],
    outputs="text",
)

iface.launch()
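
# Illustrative inference sketch, kept as a comment since `iface.launch()`
# blocks above. It reloads a tuned checkpoint and its scaler from the Hub;
# the repo id is an example, and the file path mirrors the `upload_file`
# call in `train_stock_model`.
#
# repo = "StockLlama/StockLlama-tuned-BTC-USD-2023-01-01_2024-08-24"
# model = StockLlamaForForecasting.from_pretrained(repo)
# scaler = joblib.load(hf_hub_download(repo_id=repo, filename="scalers/scaler.joblib"))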