"""
Configuration for Baseline Transformer on enwik8.
Matches DTAT's training setup for fair comparison.
"""

class BaselineConfig:
    def __init__(self):
        # Model architecture (exactly matching DTAT)
        self.n_layer = 12
        self.n_head = 8  # Same as DTAT
        self.n_embd = 512  # Same as DTAT
        self.dropout = 0.1
        self.bias = True
        
        # Sequence parameters
        self.block_size = 1024  # Same as DTAT
        self.vocab_size = 256  # For character-level model
        
        # Training parameters (matched with DTAT)
        self.learning_rate = 6e-4
        self.min_lr = 1e-5         # Lower minimum to allow fine-tuning
        self.warmup_iters = 367     # 5% of total iterations
        self.max_iters = 7334      # Exactly 4 epochs with batch_size=24
        self.weight_decay = 0.1     # Same as DTAT
        self.beta1 = 0.9
        self.beta2 = 0.95
        self.grad_clip = 1.0
        
        # Learning rate schedule
        self.decay_lr = True
        self.lr_decay_iters = 5000  # Same as DTAT
        
        # Early stopping
        self.patience = 15          # Same as DTAT
        self.min_delta = 0.005     # Same as DTAT
        self.eval_interval = 250    # Same as DTAT
        self.eval_iters = 200      # Same as DTAT
        
        # Logging
        self.log_interval = 10
        
        # Mixed precision training
        self.mixed_precision = True
        self.dtype = 'bfloat16'
        
        # Memory optimization
        self.gradient_checkpointing = True
        self.batch_size = 24        # Same as DTAT
        
        # System
        self.device = 'cuda'
        self.compile = True
        
        # Performance optimization
        self.compile_model = True
        self.cudnn_benchmark = True
        
        # Git config for model versioning
        self.git_name = "Your Name"
        self.git_email = "your.email@example.com"
    
    def get_config(self):
        return self

def get_config():
    """Helper function to get config instance."""
    return BaselineConfig()