Token Classification · fastai · code · art · chemistry
At-Tawheed committed · Commit 187662d (verified) · 1 parent: b88f3f2

Create qa1.0.0

Files changed (1)
  1. qa1.0.0 +232 -0
qa1.0.0 ADDED
"""
Quantumaurora: Advanced Transformer-based Language Model
Version: 1.0.0
Created: 2025
"""

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from transformers import PreTrainedTokenizerFast
from tokenizers import Tokenizer, models, trainers, pre_tokenizers, decoders
import math
from typing import Optional, Dict, List, Tuple
from torch.cuda.amp import autocast, GradScaler
from torch.nn.parallel import DistributedDataParallel
import torch.distributed as dist
import torch.multiprocessing as mp
from torch.utils.checkpoint import checkpoint
import json
import os
from datetime import datetime

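# The model below references a PositionalEncoding module that this file does not
# define. The version here is a minimal sketch, assuming the standard sinusoidal
# encodings; the real implementation may differ, and max_len simply mirrors the
# config's default max_sequence_length of 2048.
class PositionalEncoding(nn.Module):
    def __init__(self, d_model: int, max_len: int = 2048):
        super().__init__()
        position = torch.arange(max_len).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2) * (-math.log(10000.0) / d_model))
        pe = torch.zeros(max_len, d_model)
        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)
        # Registered as a buffer so it follows the module across devices without being a parameter
        self.register_buffer('pe', pe.unsqueeze(0))

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        # x: (batch, seq_len, d_model); add the encodings for the first seq_len positions
        return x + self.pe[:, :x.size(1)]
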
class QuantumauroraConfig:
    """Configuration class for Quantumaurora model"""
    def __init__(self,
                 vocab_size: int = 50000,
                 d_model: int = 512,
                 num_heads: int = 8,
                 num_layers: int = 6,
                 d_ff: int = 2048,
                 dropout: float = 0.1,
                 attention_type: str = "full",
                 use_checkpointing: bool = True,
                 max_sequence_length: int = 2048,
                 model_version: str = "1.0.0"):
        self.vocab_size = vocab_size
        self.d_model = d_model
        self.num_heads = num_heads
        self.num_layers = num_layers
        self.d_ff = d_ff
        self.dropout = dropout
        self.attention_type = attention_type
        self.use_checkpointing = use_checkpointing
        self.max_sequence_length = max_sequence_length
        self.model_version = model_version
        self.model_type = "quantumaurora"

    def save(self, path: str):
        """Save configuration to JSON file"""
        # Copy the attribute dict so the timestamp is not written back onto the config object
        config_dict = dict(self.__dict__)
        config_dict['timestamp'] = datetime.now().isoformat()

        with open(path, 'w') as f:
            json.dump(config_dict, f, indent=2)

    @classmethod
    def load(cls, path: str) -> 'QuantumauroraConfig':
        """Load configuration from JSON file"""
        with open(path, 'r') as f:
            config_dict = json.load(f)

        # Remove fields that are not constructor arguments
        if 'timestamp' in config_dict:
            del config_dict['timestamp']
        # model_type is set in __init__ and is not accepted as a keyword argument
        config_dict.pop('model_type', None)

        return cls(**config_dict)

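# TransformerBlock and PreTrainingObjectives are also referenced below without being
# defined in this file. The sketches here are assumptions, not the original modules:
# a pre-norm block built on nn.MultiheadAttention (the sparse/local attention_type
# values are accepted but fall back to full attention), and a single LM head standing
# in for the "multiple pre-training objectives" mentioned in the docstring.
class TransformerBlock(nn.Module):
    def __init__(self, d_model: int, num_heads: int, d_ff: int,
                 dropout: float = 0.1, attention_type: str = "full"):
        super().__init__()
        self.attention_type = attention_type  # "full" assumed; sparse/local patterns not implemented here
        self.attn = nn.MultiheadAttention(d_model, num_heads, dropout=dropout, batch_first=True)
        self.ff = nn.Sequential(
            nn.Linear(d_model, d_ff),
            nn.GELU(),
            nn.Dropout(dropout),
            nn.Linear(d_ff, d_model),
        )
        self.norm1 = nn.LayerNorm(d_model)
        self.norm2 = nn.LayerNorm(d_model)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x: torch.Tensor, mask: Optional[torch.Tensor] = None) -> torch.Tensor:
        # Pre-norm self-attention with a residual connection
        h = self.norm1(x)
        attn_out, _ = self.attn(h, h, h, attn_mask=mask, need_weights=False)
        x = x + self.dropout(attn_out)
        # Pre-norm feed-forward with a residual connection
        x = x + self.dropout(self.ff(self.norm2(x)))
        return x


class PreTrainingObjectives(nn.Module):
    def __init__(self, d_model: int, vocab_size: int):
        super().__init__()
        self.norm = nn.LayerNorm(d_model)
        self.lm_head = nn.Linear(d_model, vocab_size, bias=False)

    def forward(self, x: torch.Tensor) -> Dict[str, torch.Tensor]:
        hidden = self.norm(x)
        # Returned as a dict to match the Dict[str, torch.Tensor] contract of Quantumaurora.forward
        return {'logits': self.lm_head(hidden), 'hidden_states': hidden}
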
class Quantumaurora(nn.Module):
    """
    Quantumaurora: Advanced Transformer-based Language Model

    A state-of-the-art language model featuring:
    - Multi-head attention with sparse/local patterns
    - Multiple pre-training objectives
    - Gradient checkpointing
    - Mixed precision training
    - Distributed training support
    """

    def __init__(self, config: QuantumauroraConfig):
        super().__init__()
        self.config = config

        # Model components
        self.token_embedding = nn.Embedding(config.vocab_size, config.d_model)
        self.positional_encoding = PositionalEncoding(config.d_model)

        self.transformer_blocks = nn.ModuleList([
            TransformerBlock(
                config.d_model,
                config.num_heads,
                config.d_ff,
                config.dropout,
                config.attention_type
            ) for _ in range(config.num_layers)
        ])

        self.pretraining_objectives = PreTrainingObjectives(
            config.d_model,
            config.vocab_size
        )

        self.dropout = nn.Dropout(config.dropout)

    def forward(self, x: torch.Tensor, mask: Optional[torch.Tensor] = None) -> Dict[str, torch.Tensor]:
        x = self.token_embedding(x)
        x = self.positional_encoding(x)
        x = self.dropout(x)

        for transformer_block in self.transformer_blocks:
            if self.config.use_checkpointing and self.training:
                # Non-reentrant checkpointing handles the optional mask argument cleanly
                x = checkpoint(transformer_block, x, mask, use_reentrant=False)
            else:
                x = transformer_block(x, mask)

        return self.pretraining_objectives(x)

    def save_pretrained(self, path: str):
        """Save model and configuration"""
        os.makedirs(path, exist_ok=True)

        # Save configuration
        config_path = os.path.join(path, 'config.json')
        self.config.save(config_path)

        # Save model weights
        model_path = os.path.join(path, 'model.pt')
        torch.save(self.state_dict(), model_path)

        # Save tokenizer if available
        if hasattr(self, 'tokenizer'):
            tokenizer_path = os.path.join(path, 'tokenizer.json')
            if isinstance(self.tokenizer, PreTrainedTokenizerFast):
                # The fast tokenizer wraps a tokenizers.Tokenizer that serializes to one JSON file
                self.tokenizer.backend_tokenizer.save(tokenizer_path)
            else:
                self.tokenizer.save(tokenizer_path)

    @classmethod
    def from_pretrained(cls, path: str) -> 'Quantumaurora':
        """Load pretrained model and configuration"""
        config = QuantumauroraConfig.load(os.path.join(path, 'config.json'))
        model = cls(config)

        model_path = os.path.join(path, 'model.pt')
        model.load_state_dict(torch.load(model_path, map_location='cpu'))

        # Load tokenizer if available (PreTrainedTokenizerFast has no from_file method;
        # it is constructed from a tokenizers JSON file via tokenizer_file=...)
        tokenizer_path = os.path.join(path, 'tokenizer.json')
        if os.path.exists(tokenizer_path):
            model.tokenizer = PreTrainedTokenizerFast(tokenizer_file=tokenizer_path)

        return model

class QuantumauroraTrainer:
    """Training manager for Quantumaurora model"""

    def __init__(self,
                 model: Quantumaurora,
                 train_dataloader: DataLoader,
                 optimizer: torch.optim.Optimizer,
                 device: str = "cuda",
                 use_mixed_precision: bool = True,
                 distributed: bool = True):
        # Move the model to the training device before (optionally) wrapping it in DDP
        self.model = model.to(device)
        self.train_dataloader = train_dataloader
        self.optimizer = optimizer
        self.device = device
        self.use_mixed_precision = use_mixed_precision
        self.distributed = distributed

        if use_mixed_precision:
            self.scaler = GradScaler()

        if distributed:
            self.model = DistributedDataParallel(self.model)

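    def train_epoch(self, epoch: int) -> Dict[str, float]:
        """Run one training epoch and return average losses.

        Minimal sketch: train() below calls train_epoch, but the original file never
        defines it. It assumes each batch is a LongTensor of token ids and that the
        model's output dict contains 'logits' (as in the PreTrainingObjectives sketch
        above); a shifted next-token cross-entropy is reported under 'total'.
        """
        self.model.train()
        total_loss = 0.0
        num_batches = 0

        for batch in self.train_dataloader:
            batch = batch.to(self.device)
            self.optimizer.zero_grad()

            if self.use_mixed_precision:
                with autocast():
                    outputs = self.model(batch)
                    loss = self._lm_loss(outputs['logits'], batch)
                self.scaler.scale(loss).backward()
                self.scaler.step(self.optimizer)
                self.scaler.update()
            else:
                outputs = self.model(batch)
                loss = self._lm_loss(outputs['logits'], batch)
                loss.backward()
                self.optimizer.step()

            total_loss += loss.item()
            num_batches += 1

        return {'total': total_loss / max(num_batches, 1)}

    def _lm_loss(self, logits: torch.Tensor, tokens: torch.Tensor) -> torch.Tensor:
        """Shifted next-token cross-entropy over the batch."""
        return F.cross_entropy(
            logits[:, :-1].reshape(-1, logits.size(-1)),
            tokens[:, 1:].reshape(-1)
        )
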
    def train(self, num_epochs: int, save_dir: str = None):
        """Main training loop"""
        best_loss = float('inf')

        for epoch in range(num_epochs):
            losses = self.train_epoch(epoch)

            # Save checkpoint if this is the best model so far
            if save_dir and losses['total'] < best_loss:
                best_loss = losses['total']
                # Unwrap the DDP wrapper, which does not expose save_pretrained
                model_to_save = self.model.module if self.distributed else self.model
                model_to_save.save_pretrained(os.path.join(save_dir, f'checkpoint-{epoch}'))

            print(f"Epoch {epoch+1}/{num_epochs}")
            for loss_name, loss_value in losses.items():
                print(f"{loss_name}: {loss_value:.4f}")

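# Neither a dataset nor the train_distributed worker spawned by main() is defined in
# this file. Both sketches below are assumptions made so the example can run end to
# end: a synthetic dataset of random token ids, and a per-process worker that uses an
# env-var rendezvous (localhost:29500), the NCCL backend, and a DistributedSampler.
class RandomTokenDataset(Dataset):
    """Synthetic dataset of random token ids; each item is a LongTensor of shape (seq_len,)."""
    def __init__(self, num_samples: int = 1024, seq_len: int = 128, vocab_size: int = 50000):
        self.data = torch.randint(0, vocab_size, (num_samples, seq_len))

    def __len__(self) -> int:
        return len(self.data)

    def __getitem__(self, idx: int) -> torch.Tensor:
        return self.data[idx]


def train_distributed(rank: int, world_size: int, model: Quantumaurora, dataset: Dataset):
    """Per-process training entry point used by mp.spawn in main()."""
    os.environ.setdefault("MASTER_ADDR", "localhost")
    os.environ.setdefault("MASTER_PORT", "29500")
    dist.init_process_group("nccl", rank=rank, world_size=world_size)
    torch.cuda.set_device(rank)

    # Shard the dataset across ranks; batch_size is an arbitrary illustrative value
    sampler = torch.utils.data.DistributedSampler(dataset, num_replicas=world_size, rank=rank)
    train_dataloader = DataLoader(dataset, batch_size=8, sampler=sampler)

    trainer = QuantumauroraTrainer(
        model=model,
        train_dataloader=train_dataloader,
        optimizer=torch.optim.Adam(model.parameters()),
        device=f"cuda:{rank}",
        use_mixed_precision=True,
        distributed=True
    )
    # Only rank 0 writes checkpoints, to avoid concurrent writes to the same directory
    trainer.train(num_epochs=10, save_dir='quantumaurora_checkpoints' if rank == 0 else None)

    dist.destroy_process_group()
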
def main():
    """Example usage of Quantumaurora"""

    # Initialize configuration
    config = QuantumauroraConfig(
        vocab_size=50000,
        d_model=768,
        num_heads=12,
        num_layers=12,
        attention_type="sparse"
    )

    # Initialize model
    model = Quantumaurora(config)

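    # The original file never builds these; the toy dataset defined above (an assumed
    # stand-in) supplies inputs for both the distributed and single-GPU branches below.
    dataset = RandomTokenDataset(vocab_size=config.vocab_size)
    train_dataloader = DataLoader(dataset, batch_size=8, shuffle=True)
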
    # Multi-GPU training if available
    world_size = torch.cuda.device_count()
    if world_size > 1:
        mp.spawn(
            train_distributed,
            args=(world_size, model, dataset),
            nprocs=world_size,
            join=True
        )
    else:
        # Single-GPU (or CPU) training
        trainer = QuantumauroraTrainer(
            model=model,
            train_dataloader=train_dataloader,
            optimizer=torch.optim.Adam(model.parameters()),
            device="cuda" if torch.cuda.is_available() else "cpu",
            use_mixed_precision=True,
            distributed=False
        )

        trainer.train(
            num_epochs=10,
            save_dir='quantumaurora_checkpoints'
        )


if __name__ == "__main__":
    main()