|
import torch |
|
import torch.nn as nn |
|
import torch.optim as optim |
|
import torch.optim.lr_scheduler as lr_scheduler |
|
from dataloader import batch_size |
|
from dataloader import data_transform, data_loader |
|
from blocknet10 import CustomCIFAR10Net |
|
from analytics import model_analytics |
|
from push_to_hf import HF |
|
|
|
# Fix the RNG seed so successive runs are comparable.
torch.manual_seed(42)

# Prefer the GPU when one is available; everything below moves tensors here.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"Using device: {device}")

# Build the train/test preprocessing pipelines, then wrap them in loaders.
transform_train, transform_test = data_transform()
train_loader, test_loader = data_loader(transform_train, transform_test)
|
|
|
def arch_tester():
    """Smoke-test the network architecture before training.

    Pushes one random CIFAR-10-shaped batch of size ``(batch_size, 3, 32, 32)``
    through a fresh ``CustomCIFAR10Net`` on the CPU so a wiring mistake in the
    model fails fast, before any real training time is spent.

    Returns:
        torch.Size: shape of the model output for one batch.
    """
    model = CustomCIFAR10Net()
    # eval(): a throwaway shape probe should not update BatchNorm running
    # statistics or apply Dropout.
    model.eval()
    # No gradients are needed for a shape check — skip building the
    # autograd graph entirely.
    with torch.no_grad():
        output = model(torch.randn(batch_size, 3, 32, 32))
    return output.shape
|
|
|
# Sanity-check the architecture (prints the output shape) before committing
# to a full training run.
print(arch_tester())

# The model that will actually be trained, placed on the selected device.
model = CustomCIFAR10Net().to(device)
|
|
|
# Loss and optimizer for 10-class classification.
criterion = nn.CrossEntropyLoss()
optimizer = optim.AdamW(model.parameters(), lr=0.01, betas=(0.8, 0.95), weight_decay=0.0005, amsgrad=True, eps=1e-8)

# Per-run history consumed by the final report and model_analytics().
train_losses = []
train_accuracies = []
test_accuracies = []

num_epochs = 50
total_steps = len(train_loader) * num_epochs
step_count = 0

# BUG FIX: the cosine schedule previously used T_max=200, but scheduler.step()
# is called once per epoch and training only runs 50 epochs, so the LR never
# completed its annealing cycle. Tie the horizon to the actual epoch count so
# the learning rate anneals to its minimum exactly at the end of training.
scheduler = lr_scheduler.CosineAnnealingLR(optimizer, T_max=num_epochs)
|
|
|
for epoch in range(num_epochs):
    # BUG FIX: the evaluation passes below switch the model to eval mode, but
    # the original code never switched back, so from epoch 2 onward every
    # optimization step ran with Dropout disabled and BatchNorm statistics
    # frozen. Re-enable training mode at the start of every epoch.
    model.train()

    running_loss = 0.0

    # --- optimization pass over the training set ---
    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        step_count += 1
        # Report (and record) the mean loss over each window of 100 steps.
        # Any partial window left at epoch end is intentionally discarded,
        # matching the reset of running_loss at the top of the epoch.
        if step_count % 100 == 0:
            train_losses.append(running_loss / 100)
            print(f'[Epoch: {epoch + 1}, Step: {step_count:5d}/{total_steps}] loss: {running_loss / 100:.3f}')
            running_loss = 0.0

    # --- training-set accuracy (separate pass with the model frozen) ---
    # NOTE(review): this doubles the per-epoch cost of touching the training
    # data; if it becomes a bottleneck, accuracy could instead be accumulated
    # during the optimization pass above (at the cost of measuring a model
    # that changes mid-epoch).
    model.eval()
    correct_train = 0
    total_train = 0
    with torch.no_grad():
        for images, targets in train_loader:
            images, targets = images.to(device), targets.to(device)
            predicted = model(images).argmax(dim=1)
            total_train += targets.size(0)
            correct_train += (predicted == targets).sum().item()
    train_accuracy = 100 * correct_train / total_train
    train_accuracies.append(train_accuracy)

    # Advance the cosine LR schedule once per epoch.
    scheduler.step()

    # --- held-out test accuracy ---
    correct_test = 0
    total_test = 0
    with torch.no_grad():
        for images, targets in test_loader:
            images, targets = images.to(device), targets.to(device)
            predicted = model(images).argmax(dim=1)
            total_test += targets.size(0)
            correct_test += (predicted == targets).sum().item()
    test_accuracy = 100 * correct_test / total_test
    test_accuracies.append(test_accuracy)

    print(f'Epoch {epoch + 1}: Test Accuracy = {test_accuracy:.2f}%')
|
|
|
|
|
# Final summary of the run: last recorded loss window and final accuracies.
print(f"Last Train Losses:{train_losses[-1]}")
print(f"Last Train Accuracy:{train_accuracies[-1]}")
print(f"Last Test Accuracy:{test_accuracies[-1]}")

# Generate the post-training analytics from the recorded histories.
analytics = model_analytics(train_losses, train_accuracies, test_accuracies)

# Push the trained model to the Hugging Face Hub and report the outcome.
print(HF().push_to_face(model=model))