nofl committed on
Commit
8f85551
·
verified ·
1 Parent(s): 8f6465e

Update Dockerfile

Browse files
Files changed (1) hide show
  1. Dockerfile +53 -99
Dockerfile CHANGED
@@ -1,99 +1,53 @@
1
- from aim import Run
2
- from aim.pytorch import track_gradients_dists, track_params_dists
3
- import torch
4
- import torch.nn as nn
5
- import torch.optim as optim
6
- from torchvision import datasets, transforms
7
- from tqdm import tqdm
8
-
9
- batch_size = 64
10
- epochs = 10
11
- learning_rate = 0.01
12
-
13
- aim_run = Run()
14
-
15
- class CNN(nn.Module):
16
- def __init__(self):
17
- super(CNN, self).__init__()
18
- self.conv1 = nn.Conv2d(1, 32, 3, 1)
19
- self.conv2 = nn.Conv2d(32, 64, 3, 1)
20
- self.pool = nn.MaxPool2d(2, 2)
21
- self.fc1 = nn.Linear(64 * 7 * 7, 128)
22
- self.fc2 = nn.Linear(128, 10)
23
-
24
- def forward(self, x):
25
- x = self.pool(torch.relu(self.conv1(x)))
26
- x = self.pool(torch.relu(self.conv2(x)))
27
- x = torch.flatten(x, 1)
28
- x = torch.relu(self.fc1(x))
29
- x = self.fc2(x)
30
- return x
31
-
32
- train_dataset = datasets.MNIST(root='./data',
33
- train=True,
34
- transform=transforms.ToTensor(),
35
- download=True)
36
-
37
- test_dataset = datasets.MNIST(root='./data',
38
- train=False,
39
- transform=transforms.ToTensor())
40
-
41
- train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
42
- batch_size=batch_size,
43
- shuffle=True)
44
-
45
- test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
46
- batch_size=batch_size,
47
- shuffle=False)
48
-
49
- model = CNN()
50
- optimizer = optim.Adam(model.parameters(), lr=learning_rate)
51
- criterion = nn.CrossEntropyLoss()
52
-
53
- for epoch in range(epochs):
54
- model.train()
55
- train_loss = 0
56
- correct = 0
57
- total = 0
58
-
59
- for batch_idx, (data, target) in enumerate(tqdm(train_loader, desc="Training", leave=False)):
60
- optimizer.zero_grad()
61
- output = model(data)
62
- loss = criterion(output, target)
63
- loss.backward()
64
- optimizer.step()
65
-
66
- train_loss += loss.item()
67
- _, predicted = torch.max(output.data, 1)
68
- total += target.size(0)
69
- correct += (predicted == target).sum().item()
70
-
71
- acc = correct / total
72
- items = {'accuracy': acc, 'loss': train_loss / len(train_loader)}
73
- aim_run.track(items, epoch=epoch, context={'subset': 'train'})
74
-
75
- track_params_dists(model, aim_run, epoch=epoch, context={'subset': 'train'})
76
- track_gradients_dists(model, aim_run, epoch=epoch, context={'subset': 'train'})
77
-
78
- model.eval()
79
- test_loss = 0
80
- correct = 0
81
- total = 0
82
-
83
- with torch.no_grad():
84
- for batch_idx, (data, target) in enumerate(tqdm(test_loader, desc="Testing", leave=False)):
85
- output = model(data)
86
- loss = criterion(output, target)
87
- test_loss += loss.item()
88
- _, predicted = torch.max(output.data, 1)
89
- total += target.size(0)
90
- correct += (predicted == target).sum().item()
91
-
92
- acc = correct / total
93
- items = {'accuracy': acc, 'loss': test_loss / len(test_loader)}
94
- aim_run.track(items, epoch=epoch, context={'subset': 'test'})
95
-
96
- track_params_dists(model, aim_run, epoch=epoch, context={'subset': 'test'})
97
- track_gradients_dists(model, aim_run, epoch=epoch, context={'subset': 'test'})
98
-
99
- torch.save(model.state_dict(), 'mnist_cnn.pth')
 
1
+ FROM python:3.9
2
+
3
+
4
+
5
+
6
+
7
+
8
+
9
+ RUN useradd -m -u 1000 aim_user
10
+
11
+
12
+
13
+ # Switch to the "aim_user" user
14
+ USER aim_user
15
+
16
+ # Set home to the user's home directory
17
+ ENV HOME=/home/aim_user \
18
+ PATH=/home/aim_user/.local/bin:$PATH
19
+
20
+
21
+
22
+
23
+
24
+
25
+ # Set the working directory to the user's home directory
26
+ WORKDIR $HOME
27
+
28
+
29
+
30
+
31
+
32
+
33
+ # install the `aim` package on the latest version
34
+ RUN pip install aim
35
+
36
+
37
+
38
+ RUN aim telemetry off
39
+
40
+
41
+
42
+ ENTRYPOINT ["/bin/sh", "-c"]
43
+
44
+
45
+
46
+ COPY aim_repo.tar.gz .
47
+ RUN tar xvzf aim_repo.tar.gz
48
+ # We have to run `aim init` in the directory that stores the aim data;
49
+ # otherwise `aim up` will prompt for confirmation to create the directory itself.
50
+ # We run aim listening on 0.0.0.0 to expose it on all network interfaces.
51
+ # We pass port 7860 explicitly even though 43800 is the default port of
52
+ # `aim up`, because explicit is better than implicit.
53
+ CMD ["aim up --host 0.0.0.0 --port 7860 --workers 2"]