VatsalPatel18 committed
Commit 49f4666 · 1 Parent(s): a39812a

Revised Dockerfile

.ipynb_checkpoints/Dockerfile-checkpoint ADDED
@@ -0,0 +1,30 @@
+ # Use an official Python runtime as a parent image
+ FROM python:3.8-slim-buster
+
+ # Set the working directory in the container
+ WORKDIR /app
+
+ # Install system and Python dependencies
+ RUN apt-get update && \
+     apt-get install -y build-essential openslide-tools libgl1-mesa-glx && \
+     apt-get clean && \
+     rm -rf /var/lib/apt/lists/*
+
+ # Add a non-root user with a specified UID
+ ARG USER_ID
+ RUN adduser --disabled-password --gecos '' --uid $USER_ID myuser
+
+ # Copy the entire genomic_plip_model directory contents into the container at /app
+ COPY ./ /app/
+
+ # Install Python dependencies
+ RUN pip install -r requirements.txt
+
+ # Set the user to the newly created non-root user
+ USER myuser
+
+ # Expose a port (if necessary for your application)
+ EXPOSE 8888
+
+ # Set the entrypoint to a shell command
+ ENTRYPOINT ["/bin/bash"]
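A note on the ARG USER_ID step above: the argument has no default, so the adduser line will fail unless a UID is supplied at build time, e.g. docker build --build-arg USER_ID=$(id -u) -t genomic-plip . (the image tag is illustrative, not from the repo). Matching the host UID keeps files written to bind-mounted volumes owned by the invoking user. EXPOSE 8888 lines up with the jupyterlab pin in the requirements below, presumably so a JupyterLab server started from the bash entrypoint is reachable from the host.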
.ipynb_checkpoints/requirements-checkpoint.txt ADDED
@@ -0,0 +1,12 @@
+ numpy==1.19.2
+ pandas==1.3.4
+ matplotlib==3.5.2
+ openslide-python==1.1.2
+ scikit-image==0.18.1
+ scikit-learn==1.2.1
+ tqdm==4.62.3
+ Pillow==9.4.0
+ transformers==4.33.2
+ torch==2.0.1
+ jupyterlab==3.2.1
+ tensorflow==2.6.1
.ipynb_checkpoints/train_omics_plip_model-checkpoint.py ADDED
@@ -0,0 +1,89 @@
+ import torch
+ import argparse
+ from torch import optim
+ from torch.utils.data import DataLoader
+ from scripts.genomic_plip_model import GenomicPLIPModel
+ from scripts.tile_file_dataloader import FlatTileDataset
+ from transformers import CLIPVisionModel
+
+ def train_model(data_dir, model_save_path, pretrained_model_path, lr, num_epochs, train_batch_size, validation_batch_size, num_workers):
+
+     # Load datasets
+     train_dataset = FlatTileDataset(data_dir=f'{data_dir}/train')
+     train_data_loader = DataLoader(train_dataset, batch_size=train_batch_size, shuffle=True, num_workers=num_workers)
+
+     validation_dataset = FlatTileDataset(data_dir=f'{data_dir}/validate')
+     validation_data_loader = DataLoader(validation_dataset, batch_size=validation_batch_size, shuffle=False, num_workers=num_workers)
+
+     # Initialize the model
+     base_model = CLIPVisionModel.from_pretrained(pretrained_model_path)
+     custom_model = GenomicPLIPModel(base_model)
+
+     criterion = torch.nn.CosineSimilarity(dim=1)
+     optimizer = optim.Adam(custom_model.parameters(), lr=lr)
+
+
+     for epoch in range(num_epochs):
+         # Training loop
+         custom_model.train()
+         train_loss = 0.0
+
+         for batch_images, batch_scores in train_data_loader:
+             optimizer.zero_grad()
+
+             batch_loss = 0
+             for img, score in zip(batch_images, batch_scores):
+                 vision_features, score_features = custom_model(img.unsqueeze(0), score.unsqueeze(0))
+                 cos_sim = criterion(score_features, vision_features)
+                 loss = 1-cos_sim.mean()
+
+                 batch_loss += loss.item()
+                 loss.backward()
+
+             optimizer.step()
+             train_loss += batch_loss
+             print(f"Batch Cosine Similarity {batch_loss:.4f}")
+
+         avg_train_loss = train_loss / len(train_data_loader)
+         print(f"Epoch [{epoch+1}/{num_epochs}], Training Cosine Similarity: {avg_train_loss:.4f}")
+
+         # Validation loop
+         custom_model.eval()
+         validation_loss = 0.0
+
+         with torch.no_grad():
+             for batch_images, batch_scores in validation_data_loader:
+                 batch_loss = 0
+                 for img, score in zip(batch_images, batch_scores):
+                     vision_features, score_features = custom_model(img.unsqueeze(0), score.unsqueeze(0))
+                     cos_sim = criterion(score_features, vision_features)
+                     loss = 1-cos_sim.mean()
+
+                     batch_loss += loss.item()
+
+                 validation_loss += batch_loss
+                 print(f"Validation Batch Cosine Similarity {batch_loss:.4f}")
+
+         avg_validation_loss = validation_loss / len(validation_data_loader)
+         print(f"Epoch [{epoch+1}/{num_epochs}], Validation Cosine Similarity: {avg_validation_loss:.4f}")
+
+     # Save the trained model
+     torch.save(custom_model.state_dict(), model_save_path)
+
+ if __name__ == "__main__":
+     parser = argparse.ArgumentParser(description='Train the Genomic PLIP Model')
+     parser.add_argument('--data_dir', type=str, default='Datasets/train_03', help='Directory containing the train, validate, and test datasets.')
+     parser.add_argument('--model_save_path', type=str, default='genomic_plip.pth', help='Path to save the trained model.')
+     parser.add_argument('--pretrained_model_path', type=str, default='./plip', help='Path to the pretrained CLIP model.')
+
+     parser.add_argument('--lr', type=float, default=0.00001, help='Learning rate for the optimizer.')
+     parser.add_argument('--num_epochs', type=int, default=1, help='Number of epochs to train for.')
+     parser.add_argument('--train_batch_size', type=int, default=128, help='Batch size for the training data loader.')
+     parser.add_argument('--validation_batch_size', type=int, default=128, help='Batch size for the validation data loader.')
+     parser.add_argument('--num_workers', type=int, default=32, help='Number of worker threads for data loading.')
+
+
+     args = parser.parse_args()
+
+     train_model(args.data_dir, args.model_save_path, args.pretrained_model_path, args.lr, args.num_epochs, args.train_batch_size, args.validation_batch_size, args.num_workers)
+
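The checkpoint above mirrors train_omics_plip_model.py and is invoked as, e.g., python train_omics_plip_model.py --data_dir Datasets/train_03 --num_epochs 1. The loop assumes FlatTileDataset yields (tile_image, genomic_score) tensor pairs and that GenomicPLIPModel maps an image/score pair to two embeddings of equal width; neither class ships in this commit, so the following is only a sketch of the assumed interfaces, with hypothetical dimensions:

# Hypothetical stand-ins for scripts/tile_file_dataloader.py and
# scripts/genomic_plip_model.py; shapes are inferred from the training
# loop above, not taken from the repository.
import torch
from torch import nn
from torch.utils.data import Dataset

class FlatTileDataset(Dataset):
    """Assumed to yield (tile_image, genomic_score) tensor pairs."""
    def __init__(self, data_dir):
        # The real class presumably indexes preprocessed tile tensors
        # found under data_dir; left empty here on purpose.
        self.items = []

    def __len__(self):
        return len(self.items)

    def __getitem__(self, idx):
        img, score = self.items[idx]  # e.g. img: (3, 224, 224), score: (score_dim,)
        return img, score

class GenomicPLIPModel(nn.Module):
    """Assumed to project CLIP vision features and genomic scores into one space."""
    def __init__(self, base_model, embed_dim=512, score_dim=4):
        super().__init__()
        self.vision_model = base_model                  # a transformers CLIPVisionModel
        hidden = base_model.config.hidden_size          # 768 for a ViT-B trunk
        self.vision_proj = nn.Linear(hidden, embed_dim)
        self.score_proj = nn.Linear(score_dim, embed_dim)

    def forward(self, pixel_values, scores):
        pooled = self.vision_model(pixel_values=pixel_values).pooler_output
        return self.vision_proj(pooled), self.score_proj(scores)

One design note: the per-sample loop with unsqueeze(0) works, but it discards the batching the DataLoader already provides; passing batch_images and batch_scores through the model in a single call would compute the same cosine loss (up to a 1/batch_size scaling of the accumulated gradients) far more cheaply.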
Dockerfile CHANGED
@@ -10,16 +10,20 @@ RUN apt-get update && \
      apt-get clean && \
      rm -rf /var/lib/apt/lists/*

- # Copy the entire genomic_plip_model directory contents into the container at /app
- RUN adduser --disabled-password --gecos '' myuser
- USER myuser
+ # Add a non-root user with a specified UID
+ ARG USER_ID
+ RUN adduser --disabled-password --gecos '' --uid $USER_ID myuser

+ # Copy the entire genomic_plip_model directory contents into the container at /app
  COPY ./ /app/
+
  # Install Python dependencies
- RUN pip install --no-cache-dir -r requirements.txt
+ RUN pip install -r requirements.txt

- # Create a non-root user and switch to it for security
+ # Set the user to the newly created non-root user
+ USER myuser

+ # Expose a port (if necessary for your application)
  EXPOSE 8888

  # Set the entrypoint to a shell command
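Net effect of this hunk: the non-root user is now created (keyed to a host-supplied UID) before the source is copied, pip install runs while the build is still root, and USER myuser is applied only after the dependencies are in place, avoiding permission failures during the install. One side effect: the rewritten install line drops --no-cache-dir, so pip's download cache remains in the image layer and the image grows somewhat.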
train_omics_plip_model.py CHANGED
@@ -35,7 +35,7 @@ def train_model(data_dir, model_save_path, pretrained_model_path, lr, num_epochs
              for img, score in zip(batch_images, batch_scores):
                  vision_features, score_features = custom_model(img.unsqueeze(0), score.unsqueeze(0))
                  cos_sim = criterion(score_features, vision_features)
-                 loss = -cos_sim.mean()
+                 loss = 1-cos_sim.mean()

                  batch_loss += loss.item()
                  loss.backward()
@@ -57,7 +57,7 @@ def train_model(data_dir, model_save_path, pretrained_model_path, lr, num_epochs
                  for img, score in zip(batch_images, batch_scores):
                      vision_features, score_features = custom_model(img.unsqueeze(0), score.unsqueeze(0))
                      cos_sim = criterion(score_features, vision_features)
-                     loss = -cos_sim.mean()
+                     loss = 1-cos_sim.mean()

                      batch_loss += loss.item()

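The substance of both hunks is the same fix: with nn.CosineSimilarity(dim=1) as the criterion, the old -cos_sim.mean() already pulled the two embeddings together, but it reported values near -1 for well-aligned pairs, so the "Cosine Similarity" lines printed during training read as negative losses. 1 - cos_sim.mean() is the standard cosine-distance form, bounded in [0, 2] and zero exactly at perfect alignment; since the two differ by a constant, gradients are unchanged and only the logged numbers improve. A quick standalone check (random vectors, no repo code):

import torch

cos = torch.nn.CosineSimilarity(dim=1)
a = torch.randn(1, 512)

# New form: 1 - cosine similarity. Zero for perfectly aligned vectors...
print(f"aligned: {(1 - cos(a, 2.0 * a).mean()).item():.4f}")    # ~0.0
# ...and 2.0 at the other extreme, for exactly opposed vectors.
print(f"opposed: {(1 - cos(a, -a).mean()).item():.4f}")         # ~2.0

# Old form: -cosine similarity. Identical gradients (constant offset of 1),
# but it logs ~-1.0 for aligned pairs, which is what made the printed
# 'Cosine Similarity' values confusing before this commit.
print(f"old, aligned: {(-cos(a, 2.0 * a).mean()).item():.4f}")  # ~-1.0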