Spaces:

matikosowy
/

Photo_Colorizer

Runtime error

App Files Community

matikosowy committed on Sep 20, 2024

Commit

2620eb0

1 Parent(s): 3e2af2c

new model

Browse files

Files changed (4) hide show

app.py +18 -86
model.pth +2 -2
model.py +94 -0
utils.py +18 -0

app.py CHANGED Viewed

@@ -2,88 +2,14 @@ import gradio as gr
 import torch
 from PIL import Image
 from torchvision import transforms
-import torchvision.models as models
-import torch.nn as nn
-class ColorizingModel(nn.Module):  # old model removed by this commit: 1-channel input -> 3-channel output through a final Sigmoid
-    def __init__(self):
-        super(ColorizingModel, self).__init__()
-        self.encoder1 = nn.Sequential(
-            nn.Conv2d(1, 64, 3, 2, 1),  # 150x150 -> 75x75 (3x3 kernel, stride 2, padding 1)
-            nn.LeakyReLU()
-        )
-        self.encoder2 = nn.Sequential(
-            nn.Conv2d(64, 128, 3, 2, 1),  # 75x75 -> 38x38
-            nn.LeakyReLU()
-        )
-        self.encoder3 = nn.Sequential(
-            nn.Conv2d(128, 256, 3, 2, 1),  # 38x38 -> 19x19
-            nn.LeakyReLU()
-        )
-        self.encoder4 = nn.Sequential(
-            nn.Conv2d(256, 512, 3, 2, 1),  # 19x19 -> 10x10
-            nn.LeakyReLU()
-        )
-        # Bottleneck: fully connected, so the 10x10 feature-map size is fixed by the layer shapes
-        self.bottleneck = nn.Sequential(
-            nn.Flatten(),
-            nn.Linear(512 * 10 * 10, 2048)  # compress the flattened 512x10x10 map to a 2048-dim code
-        )
-        # Decoder
-        self.decoder_fc = nn.Sequential(
-            nn.Linear(2048, 512 * 10 * 10),  # expand the code back to 512*10*10 values
-            nn.Unflatten(1, (512, 10, 10))  # reshape dim 1 back into a (512, 10, 10) feature map
-        )
-        self.decoder1 = nn.Sequential(
-            nn.ConvTranspose2d(512, 256, 3, 2, 1),  # 10x10 -> 19x19
-            nn.LeakyReLU()
-        )
-        self.decoder2 = nn.Sequential(
-            nn.ConvTranspose2d(256, 128, 3, 2, 1, output_padding=1),  # 19x19 -> 38x38
-            nn.LeakyReLU()
-        )
-        self.decoder3 = nn.Sequential(
-            nn.ConvTranspose2d(128, 64, 3, 2, 1),  # 38x38 -> 75x75
-            nn.LeakyReLU()
-        )
-        self.decoder4 = nn.Sequential(
-            nn.ConvTranspose2d(64, 3, 3, 2, 1, output_padding=1),  # 75x75 -> 150x150
-            nn.Sigmoid()  # squashes the 3 output channels into (0, 1)
-        )
-    def forward(self, x):  # shape comments below assume a 150x150 single-channel input
-        # Encoder
-        enc1 = self.encoder1(x)  # 64 channels, 75x75
-        enc2 = self.encoder2(enc1)  # 128 channels, 38x38
-        enc3 = self.encoder3(enc2)  # 256 channels, 19x19
-        enc4 = self.encoder4(enc3)  # 512 channels, 10x10
-        # Bottleneck
-        bottleneck = self.bottleneck(enc4)
-        # Decoder (with additive skip connections: each decoder input is summed with the same-shaped encoder output)
-        dec_fc = self.decoder_fc(bottleneck)
-        dec1 = self.decoder1(dec_fc + enc4)  # Skip connection from encoder4
-        dec2 = self.decoder2(dec1 + enc3)  # Skip connection from encoder3
-        dec3 = self.decoder3(dec2 + enc2)  # Skip connection from encoder2
-        dec4 = self.decoder4(dec3 + enc1)  # Skip connection from encoder1
-        return dec4
 device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')  # use the GPU when one is available, otherwise the CPU
-model = ColorizingModel()
-model_weights = torch.load('model.pth', map_location=device)  # old call: no weights_only argument
 model.load_state_dict(model_weights)
 model = model.to(device)
 model.eval()  # switch the module to evaluation mode for inference
@@ -91,25 +17,31 @@ model.eval()
 # Define preprocessing transforms
 transform = transforms.Compose([
-    transforms.Resize((150, 150)),  # 150x150 is the input size the old ColorizingModel shape comments are written for
     transforms.ToTensor(),
-    transforms.Normalize([0.5], [0.5])  # single-channel (x - 0.5) / 0.5
 ])
 def preprocess(image):  # old version: grayscale conversion, transform, then add a batch dimension
-    image = image.convert('L')  # 'L' = single-channel grayscale
     image = transform(image)
-    image = image.unsqueeze(0)  # add a leading batch dimension
-    return image
 def predict(image):  # old version: run the model on the preprocessed image and return a PIL image
-    image = preprocess(image).to(device)
     with torch.no_grad():  # inference only, no gradient tracking
-        output = model(image)
     image = transforms.ToPILImage()(output.squeeze().cpu())  # drop size-1 dims and move to CPU before converting
     return image

 import torch
 from PIL import Image
 from torchvision import transforms
+from utils import normalize_lab, denormalize_lab
+from model import Generator
+import kornia.color as color
 device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')  # use the GPU when one is available, otherwise the CPU
+model = Generator()
+model_weights = torch.load('model.pth', map_location=device, weights_only=True)  # weights_only=True restricts what torch.load will unpickle (see PyTorch docs)
 model.load_state_dict(model_weights)
 model = model.to(device)
 model.eval()  # evaluation mode; Generator's DropoutAlways layers still apply dropout (they hard-code training=True)
 # Define preprocessing transforms
 transform = transforms.Compose([
+    transforms.Resize((256, 256), Image.BICUBIC),  # 256x256 matches the input size in Generator's shape comments
     transforms.ToTensor(),
 ])
 def preprocess(image):  # convert an input image into the normalized, batched L channel that is fed to the model
+    image = image.convert('RGB')  # force 3 channels before the RGB -> Lab conversion
     image = transform(image)  # resize to 256x256 and convert to a tensor
+    image = image.to(device)
+    image = color.rgb_to_lab(image)  # kornia RGB -> Lab conversion
+    L = image[[0], ...]  # list index [0] selects the first (L) channel while keeping that dimension
+    L, _ = normalize_lab(L, 0)  # 0 is a placeholder for ab; only the normalized L is kept
+    print(L.shape)  # NOTE(review): looks like a leftover debug print, runs on every call; consider removing
+    return L.unsqueeze(0)  # add a leading batch dimension
 def predict(image):  # colorize: predict ab from the L channel, rebuild the Lab image, return it as an RGB PIL image
+    L = preprocess(image)  # normalized L channel with a batch dimension
     with torch.no_grad():  # inference only, no gradient tracking
+        output = model(L)  # Generator output has 2 channels; it is used as the normalized ab channels below
+    L, ab = denormalize_lab(L, output)  # undo normalize_lab's scaling for both L and ab
+    output = torch.cat([L, ab], dim=1)  # stack along the channel dimension: 1 + 2 = 3 Lab channels
+    output = color.lab_to_rgb(output)
     image = transforms.ToPILImage()(output.squeeze().cpu())  # drop the batch dimension and move to CPU before converting
     return image  # NOTE(review): result stays at the 256x256 working size; it is not resized back to the input size here

model.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:bc47c60e171b66970021950e9af2fc8c9987fa6e029a7965a910063d4e618701
-size 851481002

 version https://git-lfs.github.com/spec/v1
+oid sha256:893644c8e4b35d8dde82b867753c33e364c76d51b10fffeeb1ddf600220f13e4
+size 217659569

model.py ADDED Viewed

	@@ -0,0 +1,94 @@

+import torch.nn as nn
+import torch
+import torch.nn.functional as F
+# Dropout layer that stays active even in evaluation mode (model.eval() does not disable it)
+class DropoutAlways(nn.Dropout2d):
+    def forward(self, x):
+        return F.dropout2d(x, self.p, training=True)  # training=True is hard-coded, so self.training is ignored
+class DownBlock(nn.Module):  # encoder block: strided conv -> optional InstanceNorm -> LeakyReLU(0.2)
+    def __init__(self, in_channels, out_channels, normalize=True):
+        super().__init__()
+        self.block = nn.Sequential(
+            nn.Conv2d(in_channels, out_channels, 4, 2, 1, padding_mode='reflect', bias=False if normalize else True),  # 4x4 kernel, stride 2, padding 1; conv bias only when no norm follows
+            nn.InstanceNorm2d(out_channels, affine=True) if normalize else nn.Identity(),
+            # nn.Identity() is a pass-through placeholder that keeps the Sequential layout the same when normalize=False.
+            nn.LeakyReLU(0.2),
+        )
+    def forward(self, x):
+        return self.block(x)
+class UpBlock(nn.Module):  # decoder block: transposed conv -> optional InstanceNorm -> optional always-on dropout -> ReLU or Tanh
+    def __init__(self, in_channels, out_channels, normalize=True, dropout=False, activation='relu'):
+        super().__init__()
+        self.block = nn.Sequential(
+            nn.ConvTranspose2d(in_channels, out_channels, 4, 2, 1, bias=False if normalize else True),  # 4x4 kernel, stride 2, padding 1; bias only when no norm follows
+            nn.InstanceNorm2d(out_channels, affine=True) if normalize else nn.Identity(),
+            DropoutAlways(p=0.5) if dropout else nn.Identity(),  # DropoutAlways keeps dropping at inference time too
+            nn.ReLU() if activation == 'relu' else nn.Tanh(),  # any activation value other than 'relu' selects Tanh
+        )
+    def forward(self, x):
+        return self.block(x)
+class Generator(nn.Module):  # encoder-decoder with concatenated skip connections: 1 input channel -> 2 output channels through Tanh
+    def __init__(self):
+        super().__init__()
+        # Encoder (shape comments assume a 256x256 input)
+        self.encoder1 = DownBlock(1, 64, normalize=False)  # 256x256 -> 128x128
+        self.encoder2 = DownBlock(64, 128)  # 128x128 -> 64x64
+        self.encoder3 = DownBlock(128, 256)  # 64x64 -> 32x32
+        self.encoder4 = DownBlock(256, 512)  # 32x32 -> 16x16
+        self.encoder5 = DownBlock(512, 512)  # 16x16 -> 8x8
+        self.encoder6 = DownBlock(512, 512)  # 8x8 -> 4x4
+        self.encoder7 = DownBlock(512, 512)  # 4x4 -> 2x2
+        self.encoder8 = DownBlock(512, 512, normalize=False)  # 2x2 -> 1x1
+        # Decoder (input channels are doubled from decoder2 on because forward() concatenates the matching encoder output)
+        self.decoder1 = UpBlock(512, 512, dropout=True)  # 1x1 -> 2x2
+        self.decoder2 = UpBlock(512 * 2, 512, dropout=True)  # 2x2 -> 4x4
+        self.decoder3 = UpBlock(512 * 2, 512, dropout=True)  # 4x4 -> 8x8
+        self.decoder4 = UpBlock(512 * 2, 512)  # 8x8 -> 16x16
+        self.decoder5 = UpBlock(512 * 2, 256)  # 16x16 -> 32x32
+        self.decoder6 = UpBlock(256 * 2, 128)  # 32x32 -> 64x64
+        self.decoder7 = UpBlock(128 * 2, 64)  # 64x64 -> 128x128
+        self.decoder8 = UpBlock(64 * 2, 2, normalize=False, activation='tanh')  # 128x128 -> 256x256
+    def forward(self, x):  # the number in each local name is the spatial size from the shape comments above, not a channel count
+        # Encoder
+        ch256_down = x
+        ch128_down = self.encoder1(ch256_down)
+        ch64_down = self.encoder2(ch128_down)
+        ch32_down = self.encoder3(ch64_down)
+        ch16_down = self.encoder4(ch32_down)
+        ch8_down = self.encoder5(ch16_down)
+        ch4_down = self.encoder6(ch8_down)
+        ch2_down = self.encoder7(ch4_down)
+        ch1 = self.encoder8(ch2_down)
+        # Decoder: after each up block, concatenate the same-size encoder output along the channel dimension (dim=1)
+        ch2_up = self.decoder1(ch1)
+        ch2 = torch.cat([ch2_up, ch2_down], dim=1)
+        ch4_up = self.decoder2(ch2)
+        ch4 = torch.cat([ch4_up, ch4_down], dim=1)
+        ch8_up = self.decoder3(ch4)
+        ch8 = torch.cat([ch8_up, ch8_down], dim=1)
+        ch16_up = self.decoder4(ch8)
+        ch16 = torch.cat([ch16_up, ch16_down], dim=1)
+        ch32_up = self.decoder5(ch16)
+        ch32 = torch.cat([ch32_up, ch32_down], dim=1)
+        ch64_up = self.decoder6(ch32)
+        ch64 = torch.cat([ch64_up, ch64_down], dim=1)
+        ch128_up = self.decoder7(ch64)
+        ch128 = torch.cat([ch128_up, ch128_down], dim=1)
+        ch256_up = self.decoder8(ch128)  # last block has no skip concatenation after it; Tanh bounds the output to [-1, 1]
+        return ch256_up

utils.py ADDED Viewed

	@@ -0,0 +1,18 @@

+def normalize_lab(L, ab):
+    """
+    Normalize the L and ab channels of an image in Lab color space.
+    (Even though ab channels are in [-128, 127] range, we divide them by 110 because higher values are very rare.
+    This makes the distribution closer to [-1, 1] in most cases.)
+    """
+    L = L / 50. - 1.  # maps L = 0 -> -1 and L = 100 -> 1
+    ab = ab / 110.  # maps ab = +/-110 -> +/-1; larger magnitudes fall outside [-1, 1]
+    return L, ab  # both values are always returned; callers that need only one pass a placeholder for the other
+def denormalize_lab(L, ab):
+    """
+    Denormalize the L and ab channels of an image in Lab color space.
+    (reverse of normalize_lab function)
+    """
+    L = (L + 1) * 50.  # inverse of L / 50 - 1: maps -1 -> 0 and 1 -> 100
+    ab = ab * 110.  # inverse of ab / 110: maps +/-1 -> +/-110
+    return L, ab