README.md · jpfraneto/anky-degen-pixels at main

metadata

license: mit

This model was trained using the 8888 images of the Anky Genesis NFT Collection, and its mission is to transform an image into pixel art, like so:

The code used for training it is the following:

import os
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms
from PIL import Image
import numpy as np

# Custom dataset for loading the images
class PixelArtDataset(Dataset):
    """Dataset of sequentially numbered PNG images used as autoencoder samples.

    File names are generated as ``1.png`` .. ``<num_images>.png`` and resolved
    relative to ``image_folder``. Each item is an ``(input, target)`` pair in
    which both elements are the same (optionally transformed) image.

    Args:
        image_folder: Directory containing the numbered PNG files.
        transform: Optional callable applied to each loaded RGB image.
        num_images: How many numbered files to index, starting at ``1.png``.
            Defaults to 8888, the previously hard-coded collection size.
    """

    def __init__(self, image_folder, transform=None, num_images=8888):
        self.image_folder = image_folder
        self.transform = transform
        # File names are generated, not discovered: nothing is read from disk
        # until __getitem__ is called.
        self.image_files = [f"{i}.png" for i in range(1, num_images + 1)]

        # Debug: report how many file names were generated
        print(f"Total images found: {len(self.image_files)}")

    def __len__(self):
        """Return the number of indexed image files."""
        return len(self.image_files)

    def __getitem__(self, idx):
        """Load image ``idx`` as RGB and return it as ``(input, target)``."""
        img_path = os.path.join(self.image_folder, self.image_files[idx])
        image = Image.open(img_path).convert("RGB")
        if self.transform is not None:
            image = self.transform(image)
        # The network is trained to reconstruct its own input, so the target
        # is the very same object as the input.
        return image, image

# Define the neural network
class PixelArtGenerator(nn.Module):
    """Convolutional encoder-decoder that maps a 3-channel image to a 3-channel image.

    The encoder applies three stride-2 convolutions (3 -> 64 -> 128 -> 256
    channels); the decoder mirrors it with three stride-2 transposed
    convolutions (256 -> 128 -> 64 -> 3 channels) and ends in ``Tanh``, so
    output values lie in [-1, 1].
    """

    def __init__(self):
        super().__init__()
        print("Initializing PixelArtGenerator Model...")

        # Every (transposed) convolution shares the same 4x4 / stride 2 / pad 1 geometry.
        geometry = dict(kernel_size=4, stride=2, padding=1)

        # Layer order is significant: the positions inside each Sequential
        # become the state_dict key names.
        down_layers = [
            nn.Conv2d(3, 64, **geometry),
            nn.ReLU(),
            nn.Conv2d(64, 128, **geometry),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.Conv2d(128, 256, **geometry),
            nn.BatchNorm2d(256),
            nn.ReLU(),
        ]
        self.encoder = nn.Sequential(*down_layers)

        up_layers = [
            nn.ConvTranspose2d(256, 128, **geometry),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.ConvTranspose2d(128, 64, **geometry),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.ConvTranspose2d(64, 3, **geometry),
            nn.Tanh(),
        ]
        self.decoder = nn.Sequential(*up_layers)

    def forward(self, x):
        """Encode ``x`` and decode it back, returning the reconstructed tensor."""
        return self.decoder(self.encoder(x))

def train(model, dataloader, criterion, optimizer, device, epochs=50):
    """Train ``model`` for ``epochs`` full passes over ``dataloader``.

    Args:
        model: Module to optimise; it is switched to training mode.
        dataloader: Sized iterable yielding ``(input_images, target_images)``
            batches of tensors.
        criterion: Loss callable invoked as ``criterion(outputs, targets)``.
        optimizer: Optimizer that updates the model's parameters.
        device: Device every batch is moved to before the forward pass.
        epochs: Number of passes over the data.

    Raises:
        ValueError: If ``dataloader`` contains no batches. Without this check
            the per-epoch average would divide by zero after the first epoch.
    """
    num_batches = len(dataloader)
    if num_batches == 0:
        raise ValueError("dataloader is empty: there are no batches to train on")

    print("Starting training...")
    model.train()
    for epoch in range(epochs):
        running_loss = 0.0
        print(f"Epoch [{epoch+1}/{epochs}] starting...")
        for batch_idx, (input_images, target_images) in enumerate(dataloader):
            input_images, target_images = input_images.to(device), target_images.to(device)
            optimizer.zero_grad()
            outputs = model(input_images)
            loss = criterion(outputs, target_images)
            loss.backward()
            optimizer.step()

            # .item() is read once per batch and reused for logging
            batch_loss = loss.item()
            running_loss += batch_loss

            # Debug: print progress on every 10th batch
            if batch_idx % 10 == 0:
                print(f"Epoch [{epoch+1}/{epochs}], Batch [{batch_idx+1}/{num_batches}], Loss: {batch_loss:.4f}")

        # Mean of the per-batch losses for this epoch
        print(f"Epoch [{epoch+1}/{epochs}] completed with Loss: {running_loss/num_batches:.4f}")

def create_pixel_art(model, input_image_path, output_image_path, device):
    """Run one image through ``model`` and write the result to disk.

    Args:
        model: Network used for inference; it is switched to eval mode.
        input_image_path: Path of the image to convert.
        output_image_path: Path the generated image is saved to.
        device: Device the input tensor is moved to before the forward pass.
    """
    print("Creating pixel art...")
    model.eval()

    # Resize to 64x64, convert to a tensor, then normalise each channel
    # with mean 0.5 and std 0.5.
    half = (0.5, 0.5, 0.5)
    preprocess = transforms.Compose([
        transforms.Resize((64, 64)),
        transforms.ToTensor(),
        transforms.Normalize(half, half),
    ])

    source = Image.open(input_image_path).convert("RGB")
    batch = preprocess(source).unsqueeze(0).to(device)

    with torch.no_grad():
        prediction = model(batch)

    # Channel-first float output -> channel-last uint8 array in [0, 255]
    pixels = prediction.squeeze(0).cpu().numpy()
    pixels = np.transpose(pixels, (1, 2, 0))
    pixels = (pixels * 0.5 + 0.5) * 255.0  # undo the 0.5/0.5 normalisation
    pixels = np.clip(pixels, 0, 255).astype(np.uint8)

    Image.fromarray(pixels).save(output_image_path)
    print(f"Pixel art saved to {output_image_path}")

if __name__ == "__main__":
    # End-to-end training script: build the dataset, train the generator,
    # save its weights, then convert one sample image.

    # Transform for input images
    print("Setting up image transformations...")
    transform = transforms.Compose([
        transforms.Resize((64, 64)),  # Resize to 64x64 for input
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ])

    # Load dataset
    print("Loading dataset...")
    image_folder = "./"  # Change this to your images folder path
    dataset = PixelArtDataset(image_folder, transform)
    dataloader = DataLoader(dataset, batch_size=8, shuffle=True)  # Reduce batch size for debugging

    # Check for GPU availability
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print(f"Using device: {device}")

    # Initialize the model, criterion, and optimizer
    model = PixelArtGenerator().to(device)
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=0.0002)

    # Enable data parallelism if multiple GPUs are available
    if torch.cuda.device_count() > 1:
        print(f"Using {torch.cuda.device_count()} GPUs")
        model = nn.DataParallel(model)

    # Train the model
    train(model, dataloader, criterion, optimizer, device, epochs=50)

    # Save the model. When wrapped in DataParallel, save the underlying
    # module's weights so the checkpoint keys carry no "module." prefix and
    # load directly into a plain PixelArtGenerator.
    model_to_save = model.module if isinstance(model, nn.DataParallel) else model
    torch.save(model_to_save.state_dict(), "pixel_art_generator.pth")
    print("Model saved as 'pixel_art_generator.pth'")

    # Create pixel art from a new input image
    input_image_path = "input_image.png"  # Path to the high-resolution input image
    output_image_path = "pixel_art.png"  # Path to save the generated pixel art
    create_pixel_art(model, input_image_path, output_image_path, device)
    print("Pixel art creation completed.")

The training happened on a Cognition PRO called poiesis. It consisted of 50 epochs, and it lasted for about 4 hours running on 2x NVIDIA RTX 4090.

Its intended use is to transform any image into its pixel-art counterpart, as you can see in this one.

To use it this way, run the following Python code from the folder that contains the model (this example transforms an image called pfp.jpeg):

import torch
import torch.nn as nn
from PIL import Image
import numpy as np
from torchvision import transforms
import os

# Define the neural network (same as the one used during training)
class PixelArtGenerator(nn.Module):
    """Encoder-decoder network with a 3-channel input and a 3-channel output.

    ``encoder`` stacks three stride-2 convolutions (3 -> 64 -> 128 -> 256
    channels) and ``decoder`` stacks three stride-2 transposed convolutions
    (256 -> 128 -> 64 -> 3 channels) followed by ``Tanh``. The layer order
    fixes the state_dict key names, so it must match the saved checkpoint.
    """

    def __init__(self):
        super().__init__()
        print("Initializing PixelArtGenerator Model...")

        # Shared 4x4 kernel, stride 2, padding 1 for all six (transposed) convolutions.
        shape = {"kernel_size": 4, "stride": 2, "padding": 1}

        self.encoder = nn.Sequential(
            nn.Conv2d(3, 64, **shape),
            nn.ReLU(),
            nn.Conv2d(64, 128, **shape),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.Conv2d(128, 256, **shape),
            nn.BatchNorm2d(256),
            nn.ReLU(),
        )
        self.decoder = nn.Sequential(
            nn.ConvTranspose2d(256, 128, **shape),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.ConvTranspose2d(128, 64, **shape),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.ConvTranspose2d(64, 3, **shape),
            nn.Tanh(),
        )

    def forward(self, x):
        """Pass ``x`` through the encoder, then the decoder."""
        latent = self.encoder(x)
        return self.decoder(latent)

def create_pixel_art(model, input_image_path, output_image_path, device):
    """Convert one image with ``model`` and save an 828-pixel-wide result.

    Prints an error and returns without doing anything else when
    ``input_image_path`` is not an existing file.

    Args:
        model: Network used for inference; it is switched to eval mode.
        input_image_path: Path of the image to convert.
        output_image_path: Path the upscaled result is saved to.
        device: Device the input tensor is moved to before the forward pass.
    """
    print(f"Creating pixel art for {input_image_path}...")

    # Guard clause: bail out early when the input file is missing
    input_exists = os.path.isfile(input_image_path)
    if not input_exists:
        print(f"Error: Input image file '{input_image_path}' not found.")
        return

    model.eval()
    print("Model set to evaluation mode.")

    # Resize to 64x64, convert to a tensor, normalise with mean 0.5 / std 0.5
    half = (0.5, 0.5, 0.5)
    preprocess = transforms.Compose([
        transforms.Resize((64, 64)),
        transforms.ToTensor(),
        transforms.Normalize(half, half),
    ])
    print("Image transformation defined.")

    # Read the file as RGB and turn it into a single-item batch on the device
    source = Image.open(input_image_path).convert("RGB")
    batch = preprocess(source).unsqueeze(0).to(device)
    print(f"Input image '{input_image_path}' loaded and preprocessed.")

    # Forward pass without gradient tracking
    with torch.no_grad():
        pixels = model(batch).squeeze(0).cpu().numpy()
        print("Pixel art generated by the model.")

    # Channel-first float output -> channel-last uint8 image
    pixels = np.transpose(pixels, (1, 2, 0))
    pixels = (pixels * 0.5 + 0.5) * 255.0  # undo the 0.5/0.5 normalisation
    pixels = np.clip(pixels, 0, 255).astype(np.uint8)
    small = Image.fromarray(pixels)

    # Scale up to iPhone 11 width (828 pixels), keeping the aspect ratio;
    # nearest-neighbour resampling is used for the enlargement.
    width, height = small.size
    target_size = (828, int(828 * height / width))
    enlarged = small.resize(target_size, Image.NEAREST)
    enlarged.save(output_image_path)
    print(f"Pixel art saved to '{output_image_path}'.")

if __name__ == "__main__":
    # Inference script: load the trained weights and convert a single image.
    print("Starting pixel art generation script...")

    # Pick the device up front so the checkpoint can be mapped onto it while
    # loading; otherwise a checkpoint saved on a GPU fails to load on a
    # CPU-only machine.
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # Load the trained model
    model = PixelArtGenerator()
    model_path = "pixel_art_generator.pth"  # Path to the saved model
    print(f"Loading model from '{model_path}'...")

    # NOTE: torch.load unpickles the file, so only load checkpoints you trust.
    state_dict = torch.load(model_path, map_location=device)

    # Load model with handling for DataParallel: strip a leading 'module.'
    # from each key (prefix only, never an occurrence elsewhere in the name).
    prefix = "module."
    if any(key.startswith(prefix) for key in state_dict):
        state_dict = {
            (key[len(prefix):] if key.startswith(prefix) else key): value
            for key, value in state_dict.items()
        }
    model.load_state_dict(state_dict)
    print("Model loaded successfully.")

    model.to(device)
    print(f"Using device: {device}")

    # Define the input and output paths for the single image
    input_image_path = "pfp.jpeg"  # Path to the input image
    output_image_path = "pfp_pixelated.png"  # Path to save the generated pixel art

    # Create pixel art for the single image
    create_pixel_art(model, input_image_path, output_image_path, device)

    print("Pixel art creation completed for the single image.")

Hope you enjoy it! If you have any questions, feel free to reach out to @jpfraneto on Telegram.

If you want to contribute to Anky, we have plenty of compute available, and a powerful story (and intention) that puts the unfolding of AI at the core of our experience as humans.

Think of it as a playground for your inner child, with boundless potential.

Our farcaster channel is here: https://warpcast.com/~/channel/anky

Your uniqueness is a gift.

🎩