|
--- |
|
license: mit |
|
--- |
|
|
|
This model was trained using the 8888 images of the [Anky Genesis NFT Collection](https://drive.google.com/drive/folders/1OBDQ08r8pLN4nfNf-48j87wzUEmF-ox4?usp=sharing), and its mission is to transform an image into pixel art, like so: |
|
|
|
![Anky Degen Pixel Example](https://github.com/jpfraneto/images/blob/main/ankydegenpixel.png?raw=true) |
|
|
|
The code used for training it is the following: |
|
|
|
``` |
|
import os |
|
import torch |
|
import torch.nn as nn |
|
import torch.optim as optim |
|
from torch.utils.data import DataLoader, Dataset |
|
from torchvision import transforms |
|
from PIL import Image |
|
import numpy as np |
|
|
|
# Custom dataset for loading the images |
|
class PixelArtDataset(Dataset): |
|
def __init__(self, image_folder, transform=None): |
|
self.image_folder = image_folder |
|
self.transform = transform |
|
self.image_files = [f"{i}.png" for i in range(1, 8889)] |
|
|
|
# Debug: Check if images are correctly listed |
|
print(f"Total images found: {len(self.image_files)}") |
|
|
|
def __len__(self): |
|
return len(self.image_files) |
|
|
|
def __getitem__(self, idx): |
|
img_path = os.path.join(self.image_folder, self.image_files[idx]) |
|
image = Image.open(img_path).convert("RGB") |
|
if self.transform: |
|
image = self.transform(image) |
|
return image, image |
|
|
|
# Define the neural network |
|
class PixelArtGenerator(nn.Module): |
|
def __init__(self): |
|
super(PixelArtGenerator, self).__init__() |
|
print("Initializing PixelArtGenerator Model...") |
|
self.encoder = nn.Sequential( |
|
nn.Conv2d(3, 64, kernel_size=4, stride=2, padding=1), |
|
nn.ReLU(), |
|
nn.Conv2d(64, 128, kernel_size=4, stride=2, padding=1), |
|
nn.BatchNorm2d(128), |
|
nn.ReLU(), |
|
nn.Conv2d(128, 256, kernel_size=4, stride=2, padding=1), |
|
nn.BatchNorm2d(256), |
|
nn.ReLU() |
|
) |
|
self.decoder = nn.Sequential( |
|
nn.ConvTranspose2d(256, 128, kernel_size=4, stride=2, padding=1), |
|
nn.BatchNorm2d(128), |
|
nn.ReLU(), |
|
nn.ConvTranspose2d(128, 64, kernel_size=4, stride=2, padding=1), |
|
nn.BatchNorm2d(64), |
|
nn.ReLU(), |
|
nn.ConvTranspose2d(64, 3, kernel_size=4, stride=2, padding=1), |
|
nn.Tanh() |
|
) |
|
|
|
def forward(self, x): |
|
x = self.encoder(x) |
|
x = self.decoder(x) |
|
return x |
|
|
|
def train(model, dataloader, criterion, optimizer, device, epochs=50): |
|
print("Starting training...") |
|
model.train() |
|
for epoch in range(epochs): |
|
running_loss = 0.0 |
|
print(f"Epoch [{epoch+1}/{epochs}] starting...") |
|
for batch_idx, (input_images, target_images) in enumerate(dataloader): |
|
input_images, target_images = input_images.to(device), target_images.to(device) |
|
optimizer.zero_grad() |
|
outputs = model(input_images) |
|
loss = criterion(outputs, target_images) |
|
loss.backward() |
|
optimizer.step() |
|
running_loss += loss.item() |
|
|
|
# Debug: Print progress for every batch |
|
if batch_idx % 10 == 0: |
|
print(f"Epoch [{epoch+1}/{epochs}], Batch [{batch_idx+1}/{len(dataloader)}], Loss: {loss.item():.4f}") |
|
|
|
print(f"Epoch [{epoch+1}/{epochs}] completed with Loss: {running_loss/len(dataloader):.4f}") |
|
|
|
def create_pixel_art(model, input_image_path, output_image_path, device): |
|
print("Creating pixel art...") |
|
model.eval() |
|
transform = transforms.Compose([ |
|
transforms.Resize((64, 64)), |
|
transforms.ToTensor(), |
|
transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)) |
|
]) |
|
image = Image.open(input_image_path).convert("RGB") |
|
input_image = transform(image).unsqueeze(0).to(device) |
|
with torch.no_grad(): |
|
output_image = model(input_image).squeeze(0).cpu().numpy() |
|
output_image = np.transpose(output_image, (1, 2, 0)) |
|
output_image = (output_image * 0.5 + 0.5) * 255.0 |
|
output_image = np.clip(output_image, 0, 255).astype(np.uint8) |
|
output_image = Image.fromarray(output_image) |
|
output_image.save(output_image_path) |
|
print(f"Pixel art saved to {output_image_path}") |
|
|
|
if __name__ == "__main__": |
|
# Transform for input images |
|
print("Setting up image transformations...") |
|
transform = transforms.Compose([ |
|
transforms.Resize((64, 64)), # Resize to 64x64 for input |
|
transforms.ToTensor(), |
|
transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)) |
|
]) |
|
|
|
# Load dataset |
|
print("Loading dataset...") |
|
image_folder = "./" # Change this to your images folder path |
|
dataset = PixelArtDataset(image_folder, transform) |
|
dataloader = DataLoader(dataset, batch_size=8, shuffle=True) # Reduce batch size for debugging |
|
|
|
# Check for GPU availability |
|
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") |
|
print(f"Using device: {device}") |
|
|
|
# Initialize the model, criterion, and optimizer |
|
model = PixelArtGenerator().to(device) |
|
criterion = nn.MSELoss() |
|
optimizer = optim.Adam(model.parameters(), lr=0.0002) |
|
|
|
# Enable data parallelism if multiple GPUs are available |
|
if torch.cuda.device_count() > 1: |
|
print(f"Using {torch.cuda.device_count()} GPUs") |
|
model = nn.DataParallel(model) |
|
|
|
# Train the model |
|
train(model, dataloader, criterion, optimizer, device, epochs=50) |
|
|
|
# Save the model |
|
torch.save(model.state_dict(), "pixel_art_generator.pth") |
|
print("Model saved as 'pixel_art_generator.pth'") |
|
|
|
# Create pixel art from a new input image |
|
input_image_path = "input_image.png" # Path to the high-resolution input image |
|
output_image_path = "pixel_art.png" # Path to save the generated pixel art |
|
create_pixel_art(model, input_image_path, output_image_path, device) |
|
print("Pixel art creation completed.") |
|
``` |
|
|
|
The training happened on a Cognition PRO called poiesis. It consisted of 50 epochs, and it lasted for about 4 hours running on 2x NVIDIA RTX 4090. |
|
|
|
Its intended usage is for it to transform any image into its corresponding in pixels, as you can see on this one. |
|
|
|
For running it like such, you can run the following python code on the containing folder of the model (for transforming an image called pfp.png): |
|
|
|
``` |
|
import torch |
|
import torch.nn as nn |
|
from PIL import Image |
|
import numpy as np |
|
from torchvision import transforms |
|
import os |
|
|
|
# Define the neural network (same as the one used during training) |
|
class PixelArtGenerator(nn.Module): |
|
def __init__(self): |
|
super(PixelArtGenerator, self).__init__() |
|
print("Initializing PixelArtGenerator Model...") |
|
self.encoder = nn.Sequential( |
|
nn.Conv2d(3, 64, kernel_size=4, stride=2, padding=1), |
|
nn.ReLU(), |
|
nn.Conv2d(64, 128, kernel_size=4, stride=2, padding=1), |
|
nn.BatchNorm2d(128), |
|
nn.ReLU(), |
|
nn.Conv2d(128, 256, kernel_size=4, stride=2, padding=1), |
|
nn.BatchNorm2d(256), |
|
nn.ReLU() |
|
) |
|
self.decoder = nn.Sequential( |
|
nn.ConvTranspose2d(256, 128, kernel_size=4, stride=2, padding=1), |
|
nn.BatchNorm2d(128), |
|
nn.ReLU(), |
|
nn.ConvTranspose2d(128, 64, kernel_size=4, stride=2, padding=1), |
|
nn.BatchNorm2d(64), |
|
nn.ReLU(), |
|
nn.ConvTranspose2d(64, 3, kernel_size=4, stride=2, padding=1), |
|
nn.Tanh() |
|
) |
|
|
|
def forward(self, x): |
|
x = self.encoder(x) |
|
x = self.decoder(x) |
|
return x |
|
|
|
def create_pixel_art(model, input_image_path, output_image_path, device): |
|
print(f"Creating pixel art for {input_image_path}...") |
|
|
|
# Check if the input image file exists |
|
if not os.path.isfile(input_image_path): |
|
print(f"Error: Input image file '{input_image_path}' not found.") |
|
return |
|
|
|
model.eval() |
|
print("Model set to evaluation mode.") |
|
|
|
# Define the transformation for the input image |
|
transform = transforms.Compose([ |
|
transforms.Resize((64, 64)), |
|
transforms.ToTensor(), |
|
transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)) |
|
]) |
|
print("Image transformation defined.") |
|
|
|
# Load and preprocess the input image |
|
image = Image.open(input_image_path).convert("RGB") |
|
input_image = transform(image).unsqueeze(0).to(device) |
|
print(f"Input image '{input_image_path}' loaded and preprocessed.") |
|
|
|
# Generate pixel art using the model |
|
with torch.no_grad(): |
|
output_image = model(input_image).squeeze(0).cpu().numpy() |
|
print("Pixel art generated by the model.") |
|
|
|
# Post-process and save the output image |
|
output_image = np.transpose(output_image, (1, 2, 0)) |
|
output_image = (output_image * 0.5 + 0.5) * 255.0 |
|
output_image = np.clip(output_image, 0, 255).astype(np.uint8) |
|
output_image = Image.fromarray(output_image) |
|
|
|
# Scale up the image to iPhone 11 width (828 pixels) |
|
scaled_output_image = output_image.resize((828, int(828 * output_image.size[1] / output_image.size[0])), Image.NEAREST) |
|
scaled_output_image.save(output_image_path) |
|
print(f"Pixel art saved to '{output_image_path}'.") |
|
|
|
if __name__ == "__main__": |
|
print("Starting pixel art generation script...") |
|
|
|
# Load the trained model |
|
model = PixelArtGenerator() |
|
model_path = "pixel_art_generator.pth" # Path to the saved model |
|
print(f"Loading model from '{model_path}'...") |
|
|
|
# Load model with handling for DataParallel |
|
state_dict = torch.load(model_path) |
|
if 'module.' in list(state_dict.keys())[0]: |
|
# Remove 'module.' prefix if model was saved with DataParallel |
|
state_dict = {k.replace('module.', ''): v for k, v in state_dict.items()} |
|
model.load_state_dict(state_dict) |
|
print("Model loaded successfully.") |
|
|
|
# Check for GPU availability |
|
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") |
|
model.to(device) |
|
print(f"Using device: {device}") |
|
|
|
# Define the input and output paths for the single image |
|
input_image_path = "pfp.jpeg" # Path to the input image |
|
output_image_path = "pfp_pixelated.png" # Path to save the generated pixel art |
|
|
|
# Create pixel art for the single image |
|
create_pixel_art(model, input_image_path, output_image_path, device) |
|
|
|
print("Pixel art creation completed for the single image.") |
|
|
|
``` |
|
|
|
Hope you enjoy, and any questions that you may have, feel free to reach out to @jpfraneto on telegram. |
|
|
|
If you want to contribute to Anky, we have plenty of compute available, and a powerful story (and intention) that puts the unfolding of AI at the core of our experience as humans. |
|
|
|
Think of it as a playground for your inner child, with boundless potential. |
|
|
|
Our farcaster channel is here: https://warpcast.com/~/channel/anky |
|
|
|
Your uniqueness is a gift. |
|
|
|
🎩 |
|
|