# Spaces: Sleeping
# Sleeping
# ^ Hugging Face Spaces status banner captured in the export; commented out
#   so the file parses as Python.
# -*- coding: utf-8 -*- | |
"""Yet another copy of Final CNN Pose Notebook.ipynb | |
Automatically generated by Colaboratory. | |
Original file is located at | |
https://colab.research.google.com/drive/1IdEBDyEyKQdRRT9R-GkfrJINmHdf3_pF | |
""" | |
# from google.colab import drive | |
# drive.mount('/content/drive') | |
# pip install gradio | |
import gradio as gr | |
import torch | |
from torch.utils.data import DataLoader, Dataset, random_split | |
from torchvision import transforms, utils | |
import torch.nn as nn | |
import torch.optim as optim | |
import torch.nn.functional as F | |
from PIL import Image | |
import os | |
import numpy as np | |
import json | |
import matplotlib.pyplot as plt | |
from torch.utils.data.dataloader import default_collate | |
# Define the dataset class | |
# Define the dataset class
class HumanPoseDataset(Dataset):
    """Dataset of images annotated with 33 pose keypoints (x, y, z, visibility).

    `annotations` maps an image key to a list of annotation dicts; each dict
    carries a 'file' name and a 'landmarks' list. Samples with no usable
    annotation yield None and are dropped by `custom_collate`.
    """

    def __init__(self, annotations, img_dir, transform=None):
        self.annotations = annotations
        self.img_dir = img_dir
        self.transform = transform
        # Cache the key order once: the original rebuilt list(keys()) on
        # every __getitem__ call, which is O(n) per sample.
        self._keys = list(annotations.keys())

    def __len__(self):
        return len(self.annotations)

    def __getitem__(self, idx):
        annotation_list = self.annotations[self._keys[idx]]
        # Skip the image if there are no annotations
        if not annotation_list:
            return None
        # Use the first annotation for simplicity
        annotation = annotation_list[0]
        if not annotation['landmarks']:  # Check if landmarks are not empty
            return None
        img_name = os.path.join(self.img_dir, annotation['file'])
        # Close the underlying file handle promptly instead of leaving it
        # to the garbage collector.
        with Image.open(img_name) as img:
            image = img.convert('RGB')
        original_image_size = image.size  # (width, height) of the raw image
        keypoints = annotation['landmarks']
        keypoints_array = np.array(
            [[k['x'], k['y'], k['z'], k['visibility']] for k in keypoints])
        if self.transform:
            image = self.transform(image)
        # NOTE: the per-sample debug print was removed — it flooded the
        # console on every data-loader fetch.
        return {'image': image,
                'keypoints': keypoints_array,
                'original_image_size': original_image_size}
# Custom collate function to filter out None values | |
# Collate wrapper that drops samples the Dataset returned as None
# (images with missing or empty annotations) before batching.
def custom_collate(batch):
    kept = list(filter(lambda sample: sample is not None, batch))
    return default_collate(kept)
# Load the annotations JSON into a dictionary keyed by image name.
annotations_path = '/content/drive/MyDrive/annotations_CNN (3).json' # Update this path
with open(annotations_path) as f:
    annotations_data = json.load(f)
print("Annotations data loaded. Number of images:", len(annotations_data))
# Keep the key view around; it is compared against the image directory below.
x = annotations_data.keys()
"""# Do data preprocessing. For example, resize to 32 by 32 and normalization. | |
""" | |
img_dir = '/content/drive/MyDrive/CNN_Dataset'
# Training transform: resize to 32x32, convert to tensor, normalize with
# ImageNet statistics, then a random horizontal flip as augmentation.
# NOTE(review): the flip is applied to the image only — the keypoint
# coordinates are never mirrored to match, so the augmentation silently
# breaks the image/label correspondence. TODO: confirm and fix.
transform = transforms.Compose([
    transforms.Resize((32, 32)),  # Resize the images to 32x32 (old comment wrongly said 256x256)
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    transforms.RandomHorizontalFlip(),  # Example augmentation
    # Add more augmentations if needed
])
# Test-time transform: tensor conversion + resize only (no normalization,
# no augmentation).
test_transform=transforms.Compose([
    transforms.ToTensor(),
    transforms.Resize((32,32)),
])
# Create the datasets: same annotations, different transforms.
human_pose_dataset = HumanPoseDataset(annotations_data, img_dir, transform=transform)
testing_pose_dataset = HumanPoseDataset(annotations_data, img_dir, transform=test_transform)
print("Dataset created. Length of dataset:", len(human_pose_dataset))
# Notebook-style sanity check that every annotation key has a matching file
# on disk; the boolean result is discarded when run as a script.
sorted(x) == sorted(os.listdir('/content/drive/MyDrive/CNN_Dataset'))
"""#2. Load parameters of a pretrained model. If a pretrained model for the entire network is not available, then load parameters for the backbone network/feature extraction network/encoder. | |
Pose net model is not available so we will be using an architecture similar to PoseNet, a human pose detection CNN architecture. In the above architecture, we are given a brief description about the PoseNet Architecture. We will be using the Regression Network to find the keypoint coordinates. | |
""" | |
import torch | |
import torch.nn as nn | |
import torch.optim as optim | |
import torch.nn.functional as F | |
class SimpleCNN(nn.Module):
    """Pose-regression CNN: four conv+pool stages followed by two FC layers.

    Input:  (N, 3, 32, 32) image batch.
    Output: (N, 132) tensor — 33 keypoints x (x, y, z, visibility).
    """

    def __init__(self):
        super(SimpleCNN, self).__init__()
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.conv4 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
        # With 32x32 inputs, four 2x2 poolings leave a 2x2 feature map of
        # 128 channels, i.e. 128 * 2 * 2 = 512 flattened features.
        # (The original wrote this as "2 * 16 * 16", which happens to equal
        # 512 too, under a comment wrongly claiming 256x256 inputs.)
        self.fc1 = nn.Linear(128 * 2 * 2, 1000)
        self.fc2 = nn.Linear(1000, 33 * 4)  # 33 keypoints x 4 values each

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = self.pool(F.relu(self.conv3(x)))
        x = self.pool(F.relu(self.conv4(x)))
        x = torch.flatten(x, 1)  # (N, 512) for the fully connected layers
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x
# Initialize the model with randomly initialized weights.
model = SimpleCNN()
print("Model initialized.")
print(model) # Print the model architecture
#!pip install mediapipe | |
"""#3 Replace the output layer if necessary and finetune the network for your dataset. Use validation dataset to pick a good learning rate and momentum. | |
1. Training for a very less samples | |
""" | |
# Split the dataset into training, validation, and test sets.
# Deliberately tiny training split (4%) for the overfitting demo below.
train_size = int(0.04* len(human_pose_dataset))
validation_size = int(0.1 * len(human_pose_dataset))
test_size = len(human_pose_dataset) - train_size - validation_size
train_dataset, remaining_dataset = random_split(human_pose_dataset, [train_size, validation_size + test_size])
validation_dataset, test_dataset = random_split(remaining_dataset, [validation_size, test_size])
# NOTE(review): the hard-coded [6, 194] split assumes the dataset has
# exactly 200 entries — random_split raises otherwise. Verify against data.
test_pose_dataset , remaining_data = random_split(testing_pose_dataset,[6,194])
# Define the batch size
batch_size = 8
# Create data loaders for each set with the custom collate function
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, collate_fn=custom_collate)
validation_loader = DataLoader(validation_dataset, batch_size=batch_size, shuffle=False, collate_fn=custom_collate)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, collate_fn=custom_collate)
test_image_loader = DataLoader(test_pose_dataset, batch_size=batch_size, shuffle=False, collate_fn=custom_collate)
print("Data loaders created.")
# Notebook cell expression: shows the training-set size; discarded as a script.
len(train_dataset)
# Loss function: mean squared error over the 132 regressed values.
criterion = nn.MSELoss()
# Optimizer
optimizer = optim.Adam(model.parameters(), lr=1e-4)
# Convert the model parameters to float
model = model.float()
# Ensure that the tensors are also floats
sample_batch = next(iter(train_loader))
#import mediapipe as mp
images = sample_batch['image'].float() # Convert images to float
keypoints = sample_batch['keypoints'].view(-1, 132).float() # Convert keypoints to float and reshape
# Now proceed with the optimization loop.
# NOTE(review): this deliberately trains on the single batch fetched above
# for 10 steps — an overfitting demonstration, not real training.
loss=0
for epochs in range(10):
    optimizer.zero_grad()
    outputs = model(images)
    loss = criterion(outputs, keypoints)
    loss.backward()
    optimizer.step()
    print("Optimization step completed.")
    print(loss.item())
# Keep the final loss as a plain float for the summary print below.
loss=loss.item()
import torch | |
def calculate_accuracy(outputs, targets):
    """Return the mean absolute difference between outputs and targets.

    Despite the name this is a mean-absolute-error score (lower is better);
    the caller rescales it into a percentage.
    """
    return torch.abs(outputs - targets).mean()
print(outputs.shape)
# Calculate an "accuracy" figure from the mean absolute error of the batch.
with torch.no_grad():
    accuracy = calculate_accuracy(outputs, keypoints)
# NOTE(review): 1 - MAE/132 is an ad-hoc rescaling, not a standard pose
# metric (e.g. PCK); treat the printed percentage as indicative only.
accuracy= 1- accuracy/132
print("Loss:", loss)
print("Accuracy:", accuracy.item()*100, '%')
"""As you can see, the accuracy is very close to 100% (Overfitting) | |
Now taking 80-10-10 split on the dataset, we create new train, val and test loaders | |
""" | |
# Re-split the dataset 80/10/10 into training, validation, and test sets
# (replaces the tiny 4% split used for the overfitting demo above; the
# loader variables are rebound).
train_size = int(0.8* len(human_pose_dataset))
validation_size = int(0.1 * len(human_pose_dataset))
test_size = len(human_pose_dataset) - train_size - validation_size
train_dataset, remaining_dataset = random_split(human_pose_dataset, [train_size, validation_size + test_size])
validation_dataset, test_dataset = random_split(remaining_dataset, [validation_size, test_size])
# NOTE(review): hard-coded [6, 194] assumes exactly 200 samples; verify.
test_pose_dataset , remaining_data = random_split(testing_pose_dataset,[6,194])
# Define the batch size
batch_size = 8
# Create data loaders for each set with the custom collate function
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, collate_fn=custom_collate)
validation_loader = DataLoader(validation_dataset, batch_size=batch_size, shuffle=False, collate_fn=custom_collate)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, collate_fn=custom_collate)
test_image_loader = DataLoader(test_pose_dataset, batch_size=batch_size, shuffle=False, collate_fn=custom_collate)
print("Data loaders created.")
# Notebook cell expression: shows the test-set size; discarded as a script.
len(test_dataset)
import torch | |
import torch.nn as nn | |
import torch.optim as optim | |
from torch.utils.data import DataLoader, random_split | |
from torchvision import transforms | |
import torch.nn.functional as F | |
# Hyperparameter grid search for SGD fine-tuning.
# Bug fixes versus the original:
#  * The whole search was accidentally nested under a stray
#    `class SimpleCNN(nn.Module):` line, which shadowed the real model
#    class and referenced SimpleCNN inside its own class body.
#  * The score compared `avg_loss = total_loss / len(train_loader)` where
#    `total_loss` was initialized to 0 and never accumulated — every
#    combination scored 0.0, so "best" was just the first one tried.
#    Each combination is now scored by its observed batch loss.
#  * The forward pass was run twice per combination; once is enough.
learning_rates = [0.001, 0.01, 0.1]
momentums = [0.9, 0.95, 0.99]
weight_decays = [0.0001, 0.001, 0.01]
best_loss = float('inf')
best_lr, best_momentum, best_weight_decay = None, None, None

# Grid search over hyperparameters
for lr in learning_rates:
    for momentum in momentums:
        for weight_decay in weight_decays:
            # Fresh model per combination so results are comparable.
            model = SimpleCNN()
            model.train()
            # Define loss function and optimizer
            criterion = nn.MSELoss()
            optimizer = optim.SGD(model.parameters(), lr=lr,
                                  momentum=momentum, weight_decay=weight_decay)
            # Single-batch probe: one optimization step on one batch.
            sample_batch = next(iter(train_loader))
            images = sample_batch['image'].float()  # Convert images to float
            keypoints = sample_batch['keypoints'].view(-1, 132).float()
            optimizer.zero_grad()
            outputs = model(images)  # one forward pass (was duplicated)
            print("Output shape after forward pass:", outputs.shape)
            loss = criterion(outputs, keypoints)
            print("Initial loss:", loss.item())
            loss.backward()
            optimizer.step()
            print("Optimization step completed.")
            # Score this combination by the loss it actually produced.
            avg_loss = loss.item()
            if avg_loss < best_loss:
                best_loss = avg_loss
                best_lr, best_momentum, best_weight_decay = lr, momentum, weight_decay

# After the grid search, choose the hyperparameters that performed the best
print("Best Hyperparameters - lr: {}, momentum: {}, weight_decay: {}".format(
    best_lr, best_momentum, best_weight_decay))
# Train the final model with the selected hyperparameters on the full dataset
model = SimpleCNN()
optimizer = optim.SGD(model.parameters(), lr=best_lr, momentum=best_momentum, weight_decay=best_weight_decay)
"""#3. Plotting Validation and Test Loss | |
The best parameters are: | |
* Learning Rate: 0.001 | |
* Momentum: 0.9 | |
* Weight Decay: 0.0001 | |
""" | |
import torch
import matplotlib.pyplot as plt

# Fine-tune on one fixed training batch while tracking validation loss.
# NOTE(review): both loops reuse single batches drawn once below — this is
# a demonstration run, not full-dataset training.
sample_batch = next(iter(train_loader))
images = sample_batch['image'].float()
keypoints = sample_batch['keypoints'].view(-1, 132).float()

# One fixed validation batch, likewise converted to float.
validation_sample_batch = next(iter(validation_loader))
validation_images = validation_sample_batch['image'].float()
validation_keypoints = validation_sample_batch['keypoints'].view(-1, 132).float()

# Optimizer/criterion for the final run (lr chosen via the search above).
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
criterion = torch.nn.MSELoss()

train_loss = []
val_loss = []
num_epochs = 20
for epoch in range(num_epochs):
    model.train()
    optimizer.zero_grad()
    outputs = model(images)
    current_loss = criterion(outputs, keypoints)
    current_loss.backward()
    optimizer.step()

    model.eval()  # Switch to evaluation mode for validation
    with torch.no_grad():
        # Calculate validation loss on the fixed validation batch.
        val_outputs = model(validation_images)
        val_current_loss = criterion(val_outputs, validation_keypoints)

    # Bug fix: the progress line previously printed a hard-coded "/100"
    # even though the loop runs 20 epochs.
    print(f"Epoch [{epoch + 1}/{num_epochs}], Loss: {current_loss.item():.4f}, Val Loss: {val_current_loss.item():.4f}")
    train_loss.append(current_loss.item())
    val_loss.append(val_current_loss.item())

# Keep the curves under the names the plotting cell below expects.
plotting_val_loss = val_loss
plotting_train_loss = train_loss
import matplotlib.pyplot as plt
# Plot the training and validation loss curves collected above.
plt.figure(figsize=(8, 4))
plt.plot( plotting_train_loss, marker='o', linestyle='-', color='b',label='train loss')
plt.plot( plotting_val_loss, marker='o', linestyle= '-', color='r', label='val loss')
plt.title('Loss vs Epochs')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.grid(True)
plt.legend()
# Show the legend in a small box (this second call overrides the first).
plt.legend(loc='upper right')
plt.show()
"""#4. Final Run on Test Dataset""" | |
# Ensure that the tensors are also floats
sample_batch = next(iter(test_loader))
#import mediapipe as mp
test_images = sample_batch['image'].float() # Convert images to float
test_keypoints = sample_batch['keypoints'].view(-1, 132).float() # Convert keypoints to float and reshape
model.eval()
# NOTE(review): zero_grad is a no-op here (no backward pass follows);
# a torch.no_grad() context would be the conventional way to run inference.
optimizer.zero_grad()
outputs = model(test_images)
print("Testing Done")
# Notebook cell expression: inspect the batch shape (discarded as a script).
test_images.shape
# Take the first sample of the batch, reshaped to (33 keypoints, 4 values),
# for the actual-vs-predicted pose plots below.
test_actual_plot = test_keypoints.reshape(len(test_images),33,4)[0]
test_predict_plot = outputs.reshape(len(test_images),33,4)[0]
test_predict_plot.shape
"""# 4. Finally, evaluate on the test dataset.""" | |
import cv2 | |
import matplotlib.pyplot as plt | |
import numpy as np | |
def plot_human_pose(keypoints, title='Actual Pose', line_color='red'):
    """Scatter 33 (x, y, z, visibility) keypoints and draw the skeleton.

    Generalized with backward-compatible `title`/`line_color` parameters
    (defaults reproduce the original "Actual Pose" red plot) so the
    near-identical predicted-pose copy of this function below could reuse
    the same skeleton definition.

    Args:
        keypoints: sequence of 33 entries, each (x, y, z, visibility);
            only x and y are drawn. The y-axis is inverted because image
            coordinates grow downward.
        title: figure title.
        line_color: color of the skeleton segments.
    """
    fig, ax = plt.subplots()
    # Plot every keypoint (z and visibility are ignored for the 2D plot).
    for i in range(len(keypoints)):
        x, y, _, _ = keypoints[i]
        ax.scatter(x, -y, color='blue')  # Invert y-axis
    # Index pairs for the skeleton; presumably the 33-landmark
    # MediaPipe-style pose topology (the original comments mislabeled
    # most segments) — TODO confirm against the annotation source.
    connect_lines = [(0, 2), (2, 7),
                     (0, 5), (5, 8),
                     (9, 10),
                     (11, 12), (12, 24), (11, 23),
                     (24, 23), (24, 26), (23, 25),
                     (26, 28), (25, 27),
                     (28, 30), (28, 32), (30, 32),
                     (27, 29), (27, 31), (31, 29),
                     (12, 14), (11, 13),
                     (14, 16), (13, 15),
                     (16, 18), (18, 20), (16, 20), (16, 22),
                     (15, 17), (15, 19),
                     (17, 19), (15, 21)]
    for start, end in connect_lines:
        x_vals = [keypoints[start][0], keypoints[end][0]]
        y_vals = [-keypoints[start][1], -keypoints[end][1]]  # Invert y-axis
        ax.plot(x_vals, y_vals, linewidth=2, color=line_color)
    ax.set_aspect('equal', adjustable='datalim')
    plt.title(title)
    plt.axis('off')
    plt.show()


# Example usage: plot the ground-truth keypoints of the first test sample.
keypoints = test_actual_plot  # Replace with your 33 key points
plot_human_pose(keypoints)
def plot_human_pose(keypoints, title='Predicted Pose', line_color='green'):
    """Scatter 33 (x, y, z, visibility) keypoints and draw the skeleton.

    This redefinition shadows the earlier copy, as in the original
    notebook; it is now parameterized (defaults reproduce this copy's
    green "Predicted Pose" plot) so both variants share one body.

    Args:
        keypoints: sequence of 33 entries, each (x, y, z, visibility);
            only x and y are drawn. The y-axis is inverted because image
            coordinates grow downward.
        title: figure title.
        line_color: color of the skeleton segments.
    """
    fig, ax = plt.subplots()
    # Plot every keypoint (z and visibility are ignored for the 2D plot).
    for i in range(len(keypoints)):
        x, y, _, _ = keypoints[i]
        ax.scatter(x, -y, color='blue')  # Invert y-axis
    # Index pairs for the skeleton; presumably the 33-landmark
    # MediaPipe-style pose topology (the original comments mislabeled
    # most segments) — TODO confirm against the annotation source.
    connect_lines = [(0, 2), (2, 7),
                     (0, 5), (5, 8),
                     (9, 10),
                     (11, 12), (12, 24), (11, 23),
                     (24, 23), (24, 26), (23, 25),
                     (26, 28), (25, 27),
                     (28, 30), (28, 32), (30, 32),
                     (27, 29), (27, 31), (31, 29),
                     (12, 14), (11, 13),
                     (14, 16), (13, 15),
                     (16, 18), (18, 20), (16, 20), (16, 22),
                     (15, 17), (15, 19),
                     (17, 19), (15, 21)]
    for start, end in connect_lines:
        x_vals = [keypoints[start][0], keypoints[end][0]]
        y_vals = [-keypoints[start][1], -keypoints[end][1]]  # Invert y-axis
        ax.plot(x_vals, y_vals, linewidth=2, color=line_color)
    ax.set_aspect('equal', adjustable='datalim')
    plt.title(title)
    plt.axis('off')
    plt.show()


# Example usage: plot the model's predicted keypoints for the same sample.
keypoints = test_predict_plot.detach().numpy()  # Replace with your 33 key points
plot_human_pose(keypoints)
"""### As you can see, the model predicts the pose of the person very accurately as depicted by its train and validation accuracy""" | |
# torch.save(model.state_dict(), '/content/drive/MyDrive/Ayush sarangi/model.pth')
# Persist the entire model object (architecture + weights). Loading it back
# requires the SimpleCNN class definition to be importable at load time.
torch.save( model, '/content/drive/MyDrive/Ayush sarangi/entire_model.pt')
import cv2 | |
# test_image = cv2.imread('/content/drive/MyDrive/CNN_Dataset/02e442be-aec7-4f7c-93a7-e4246d0e1f93.JPG') | |
# # test_image = cv2.resize(test_image, (32,32)) | |
# # test_image.shape | |
def predict_pose(test_image):
    """Predict the 33 keypoints for one image array and plot the pose.

    Args:
        test_image: image as a NumPy array (H, W, 3), as produced by
            cv2.imread or Gradio's numpy image component.

    Returns:
        The return value of plot_human_pose (None; the plot is shown as a
        side effect).
    """
    img = cv2.resize(test_image, (32, 32))
    tensor_img = transforms.ToTensor()(img)
    tensor_img = tensor_img.unsqueeze(0)  # add batch dim -> (1, 3, 32, 32)
    model.eval()
    # Bug fix: inference needs no gradients; the old optimizer.zero_grad()
    # call was a no-op here and coupled prediction to the training optimizer.
    with torch.no_grad():
        outputs = model(tensor_img)
    pred_keypoints = outputs.reshape(1, 33, 4)[0].numpy()
    return plot_human_pose(pred_keypoints)
# NOTE(review): the original called predict_pose(test_image) here, but
# test_image is only defined in the commented-out cv2.imread cell above,
# so the call raised NameError. Removed; the Gradio demo below exercises
# predict_pose instead.
# Interface fixes: predict_pose resizes with cv2 and therefore expects a
# NumPy array, so the input type must be 'numpy' (not 'pil'); `label`
# belongs on the component, not on Interface; and `outputs` was missing.
pose_detector = gr.Interface(fn=predict_pose,
                             inputs=gr.Image(type='numpy', label="Image"),
                             outputs=gr.Plot())
pose_detector.launch()