initial commit

- app.py +520 -0
- requirements.txt +7 -0

app.py
ADDED
@@ -0,0 +1,520 @@
# -*- coding: utf-8 -*-
"""Yet another copy of Final CNN Pose Notebook.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/1IdEBDyEyKQdRRT9R-GkfrJINmHdf3_pF
"""

# from google.colab import drive
# drive.mount('/content/drive')

# pip install gradio

import gradio as gr

import torch
from torch.utils.data import DataLoader, Dataset, random_split
from torchvision import transforms, utils
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from PIL import Image
import os
import numpy as np
import json
import matplotlib.pyplot as plt
from torch.utils.data.dataloader import default_collate

# Define the dataset class
class HumanPoseDataset(Dataset):
    def __init__(self, annotations, img_dir, transform=None):
        self.annotations = annotations
        self.img_dir = img_dir
        self.transform = transform

    def __len__(self):
        return len(self.annotations)

    def __getitem__(self, idx):
        img_key = list(self.annotations.keys())[idx]
        annotation_list = self.annotations[img_key]
        # Skip the image if there are no annotations
        if not annotation_list:
            return None
        # Use the first annotation for simplicity
        annotation = annotation_list[0]
        if not annotation['landmarks']:  # Skip images with empty landmarks
            return None
        img_name = os.path.join(self.img_dir, annotation['file'])
        image = Image.open(img_name).convert('RGB')
        original_image_size = image.size
        keypoints = annotation['landmarks']
        keypoints_array = np.array([[k['x'], k['y'], k['z'], k['visibility']] for k in keypoints])

        if self.transform:
            image = self.transform(image)

        sample = {'image': image, 'keypoints': keypoints_array, 'original_image_size': original_image_size}
        return sample

# Custom collate function to filter out None values
def custom_collate(batch):
    batch = [b for b in batch if b is not None]
    return default_collate(batch)
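
# For reference, the dataset class above assumes the annotations JSON maps each
# image key to a list of annotation dicts shaped roughly like this (illustrative
# values only; the real file ships with the dataset):
#
# {
#   "img_0001.jpg": [
#     {
#       "file": "img_0001.jpg",
#       "landmarks": [
#         {"x": 0.51, "y": 0.22, "z": -0.10, "visibility": 0.98},
#         ...  # 33 landmarks in total, MediaPipe Pose style
#       ]
#     }
#   ]
# }
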
# Load the annotations JSON into a dictionary
annotations_path = '/content/drive/MyDrive/annotations_CNN (3).json'  # Update this path
with open(annotations_path) as f:
    annotations_data = json.load(f)
print("Annotations data loaded. Number of images:", len(annotations_data))

x = annotations_data.keys()

"""# 1. Do data preprocessing. For example, resize to 32 by 32 and normalize."""

img_dir = '/content/drive/MyDrive/CNN_Dataset'

# Define the transformations with resizing and augmentation
transform = transforms.Compose([
    transforms.Resize((32, 32)),  # Resize the images to 32x32
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    transforms.RandomHorizontalFlip(),  # Example augmentation; note it flips only
    # the image, not the keypoints, so flipped samples carry mirrored labels
    # Add more augmentations if needed
])

test_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Resize((32, 32)),
    # Note: no normalization here, unlike the training transform
])

# Create the dataset
human_pose_dataset = HumanPoseDataset(annotations_data, img_dir, transform=transform)
testing_pose_dataset = HumanPoseDataset(annotations_data, img_dir, transform=test_transform)

print("Dataset created. Length of dataset:", len(human_pose_dataset))

# Sanity check: the annotation keys should match the files in the image directory
# (in a plain script this bare expression is evaluated and discarded)
sorted(x) == sorted(os.listdir('/content/drive/MyDrive/CNN_Dataset'))

"""# 2. Load parameters of a pretrained model. If a pretrained model for the entire network is not available, then load parameters for the backbone network / feature-extraction network / encoder.

A pretrained PoseNet model is not available, so we use an architecture similar to PoseNet, a CNN architecture for human pose detection. We use its regression network to predict the keypoint coordinates.
"""
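
# A minimal sketch of the "load a pretrained backbone" route the heading describes,
# assuming torchvision's ImageNet-pretrained ResNet-18 as the encoder (this is an
# illustration, not the model actually trained below):
#
# from torchvision import models
#
# backbone = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1)
# backbone.fc = nn.Linear(backbone.fc.in_features, 33 * 4)  # Replace the head for keypoint regression
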
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

class SimpleCNN(nn.Module):
    def __init__(self):
        super(SimpleCNN, self).__init__()
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.conv4 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
        # With 32x32 inputs, four 2x2 poolings leave a 2x2 feature map with
        # 128 channels, i.e. 128 * 2 * 2 = 512 flattened features
        self.fc1 = nn.Linear(128 * 2 * 2, 1000)
        self.fc2 = nn.Linear(1000, 33 * 4)  # 33 keypoints, each with (x, y, z, visibility)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = self.pool(F.relu(self.conv3(x)))
        x = self.pool(F.relu(self.conv4(x)))
        x = torch.flatten(x, 1)  # Flatten the tensor for the fully connected layer
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x

# Initialize the model
model = SimpleCNN()
print("Model initialized.")
print(model)  # Print the model architecture
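
# Sanity-check the flattened feature size with a dummy 32x32 input; the four
# poolings should leave a (1, 132) output (33 keypoints x 4 values)
with torch.no_grad():
    dummy_out = model(torch.randn(1, 3, 32, 32))
print("Dummy forward pass output shape:", dummy_out.shape)  # expected: torch.Size([1, 132])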

# !pip install mediapipe

"""# 3. Replace the output layer if necessary and finetune the network for your dataset. Use the validation dataset to pick a good learning rate and momentum.

1. Training on a very small subset of samples
"""

# Split the dataset into training, validation, and test sets
train_size = int(0.04 * len(human_pose_dataset))
validation_size = int(0.1 * len(human_pose_dataset))
test_size = len(human_pose_dataset) - train_size - validation_size
train_dataset, remaining_dataset = random_split(human_pose_dataset, [train_size, validation_size + test_size])
validation_dataset, test_dataset = random_split(remaining_dataset, [validation_size, test_size])

test_pose_dataset, remaining_data = random_split(testing_pose_dataset, [6, 194])

# Define the batch size
batch_size = 8

# Create data loaders for each set with the custom collate function
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, collate_fn=custom_collate)
validation_loader = DataLoader(validation_dataset, batch_size=batch_size, shuffle=False, collate_fn=custom_collate)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, collate_fn=custom_collate)

test_image_loader = DataLoader(test_pose_dataset, batch_size=batch_size, shuffle=False, collate_fn=custom_collate)

print("Data loaders created.")

len(train_dataset)

# Loss function
criterion = nn.MSELoss()

# Optimizer
optimizer = optim.Adam(model.parameters(), lr=1e-4)

# Convert the model parameters to float
model = model.float()

# Ensure that the tensors are also floats
sample_batch = next(iter(train_loader))
images = sample_batch['image'].float()  # Convert images to float
keypoints = sample_batch['keypoints'].view(-1, 132).float()  # Convert keypoints to float and reshape to (N, 33*4)

# Now proceed with the optimization loop, deliberately overfitting this single batch
for epoch in range(10):
    optimizer.zero_grad()
    outputs = model(images)
    loss = criterion(outputs, keypoints)
    loss.backward()
    optimizer.step()
    print("Optimization step completed.")
    print(loss.item())
loss = loss.item()

import torch

def calculate_accuracy(outputs, targets):
    # Mean absolute error between predicted and target keypoint vectors
    accuracy = torch.mean(torch.abs(outputs - targets))
    return accuracy

print(outputs.shape)
# Calculate "accuracy" (an ad-hoc MAE-based heuristic, not a standard pose metric)
with torch.no_grad():
    accuracy = calculate_accuracy(outputs, keypoints)
    accuracy = 1 - accuracy / 132

print("Loss:", loss)
print("Accuracy:", accuracy.item() * 100, '%')
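
# The "accuracy" above is only a rough heuristic. A more conventional keypoint
# metric is PCK (Percentage of Correct Keypoints): the fraction of predicted
# keypoints that land within some threshold of the ground truth. A minimal
# sketch, assuming normalized coordinates and an illustrative 0.05 threshold:
def pck(outputs, targets, threshold=0.05):
    pred = outputs.view(-1, 33, 4)[..., :2]  # Predicted (x, y)
    gt = targets.view(-1, 33, 4)[..., :2]    # Ground-truth (x, y)
    dist = torch.norm(pred - gt, dim=-1)     # Per-keypoint Euclidean distance
    return (dist < threshold).float().mean()

with torch.no_grad():
    print("PCK@0.05:", pck(outputs, keypoints).item())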

"""As you can see, the accuracy is very close to 100%, which on such a tiny training subset is a sign of overfitting.

Now, taking an 80-10-10 split of the dataset, we create new train, validation, and test loaders.
"""

# Split the dataset into training, validation, and test sets
train_size = int(0.8 * len(human_pose_dataset))
validation_size = int(0.1 * len(human_pose_dataset))
test_size = len(human_pose_dataset) - train_size - validation_size
train_dataset, remaining_dataset = random_split(human_pose_dataset, [train_size, validation_size + test_size])
validation_dataset, test_dataset = random_split(remaining_dataset, [validation_size, test_size])

test_pose_dataset, remaining_data = random_split(testing_pose_dataset, [6, 194])

# Define the batch size
batch_size = 8

# Create data loaders for each set with the custom collate function
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, collate_fn=custom_collate)
validation_loader = DataLoader(validation_dataset, batch_size=batch_size, shuffle=False, collate_fn=custom_collate)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, collate_fn=custom_collate)

test_image_loader = DataLoader(test_pose_dataset, batch_size=batch_size, shuffle=False, collate_fn=custom_collate)

print("Data loaders created.")

len(test_dataset)

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, random_split
from torchvision import transforms
import torch.nn.functional as F

# Define hyperparameters to search over
learning_rates = [0.001, 0.01, 0.1]
momentums = [0.9, 0.95, 0.99]
weight_decays = [0.0001, 0.001, 0.01]

best_loss = float('inf')
best_lr, best_momentum, best_weight_decay = None, None, None

# Grid search over hyperparameters
for lr in learning_rates:
    for momentum in momentums:
        for weight_decay in weight_decays:
            # Initialize the model with the current set of hyperparameters
            model = SimpleCNN()

            # Define loss function and optimizer
            criterion = nn.MSELoss()
            optimizer = optim.SGD(model.parameters(), lr=lr, momentum=momentum, weight_decay=weight_decay)

            # Ensure that the tensors are also floats
            sample_batch = next(iter(train_loader))
            images = sample_batch['image'].float()  # Convert images to float
            keypoints = sample_batch['keypoints'].view(-1, 132).float()  # Convert keypoints to float and reshape

            # One optimization step on a single batch
            model.train()
            optimizer.zero_grad()
            outputs = model(images)
            print("Output shape after forward pass:", outputs.shape)
            loss = criterion(outputs, keypoints)
            print("Initial loss:", loss.item())
            loss.backward()
            optimizer.step()
            print("Optimization step completed.")

            # Use the single-batch loss as a rough selection metric
            avg_loss = loss.item()

            # Check if the current set of hyperparameters resulted in a better performance
            if avg_loss < best_loss:
                best_loss = avg_loss
                best_lr, best_momentum, best_weight_decay = lr, momentum, weight_decay

# After the grid search, choose the hyperparameters that performed the best
print("Best Hyperparameters - lr: {}, momentum: {}, weight_decay: {}".format(
    best_lr, best_momentum, best_weight_decay))

# Train the final model with the selected hyperparameters on the full dataset
model = SimpleCNN()
optimizer = optim.SGD(model.parameters(), lr=best_lr, momentum=best_momentum, weight_decay=best_weight_decay)

"""# 4. Plotting Validation and Test Loss

The best parameters are:

* Learning Rate: 0.001
* Momentum: 0.9
* Weight Decay: 0.0001
"""

import torch
import matplotlib.pyplot as plt

# The model, optimizer, and criterion are assumed to be defined above

# Ensure that the tensors are also floats for training
sample_batch = next(iter(train_loader))
images = sample_batch['image'].float()
keypoints = sample_batch['keypoints'].view(-1, 132).float()

# Ensure that the tensors are also floats for validation
validation_sample_batch = next(iter(validation_loader))
validation_images = validation_sample_batch['image'].float()
validation_keypoints = validation_sample_batch['keypoints'].view(-1, 132).float()

# Now proceed with the optimization loop
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
criterion = torch.nn.MSELoss()

train_loss = []
val_loss = []

num_epochs = 20
for epoch in range(num_epochs):
    model.train()
    optimizer.zero_grad()
    outputs = model(images)
    current_loss = criterion(outputs, keypoints)
    current_loss.backward()
    optimizer.step()

    model.eval()  # Switch to evaluation mode for validation
    with torch.no_grad():
        # Calculate validation loss
        val_outputs = model(validation_images)
        val_current_loss = criterion(val_outputs, validation_keypoints)

    print(f"Epoch [{epoch + 1}/{num_epochs}], Loss: {current_loss.item():.4f}, Val Loss: {val_current_loss.item():.4f}")
    train_loss.append(current_loss.item())
    val_loss.append(val_current_loss.item())

plotting_val_loss = val_loss
plotting_train_loss = train_loss

import matplotlib.pyplot as plt

# Plotting
plt.figure(figsize=(8, 4))

plt.plot(plotting_train_loss, marker='o', linestyle='-', color='b', label='train loss')
plt.plot(plotting_val_loss, marker='o', linestyle='-', color='r', label='val loss')

plt.title('Loss vs Epochs')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.grid(True)

# Show the legend in a small box
plt.legend(loc='upper right')

plt.show()

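# Note that the loop above optimizes a single cached batch. A full pass over the
# training data would iterate the loader instead; a sketch (not run here):
#
# for batch in train_loader:
#     imgs = batch['image'].float()
#     kps = batch['keypoints'].view(-1, 132).float()
#     optimizer.zero_grad()
#     batch_loss = criterion(model(imgs), kps)
#     batch_loss.backward()
#     optimizer.step()
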
"""# 5. Final Run on Test Dataset"""

# Ensure that the tensors are also floats
sample_batch = next(iter(test_loader))
test_images = sample_batch['image'].float()  # Convert images to float
test_keypoints = sample_batch['keypoints'].view(-1, 132).float()  # Convert keypoints to float and reshape

model.eval()

with torch.no_grad():  # No gradients are needed at inference time
    outputs = model(test_images)

print("Testing Done")

test_images.shape

test_actual_plot = test_keypoints.reshape(len(test_images), 33, 4)[0]

test_predict_plot = outputs.reshape(len(test_images), 33, 4)[0]

test_predict_plot.shape

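# The check above uses only the first test batch. Averaging the loss over the
# whole test_loader would look like this; a sketch (not run here):
#
# model.eval()
# total, n = 0.0, 0
# with torch.no_grad():
#     for batch in test_loader:
#         imgs = batch['image'].float()
#         kps = batch['keypoints'].view(-1, 132).float()
#         total += criterion(model(imgs), kps).item() * len(imgs)
#         n += len(imgs)
# print("Mean test MSE:", total / n)
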
"""# 6. Finally, evaluate on the test dataset."""

import cv2

import matplotlib.pyplot as plt
import numpy as np

def plot_human_pose(keypoints, title='Pose', line_color='red'):
    # Create a figure and axis
    fig, ax = plt.subplots()

    # Plot keypoints
    for i in range(len(keypoints)):
        x, y, _, _ = keypoints[i]
        ax.scatter(x, -y, color='blue')  # Invert the y-axis

    # Skeleton edges between MediaPipe Pose landmark indices
    connect_lines = [(0, 2), (2, 7),  # Nose to left eye to left ear
                     (0, 5), (5, 8),  # Nose to right eye to right ear
                     (9, 10),  # Mouth
                     (11, 12), (12, 24), (11, 23),  # Shoulders and torso sides
                     (24, 23), (24, 26), (23, 25),  # Hips, and hips to knees
                     (26, 28), (25, 27),  # Knees to ankles
                     (28, 30), (28, 32), (30, 32),  # Right foot
                     (27, 29), (27, 31), (31, 29),  # Left foot
                     (12, 14), (11, 13),  # Shoulders to elbows
                     (14, 16), (13, 15),  # Elbows to wrists
                     (16, 18), (18, 20), (16, 20), (16, 22),  # Right hand
                     (15, 17), (15, 19),  # Left hand
                     (17, 19), (15, 21)]  # Left hand (continued)

    for line in connect_lines:
        start, end = line
        x_vals = [keypoints[start][0], keypoints[end][0]]
        y_vals = [-keypoints[start][1], -keypoints[end][1]]  # Invert the y-axis
        ax.plot(x_vals, y_vals, linewidth=2, color=line_color)

    ax.set_aspect('equal', adjustable='datalim')
    plt.title(title)
    plt.axis('off')
    plt.show()
    return fig  # Returned so the Gradio interface below can display the plot

# Example usage with the ground-truth keypoints:
keypoints = test_actual_plot  # The 33 actual keypoints of the first test image
plot_human_pose(keypoints, title='Actual Pose', line_color='red')

# Example usage with the predicted keypoints:
keypoints = test_predict_plot.detach().numpy()  # The 33 predicted keypoints
plot_human_pose(keypoints, title='Predicted Pose', line_color='green')

"""### As you can see, the model predicts the person's pose quite accurately, as reflected by its train and validation losses"""

# torch.save(model.state_dict(), '/content/drive/MyDrive/Ayush sarangi/model.pth')
torch.save(model, '/content/drive/MyDrive/Ayush sarangi/entire_model.pt')  # Drive path; valid only when running in Colab

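# Loading the pickled model back later requires the SimpleCNN class to be in
# scope; a sketch, assuming the same Drive path as above:
#
# model = torch.load('/content/drive/MyDrive/Ayush sarangi/entire_model.pt')
# model.eval()
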
import cv2

# test_image = cv2.imread('/content/drive/MyDrive/CNN_Dataset/02e442be-aec7-4f7c-93a7-e4246d0e1f93.JPG')
# # test_image = cv2.resize(test_image, (32,32))
# # test_image.shape

def predict_pose(test_image):
    # Gradio passes a PIL image; convert it to an array and resize it to the
    # 32x32 input size the network was trained on
    img = cv2.resize(np.array(test_image), (32, 32))
    convert_tensor = transforms.ToTensor()
    tensor_img = convert_tensor(img)
    tensor_img = tensor_img[None, :, :, :]  # Add a batch dimension
    model.eval()

    with torch.no_grad():
        outputs = model(tensor_img)

    pred_keypoints = outputs.reshape(1, 33, 4)[0]
    pred_keypoints = pred_keypoints.numpy()

    return plot_human_pose(pred_keypoints, title='Predicted Pose', line_color='green')

# predict_pose(test_image)  # Only runs if the cv2.imread line above is uncommented

pose_detector = gr.Interface(fn=predict_pose,
                             inputs=gr.Image(type='pil', label="Image"),
                             outputs=gr.Plot())

pose_detector.launch()

requirements.txt
ADDED
@@ -0,0 +1,7 @@
gradio==4.7.1
matplotlib==3.8.2
mediapipe==0.10.8
numpy==1.23.5
Pillow==10.1.0
torch==2.1.1
torchvision==0.16.1